Files
shengkao_pachong/app/controller/Crawler.php
2026-01-20 16:15:46 +08:00

372 lines
13 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
declare (strict_types = 1);
namespace app\controller;
use app\BaseController;
use app\service\CrawlerService;
use think\facade\View;
/**
* 爬虫控制器
*/
class Crawler extends BaseController
{
/**
* 显示爬虫工具首页
*/
public function index()
{
return View::fetch();
}
/**
* 获取地区选项从网页HTML中提取
*/
public function getDsdmOptions()
{
try {
$examid = $this->request->param('examid', '');
$bmid = $this->request->param('bmid', '');
$userid = $this->request->param('userid', '');
$cookiesParam = $this->request->param('cookies', '');
$aa = $this->request->param('aa', (string)round(microtime(true) * 1000));
if (empty($examid) || empty($bmid) || empty($userid)) {
return json([
'code' => 0,
'msg' => '请先填写examid、bmid和userid',
]);
}
if (empty($cookiesParam)) {
return json([
'code' => 0,
'msg' => '请填写Cookie数据',
]);
}
// 解析JSON格式的cookies
$cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam;
if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) {
return json([
'code' => 0,
'msg' => 'Cookie数据格式错误请检查JSON格式',
]);
}
$cookieString = $this->buildCookieString($cookies);
// 构建URL获取HTML - 使用GET请求
$url = "http://gzrsks.oumakspt.com:62/tyzpwb/stuchooseexam/selectPosition.htm?examstupid=1015&userid={$userid}&bmid={$bmid}&examid={$examid}&aa={$aa}";
// 构建Referer URL与浏览器实际访问一致
$refererUrl = "http://gzrsks.oumakspt.com:62/tyzpwb/stuchooseexam/input.htm";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($ch, CURLOPT_ENCODING, ''); // 自动处理gzip编码
curl_setopt($ch, CURLOPT_HTTPHEADER, [
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language: zh-CN,zh;q=0.9',
'Accept-Encoding: gzip, deflate',
'Cache-Control: no-cache',
'Connection: keep-alive',
'Cookie: ' . $cookieString,
'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36',
'Referer: ' . $refererUrl,
'Upgrade-Insecure-Requests: 1',
]);
$html = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$error = curl_error($ch);
curl_close($ch);
if ($error) {
return json([
'code' => 0,
'msg' => '获取网页失败: ' . $error,
]);
}
if ($httpCode !== 200) {
return json([
'code' => 0,
'msg' => '获取网页失败HTTP状态码: ' . $httpCode,
]);
}
$service = new CrawlerService();
$options = $service->extractDsdmOptions($html);
return json([
'code' => 1,
'data' => $options,
'msg' => '获取成功',
]);
} catch (\Exception $e) {
return json([
'code' => 0,
'msg' => '获取失败: ' . $e->getMessage(),
]);
}
}
/**
* 构建Cookie字符串
* @param array $cookies Cookie数组
* @return string
*/
private function buildCookieString(array|string $cookies): string
{
// 如果直接传入原始Cookie字符串优先使用支持多重同名键
if (is_string($cookies)) {
return trim($cookies);
}
// 只保留用户填写的Cookie去重并优先保留后者防止重复JSESSIONID
$cookieData = $cookies['请求 Cookie'] ?? $cookies;
$normalized = []; // 同名键保留多值
foreach ($cookieData as $key => $value) {
$k = trim((string)$key);
if ($k === '') {
continue;
}
$vals = is_array($value) ? $value : [$value];
foreach ($vals as $v) {
$v = trim((string)$v);
if ($v === '') {
continue;
}
$normalized[$k][] = $v; // 多值按输入顺序保留
}
}
// 将 JSESSIONID 放在最前(如果存在),其余按键名顺序
$parts = [];
if (isset($normalized['JSESSIONID'])) {
foreach ($normalized['JSESSIONID'] as $v) {
$parts[] = 'JSESSIONID=' . $v;
}
unset($normalized['JSESSIONID']);
}
foreach ($normalized as $k => $vArr) {
foreach ($vArr as $v) {
$parts[] = $k . '=' . $v;
}
}
return implode('; ', $parts);
}
/**
* 获取职位代码列表
*/
public function getZwdmList()
{
try {
$dsdm = $this->request->param('dsdm', '');
$cookiesParam = $this->request->param('cookies', '');
if (empty($dsdm)) {
return json([
'code' => 0,
'msg' => '请选择地区',
]);
}
if (empty($cookiesParam)) {
return json([
'code' => 0,
'msg' => '请填写Cookie数据',
]);
}
// 解析JSON格式的cookies
$cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam;
if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) {
return json([
'code' => 0,
'msg' => 'Cookie数据格式错误请检查JSON格式',
]);
}
$service = new CrawlerService();
$treeData = $service->getPositionTree($dsdm, $cookies);
// 提取所有CODE作为zwdm
$zwdmList = [];
if (is_array($treeData)) {
foreach ($treeData as $item) {
if (isset($item['CODE']) && !empty($item['CODE'])) {
$zwdmList[] = [
'zwdm' => $item['CODE'],
'title' => $item['TITLE'] ?? $item['CODE'],
];
}
}
}
return json([
'code' => 1,
'data' => $zwdmList,
'msg' => '获取成功',
]);
} catch (\Exception $e) {
return json([
'code' => 0,
'msg' => '获取失败: ' . $e->getMessage(),
]);
}
}
/**
* 获取职位详细信息
*/
public function getPositionInfo()
{
try {
$zwdm = $this->request->param('zwdm', '');
$examid = $this->request->param('examid', '');
$cookiesParam = $this->request->param('cookies', '');
if (empty($zwdm) || empty($examid)) {
return json([
'code' => 0,
'msg' => '参数不完整',
]);
}
if (empty($cookiesParam)) {
return json([
'code' => 0,
'msg' => '请填写Cookie数据',
]);
}
// 解析JSON格式的cookies
$cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam;
if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) {
return json([
'code' => 0,
'msg' => 'Cookie数据格式错误请检查JSON格式',
]);
}
$service = new CrawlerService();
$info = $service->getPositionInfo($zwdm, $examid, $cookies);
if (!empty($info)) {
// 处理单条数据或数组数据
if (isset($info[0])) {
$item = $info[0];
} else {
$item = $info;
}
// 计算竞争比
$zprs = isset($item['zprs']) ? intval($item['zprs']) : 0;
$bkrs = isset($item['bkrs']) ? intval($item['bkrs']) : 0;
$competitionRatio = $zprs > 0 ? ($bkrs / $zprs) : 0;
$result = [
'sbmc' => $item['sbmc'] ?? '', // 省份
'dsmc' => $item['dsmc'] ?? '', // 地区
'zpdwmc' => $item['zpdwmc'] ?? '', // 招聘单位/用人司局
'zwmc' => $item['zwmc'] ?? '', // 职位名称
'zwdm' => $item['zwdm'] ?? $zwdm, // 职位代码
'zprs' => $zprs, // 招聘人数
'bkrs' => $bkrs, // 审核通过人数
'competition_ratio' => number_format($competitionRatio, 2), // 竞争比
];
return json([
'code' => 1,
'data' => $result,
'msg' => '获取成功',
]);
} else {
return json([
'code' => 0,
'msg' => '未获取到数据',
]);
}
} catch (\Exception $e) {
return json([
'code' => 0,
'msg' => '获取失败: ' . $e->getMessage(),
]);
}
}
/**
* 批量获取职位信息
*/
public function batchGetPositionInfo()
{
try {
$zwdmListParam = $this->request->param('zwdm_list', '');
$examid = $this->request->param('examid', '');
$cookiesParam = $this->request->param('cookies', '');
// 解析JSON格式的zwdm_list
$zwdmList = is_string($zwdmListParam) ? json_decode($zwdmListParam, true) : $zwdmListParam;
if (json_last_error() !== JSON_ERROR_NONE || empty($zwdmList) || !is_array($zwdmList)) {
return json([
'code' => 0,
'msg' => '请选择职位代码',
]);
}
if (empty($examid)) {
return json([
'code' => 0,
'msg' => '请填写examid',
]);
}
if (empty($cookiesParam)) {
return json([
'code' => 0,
'msg' => '请填写Cookie数据',
]);
}
// 解析JSON格式的cookies
$cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam;
if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) {
return json([
'code' => 0,
'msg' => 'Cookie数据格式错误请检查JSON格式',
]);
}
$service = new CrawlerService();
$results = $service->batchGetPositionInfo($zwdmList, $examid, $cookies);
return json([
'code' => 1,
'data' => $results,
'msg' => '获取成功',
]);
} catch (\Exception $e) {
return json([
'code' => 0,
'msg' => '获取失败: ' . $e->getMessage(),
]);
}
}
}