Files
shengkao_pachong/app/controller/Crawler.php
2026-01-20 15:24:02 +08:00

340 lines
11 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
declare (strict_types = 1);
namespace app\controller;
use app\BaseController;
use app\service\CrawlerService;
use think\facade\View;
/**
 * Crawler controller.
 *
 * Serves the crawler tool page plus JSON endpoints that query the remote exam
 * registration site, either directly through cURL or through CrawlerService.
 * Every JSON endpoint answers with `code` (1 = success, 0 = failure), `msg`
 * and, on success, `data`.
 */
class Crawler extends BaseController
{
    /** Base URL of the remote site queried directly by getDsdmOptions(). */
    private const REMOTE_BASE_URL = 'http://gzrsks.oumakspt.com:62/tyzpwb';

    /**
     * Render the crawler tool landing page.
     *
     * @return mixed Whatever View::fetch() returns for the default template.
     */
    public function index()
    {
        return View::fetch();
    }

    /**
     * Fetch the region options by downloading the remote position-selection
     * page and letting CrawlerService extract them from the HTML.
     *
     * Request parameters: `examid`, `bmid`, `userid` (all required) and
     * `cookies` (a JSON object string or an already-decoded array).
     *
     * @return mixed JSON response produced by json().
     */
    public function getDsdmOptions()
    {
        try {
            $examid = $this->request->param('examid', '');
            $bmid = $this->request->param('bmid', '');
            $userid = $this->request->param('userid', '');
            $cookiesParam = $this->request->param('cookies', '');

            if (empty($examid) || empty($bmid) || empty($userid)) {
                return $this->respondError('请先填写examid、bmid和userid');
            }
            if (empty($cookiesParam)) {
                return $this->respondError('请填写Cookie数据');
            }

            $cookies = $this->decodeJsonArray($cookiesParam);
            if (empty($cookies)) {
                return $this->respondError('Cookie数据格式错误请检查JSON格式');
            }

            // The ids come straight from the request, so they are URL-encoded
            // instead of being interpolated raw into the query string.
            $identity = ['userid' => $userid, 'bmid' => $bmid, 'examid' => $examid];

            // Target page (GET). `aa` is a millisecond-looking timestamp, as in the original URL.
            $url = self::REMOTE_BASE_URL . '/stuchooseexam/selectPosition.htm?' . http_build_query(
                ['examstupid' => '1015'] + $identity + ['aa' => time() . '000'],
                '',
                '&',
                PHP_QUERY_RFC3986
            );

            // Referer mimics a navigation coming from the createbmpdf.htm page.
            $refererUrl = self::REMOTE_BASE_URL . '/stubm/createbmpdf.htm?' . http_build_query(
                $identity + ['jsessionid' => '', 'mydepid' => '', 'dqssds' => ''],
                '',
                '&',
                PHP_QUERY_RFC3986
            );

            [$html, $httpCode, $error] = $this->fetchPage($url, $refererUrl, $this->buildCookieString($cookies));

            if ($error !== '' || !is_string($html)) {
                return $this->respondError('获取网页失败: ' . $error);
            }
            if ($httpCode !== 200) {
                return $this->respondError('获取网页失败HTTP状态码: ' . $httpCode);
            }

            $service = new CrawlerService();

            return $this->respondOk($service->extractDsdmOptions($html));
        } catch (\Exception $e) {
            return $this->respondError('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Build the value of a `Cookie:` request header from a cookie map.
     *
     * The map may be wrapped under the '请求 Cookie' key; when that key is
     * present its value is used instead of the top-level array.
     * Nested (non-scalar) values are skipped, and CR/LF characters are removed
     * so user-supplied data cannot inject extra header lines.
     *
     * @param array $cookies Cookie name => value map, optionally wrapped.
     * @return string Pairs in `name=value` form joined with "; " ('' when nothing usable).
     */
    private function buildCookieString(array $cookies): string
    {
        $cookieData = $cookies['请求 Cookie'] ?? $cookies;
        if (!is_array($cookieData)) {
            return '';
        }

        $lineBreaks = ["\r", "\n"];
        $pairs = [];
        foreach ($cookieData as $key => $value) {
            if ($value !== null && !is_scalar($value)) {
                continue;
            }
            $pairs[] = str_replace($lineBreaks, '', (string) $key)
                . '='
                . str_replace($lineBreaks, '', (string) $value);
        }

        return implode('; ', $pairs);
    }

    /**
     * List the position codes available for a region.
     *
     * Request parameters: `dsdm` (region code, required) and `cookies`.
     * Each entry of the tree returned by CrawlerService::getPositionTree()
     * that has a non-empty `CODE` becomes `{zwdm, title}`; `title` falls back
     * to the code when `TITLE` is absent.
     *
     * @return mixed JSON response produced by json().
     */
    public function getZwdmList()
    {
        try {
            $dsdm = $this->request->param('dsdm', '');
            $cookiesParam = $this->request->param('cookies', '');

            if (empty($dsdm)) {
                return $this->respondError('请选择地区');
            }
            if (empty($cookiesParam)) {
                return $this->respondError('请填写Cookie数据');
            }

            $cookies = $this->decodeJsonArray($cookiesParam);
            if (empty($cookies)) {
                return $this->respondError('Cookie数据格式错误请检查JSON格式');
            }

            $service = new CrawlerService();
            $treeData = $service->getPositionTree($dsdm, $cookies);

            // Collect every CODE as a zwdm entry.
            $zwdmList = [];
            if (is_array($treeData)) {
                foreach ($treeData as $item) {
                    if (!is_array($item) || empty($item['CODE'])) {
                        continue;
                    }
                    $zwdmList[] = [
                        'zwdm' => $item['CODE'],
                        'title' => $item['TITLE'] ?? $item['CODE'],
                    ];
                }
            }

            return $this->respondOk($zwdmList);
        } catch (\Exception $e) {
            return $this->respondError('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Fetch the details of a single position and compute its competition ratio.
     *
     * Request parameters: `zwdm` and `examid` (both required) and `cookies`.
     * The service result may be a single record or a list; in the latter case
     * the first record is used. The ratio is `bkrs / zprs` formatted with two
     * decimals, or 0.00 when `zprs` is not positive.
     *
     * @return mixed JSON response produced by json().
     */
    public function getPositionInfo()
    {
        try {
            $zwdm = $this->request->param('zwdm', '');
            $examid = $this->request->param('examid', '');
            $cookiesParam = $this->request->param('cookies', '');

            if (empty($zwdm) || empty($examid)) {
                return $this->respondError('参数不完整');
            }
            if (empty($cookiesParam)) {
                return $this->respondError('请填写Cookie数据');
            }

            $cookies = $this->decodeJsonArray($cookiesParam);
            if (empty($cookies)) {
                return $this->respondError('Cookie数据格式错误请检查JSON格式');
            }

            $service = new CrawlerService();
            $info = $service->getPositionInfo($zwdm, $examid, $cookies);
            if (empty($info)) {
                return $this->respondError('未获取到数据');
            }

            // Accept either one record or a list of records.
            $item = $info[0] ?? $info;
            if (!is_array($item)) {
                return $this->respondError('未获取到数据');
            }

            $zprs = isset($item['zprs']) ? intval($item['zprs']) : 0;
            $bkrs = isset($item['bkrs']) ? intval($item['bkrs']) : 0;
            // Guard against division by zero when no openings are reported.
            $competitionRatio = $zprs > 0 ? ($bkrs / $zprs) : 0;

            return $this->respondOk([
                'sbmc' => $item['sbmc'] ?? '', // province
                'dsmc' => $item['dsmc'] ?? '', // region
                'zpdwmc' => $item['zpdwmc'] ?? '', // recruiting unit / department
                'zwmc' => $item['zwmc'] ?? '', // position name
                'zwdm' => $item['zwdm'] ?? $zwdm, // position code
                'zprs' => $zprs, // number of openings
                'bkrs' => $bkrs, // applicants who passed review
                'competition_ratio' => number_format($competitionRatio, 2), // competition ratio
            ]);
        } catch (\Exception $e) {
            return $this->respondError('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Fetch details for several positions in one call.
     *
     * Request parameters: `zwdm_list` (JSON array string or array, required),
     * `examid` (required) and `cookies`. The work is delegated to
     * CrawlerService::batchGetPositionInfo() and its result is returned as-is.
     *
     * @return mixed JSON response produced by json().
     */
    public function batchGetPositionInfo()
    {
        try {
            $zwdmListParam = $this->request->param('zwdm_list', '');
            $examid = $this->request->param('examid', '');
            $cookiesParam = $this->request->param('cookies', '');

            $zwdmList = $this->decodeJsonArray($zwdmListParam);
            if (empty($zwdmList)) {
                return $this->respondError('请选择职位代码');
            }
            if (empty($examid)) {
                return $this->respondError('请填写examid');
            }
            if (empty($cookiesParam)) {
                return $this->respondError('请填写Cookie数据');
            }

            $cookies = $this->decodeJsonArray($cookiesParam);
            if (empty($cookies)) {
                return $this->respondError('Cookie数据格式错误请检查JSON格式');
            }

            $service = new CrawlerService();

            return $this->respondOk($service->batchGetPositionInfo($zwdmList, $examid, $cookies));
        } catch (\Exception $e) {
            return $this->respondError('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Normalise a request parameter that should hold a JSON array/object.
     *
     * Strings are decoded with json_decode(); the JSON error state is only
     * consulted when a decode actually happened. Anything that does not end
     * up as an array (invalid JSON, or valid JSON such as a bare string or
     * number) is reported as an empty array so callers can reject it.
     *
     * @param mixed $param Raw parameter: JSON string or already-decoded array.
     * @return array Decoded array, or [] when the input is unusable.
     */
    private function decodeJsonArray($param): array
    {
        if (is_string($param)) {
            $param = json_decode($param, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                return [];
            }
        }

        return is_array($param) ? $param : [];
    }

    /**
     * Perform the GET request used by getDsdmOptions().
     *
     * @param string $url          Page to download.
     * @param string $refererUrl   Value sent in the Referer header.
     * @param string $cookieString Value sent in the Cookie header.
     * @return array Tuple [body (string|false), HTTP status (int), cURL error ('' when none)].
     */
    private function fetchPage(string $url, string $refererUrl, string $cookieString): array
    {
        $ch = curl_init();
        if ($ch === false) {
            return [false, 0, 'curl_init failed'];
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_URL => $url,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => 30,
                CURLOPT_CONNECTTIMEOUT => 10,
                CURLOPT_FOLLOWLOCATION => true,
                // NOTE(review): certificate checks are disabled, as in the original.
                // The target is plain HTTP, so this only matters if a redirect
                // leads to HTTPS; consider enabling verification.
                CURLOPT_SSL_VERIFYPEER => false,
                CURLOPT_SSL_VERIFYHOST => false,
                CURLOPT_ENCODING => '', // let cURL decode compressed responses
                CURLOPT_HTTPHEADER => [
                    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language: zh-CN,zh;q=0.9,zh-TW;q=0.8,zh-HK;q=0.7,en-US;q=0.6,en;q=0.5',
                    'Accept-Encoding: gzip, deflate',
                    'Connection: keep-alive',
                    'Cookie: ' . $cookieString,
                    'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:147.0) Gecko/20100101 Firefox/147.0',
                    'Referer: ' . $refererUrl,
                    'Upgrade-Insecure-Requests: 1',
                ],
            ]);

            $body = curl_exec($ch);

            return [$body, (int) curl_getinfo($ch, CURLINFO_HTTP_CODE), curl_error($ch)];
        } finally {
            // Release the handle even if something above throws.
            curl_close($ch);
        }
    }

    /**
     * Build a failure response (`code` = 0).
     *
     * @param string $msg Message shown to the client.
     * @return mixed JSON response produced by json().
     */
    private function respondError(string $msg)
    {
        return json([
            'code' => 0,
            'msg' => $msg,
        ]);
    }

    /**
     * Build a success response (`code` = 1) carrying the given payload.
     *
     * @param mixed $data Payload placed under `data`.
     * @return mixed JSON response produced by json().
     */
    private function respondOk($data)
    {
        return json([
            'code' => 1,
            'data' => $data,
            'msg' => '获取成功',
        ]);
    }
}