Files
shengkao_pachong/app/controller/Crawler.php
杨志 e964409bb7 up
2026-01-21 08:39:32 +08:00

355 lines
11 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
declare (strict_types = 1);
namespace app\controller;
use app\BaseController;
use app\middleware\Auth;
use app\service\CrawlerService;
use think\facade\View;
/**
 * Crawler controller.
 *
 * JSON endpoints used by the crawler tool page. Every JSON endpoint answers
 * with an envelope of the form {code: 1, data: ..., msg: ...} on success or
 * {code: 0, msg: ...} on failure.
 */
class Crawler extends BaseController
{
    /** Middleware declared for this controller. */
    protected $middleware = [Auth::class];

    /**
     * Show the crawler tool home page.
     *
     * @return mixed Whatever View::fetch() returns for the default template.
     */
    public function index()
    {
        return View::fetch();
    }

    /**
     * Fetch the region (dsdm) options by downloading the position-selection
     * page and letting CrawlerService extract the options from its HTML.
     *
     * Request parameters: examid, bmid, userid (all required), cookies, and
     * an optional aa (defaults to the current time in milliseconds).
     *
     * @return mixed JSON envelope; data holds the extracted options.
     */
    public function getDsdmOptions()
    {
        try {
            $examid = $this->readStringParam('examid');
            $bmid = $this->readStringParam('bmid');
            $userid = $this->readStringParam('userid');
            $cookiesParam = $this->request->param('cookies', '');
            $aa = $this->readStringParam('aa', (string)round(microtime(true) * 1000));

            if (empty($examid) || empty($bmid) || empty($userid)) {
                return $this->jsonFail('请先填写examid、bmid和userid');
            }

            $service = new CrawlerService();
            try {
                $cookies = $service->parseCookies($cookiesParam);
            } catch (\Exception $e) {
                // Cookie parsing errors are reported verbatim, without the generic prefix.
                return $this->jsonFail($e->getMessage());
            }
            $cookieString = $service->buildCookieString($cookies);

            // Build the GET URL. The values come straight from the request, so they
            // are URL-encoded instead of being interpolated into the query string.
            // NOTE(review): examstupid=1015 is hard-coded in the original; its meaning
            // is not visible from this file.
            $baseUrl = $service->getBaseUrl();
            $query = http_build_query(
                [
                    'examstupid' => '1015',
                    'userid' => $userid,
                    'bmid' => $bmid,
                    'examid' => $examid,
                    'aa' => $aa,
                ],
                '',
                '&',
                PHP_QUERY_RFC3986
            );
            $url = "{$baseUrl}/stuchooseexam/selectPosition.htm?{$query}";

            // Referer matching the page a browser would come from.
            $refererUrl = "{$baseUrl}/stuchooseexam/input.htm";
            $headers = $service->buildHtmlHeaders($cookieString, $refererUrl);

            try {
                $html = $this->downloadHtml($url, $headers);
            } catch (\RuntimeException $e) {
                // Transport/HTTP failures already carry their full user-facing message.
                return $this->jsonFail($e->getMessage());
            }

            return $this->jsonOk($service->extractDsdmOptions($html));
        } catch (\Exception $e) {
            return $this->jsonFail('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * List the position codes (zwdm) found in the position tree of a region.
     *
     * Request parameters: dsdm, examid, bmid, userid, aa (all required) and cookies.
     *
     * @return mixed JSON envelope; data is a list of {zwdm, title} entries.
     */
    public function getZwdmList()
    {
        try {
            $dsdm = $this->readStringParam('dsdm');
            $examid = $this->readStringParam('examid');
            $bmid = $this->readStringParam('bmid');
            $userid = $this->readStringParam('userid');
            $aa = $this->readStringParam('aa');
            $cookiesParam = $this->request->param('cookies', '');

            if (empty($dsdm)) {
                return $this->jsonFail('请选择地区');
            }
            if (empty($examid) || empty($bmid) || empty($userid)) {
                return $this->jsonFail('请填写examid、bmid和userid');
            }
            if (empty($aa)) {
                return $this->jsonFail('缺少aa参数请先获取地区选项再获取职位代码');
            }

            $service = new CrawlerService();
            try {
                $cookies = $service->parseCookies($cookiesParam);
            } catch (\Exception $e) {
                return $this->jsonFail($e->getMessage());
            }

            $treeData = $service->getPositionTree($dsdm, $examid, $bmid, $userid, $aa, $cookies);

            // Every tree item with a non-empty CODE becomes one zwdm entry;
            // the title falls back to the code itself when TITLE is missing.
            $zwdmList = [];
            if (is_array($treeData)) {
                foreach ($treeData as $item) {
                    if (!is_array($item) || empty($item['CODE'])) {
                        continue;
                    }
                    $zwdmList[] = [
                        'zwdm' => $item['CODE'],
                        'title' => $item['TITLE'] ?? $item['CODE'],
                    ];
                }
            }

            return $this->jsonOk($zwdmList);
        } catch (\Exception $e) {
            return $this->jsonFail('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Fetch every position code of a region and return the details of all of
     * them in one call (no manual selection needed).
     *
     * Request parameters: dsdm, examid, bmid, userid, aa (all required) and cookies.
     *
     * @return mixed JSON envelope; data is the result of
     *               CrawlerService::batchGetPositionInfo().
     */
    public function fetchAllPositions()
    {
        try {
            $dsdm = $this->readStringParam('dsdm');
            $examid = $this->readStringParam('examid');
            $bmid = $this->readStringParam('bmid');
            $userid = $this->readStringParam('userid');
            $aa = $this->readStringParam('aa');
            $cookiesParam = $this->request->param('cookies', '');

            if (empty($dsdm) || empty($examid) || empty($bmid) || empty($userid) || empty($aa)) {
                return $this->jsonFail('请填写examid、bmid、userid、dsdm并先获取地区选项生成aa');
            }

            $service = new CrawlerService();
            try {
                $cookies = $service->parseCookies($cookiesParam);
            } catch (\Exception $e) {
                return $this->jsonFail($e->getMessage());
            }

            $treeData = $service->getPositionTree($dsdm, $examid, $bmid, $userid, $aa, $cookies);

            $zwdmList = [];
            if (is_array($treeData)) {
                foreach ($treeData as $item) {
                    if (!is_array($item) || empty($item['CODE'])) {
                        continue;
                    }
                    $code = $item['CODE'];
                    // Position codes starting with 152 are skipped. The cast keeps
                    // strpos() from raising a TypeError under strict_types when the
                    // decoded code is numeric rather than a string.
                    if (strpos((string)$code, '152') === 0) {
                        continue;
                    }
                    $zwdmList[] = $code;
                }
            }

            if (empty($zwdmList)) {
                return $this->jsonFail('未获取到职位代码');
            }

            return $this->jsonOk($service->batchGetPositionInfo($zwdmList, $examid, $cookies));
        } catch (\Exception $e) {
            return $this->jsonFail('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Fetch the details of a single position.
     *
     * Request parameters: zwdm and examid (both required) and cookies.
     *
     * @return mixed JSON envelope; data is the formatted position info.
     */
    public function getPositionInfo()
    {
        try {
            $zwdm = $this->readStringParam('zwdm');
            $examid = $this->readStringParam('examid');
            $cookiesParam = $this->request->param('cookies', '');

            if (empty($zwdm) || empty($examid)) {
                return $this->jsonFail('参数不完整');
            }

            $service = new CrawlerService();
            try {
                $cookies = $service->parseCookies($cookiesParam);
            } catch (\Exception $e) {
                return $this->jsonFail($e->getMessage());
            }

            $info = $service->getPositionInfo($zwdm, $examid, $cookies);
            if (empty($info)) {
                return $this->jsonFail('未获取到数据');
            }

            return $this->jsonOk($service->formatPositionInfo($info, $zwdm));
        } catch (\Exception $e) {
            return $this->jsonFail('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Fetch the details of several positions at once.
     *
     * Request parameters: zwdm_list (a JSON-encoded array, or an already
     * decoded array), examid (required) and cookies.
     *
     * @return mixed JSON envelope; data is the result of
     *               CrawlerService::batchGetPositionInfo().
     */
    public function batchGetPositionInfo()
    {
        try {
            $zwdmListParam = $this->request->param('zwdm_list', '');
            $examid = $this->readStringParam('examid');
            $cookiesParam = $this->request->param('cookies', '');

            // Decode only when a JSON string was sent. json_decode() yields null for
            // malformed input, so checking the decoded value is sufficient and avoids
            // reading a stale json_last_error() left behind by an unrelated decode.
            $zwdmList = is_string($zwdmListParam)
                ? json_decode($zwdmListParam, true)
                : $zwdmListParam;
            if (!is_array($zwdmList) || $zwdmList === []) {
                return $this->jsonFail('请选择职位代码');
            }
            if (empty($examid)) {
                return $this->jsonFail('请填写examid');
            }

            $service = new CrawlerService();
            try {
                $cookies = $service->parseCookies($cookiesParam);
            } catch (\Exception $e) {
                return $this->jsonFail($e->getMessage());
            }

            return $this->jsonOk($service->batchGetPositionInfo($zwdmList, $examid, $cookies));
        } catch (\Exception $e) {
            return $this->jsonFail('获取失败: ' . $e->getMessage());
        }
    }

    /**
     * Read a request parameter and normalise it to a string.
     *
     * Scalars (e.g. integers from a JSON body) are cast so that later string
     * operations and service calls do not raise a TypeError under strict_types.
     *
     * @param string $name    Parameter name.
     * @param string $default Value used when the parameter is absent.
     *
     * @return string
     *
     * @throws \InvalidArgumentException When the parameter is an array or other non-scalar.
     */
    private function readStringParam(string $name, string $default = ''): string
    {
        $value = $this->request->param($name, $default);
        if ($value === null) {
            return $default;
        }
        if (!is_scalar($value)) {
            throw new \InvalidArgumentException('参数格式不正确: ' . $name);
        }

        return (string)$value;
    }

    /**
     * Download a page with a GET request.
     *
     * @param string $url     Fully built request URL.
     * @param mixed  $headers Value returned by CrawlerService::buildHtmlHeaders();
     *                        handed to CURLOPT_HTTPHEADER unchanged.
     *
     * @return string Response body.
     *
     * @throws \RuntimeException On a cURL error or a non-200 final status code; the
     *                           message is already the user-facing text.
     */
    private function downloadHtml(string $url, $headers): string
    {
        $ch = curl_init();
        if ($ch === false) {
            throw new \RuntimeException('获取网页失败: curl_init');
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_URL => $url,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_TIMEOUT => 30,
                CURLOPT_CONNECTTIMEOUT => 10,
                CURLOPT_FOLLOWLOCATION => true,
                // NOTE(review): certificate and host verification are disabled, as in
                // the original code. The request carries the user's cookies, so this
                // should be enabled if the target site's certificate chain allows it.
                CURLOPT_SSL_VERIFYPEER => false,
                CURLOPT_SSL_VERIFYHOST => false,
                // Empty string: accept and transparently decode every supported encoding (gzip etc.).
                CURLOPT_ENCODING => '',
                CURLOPT_HTTPHEADER => $headers,
            ]);
            $html = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $error = curl_error($ch);
        } finally {
            // Always release the handle, even if an option call throws.
            curl_close($ch);
        }

        if ($html === false || $error !== '') {
            throw new \RuntimeException('获取网页失败: ' . $error);
        }
        if ($httpCode !== 200) {
            throw new \RuntimeException('获取网页失败HTTP状态码: ' . $httpCode);
        }

        return $html;
    }

    /**
     * Build the success envelope.
     *
     * @param mixed $data Payload placed under the "data" key.
     *
     * @return mixed Response produced by the json() helper.
     */
    private function jsonOk($data)
    {
        return json([
            'code' => 1,
            'data' => $data,
            'msg' => '获取成功',
        ]);
    }

    /**
     * Build the failure envelope.
     *
     * @param string $message User-facing error message.
     *
     * @return mixed Response produced by the json() helper.
     */
    private function jsonFail(string $message)
    {
        return json([
            'code' => 0,
            'msg' => $message,
        ]);
    }
}