Files
shengkao_pachong/app/controller/Crawler.php
杨志 e39126d54c up
2026-02-02 14:53:38 +08:00

482 lines
15 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
declare (strict_types = 1);
namespace app\controller;
use app\BaseController;
use app\middleware\Auth;
use app\service\CrawlerService;
use app\service\UserConfigService;
use app\service\UserService;
use think\facade\Session;
use think\facade\View;
/**
* 爬虫控制器
*/
class Crawler extends BaseController
{
protected $middleware = [Auth::class];
/**
 * Controller bootstrap hook: runs the parent initialisation, then prepares
 * JSON output for the actions listed as API endpoints.
 *
 * The action name is requested in its lowercased form (`action(true)`), so
 * the endpoint list is lowercased before the comparison as well. Comparing
 * the lowercased name against the camelCase names directly can never match,
 * which silently disabled this whole branch.
 */
protected function initialize()
{
    parent::initialize();

    $apiActions = [
        'getUserConfig',
        'saveUserConfig',
        'getDsdmOptions',
        'getZwdmList',
        'getPositionInfo',
        'batchGetPositionInfo',
        'fetchAllPositions',
    ];

    $action = strtolower((string) $this->request->action(true));
    if (!in_array($action, array_map('strtolower', $apiActions), true)) {
        return;
    }

    // Announce JSON up front; skip when output has already started so no
    // "headers already sent" warning leaks into the response body.
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
    }

    // Ask the view layer not to auto-render a template for API endpoints.
    // NOTE(review): the option is passed as an array on the assumption that
    // the view driver's config() accepts an array of options — confirm
    // against the installed think-view version.
    $this->app->view->config(['auto_render' => false]);
}
/**
 * Render the crawler tool landing page.
 *
 * Loads the saved configuration of the user named in the session (an empty
 * array when no username is stored) and exposes it to the template as
 * `userConfig`.
 */
public function index()
{
    $currentUser = Session::get('username', '');

    // Only hit the config service when somebody is actually logged in.
    $userConfig = $currentUser
        ? (new UserConfigService())->getUserConfig($currentUser)
        : [];

    View::assign('userConfig', $userConfig);

    return View::fetch();
}
/**
 * Return the logged-in user's saved configuration as JSON.
 *
 * Responds with code 1 and the configuration under `data` on success, or
 * code 0 with a message when no user is logged in or an exception occurs.
 */
public function getUserConfig()
{
    try {
        $currentUser = Session::get('username', '');

        if (!empty($currentUser)) {
            $stored = (new UserConfigService())->getUserConfig($currentUser);
            $payload = [
                'code' => 1,
                'data' => $stored,
                'msg' => '获取成功',
            ];
        } else {
            $payload = [
                'code' => 0,
                'msg' => '未登录',
            ];
        }

        return json($payload);
    } catch (\Exception $e) {
        $failure = '获取失败: ' . $e->getMessage();

        return json([
            'code' => 0,
            'msg' => $failure,
        ]);
    }
}
/**
 * Change the logged-in user's password.
 *
 * Reads `old_password` and `new_password` from the request, validates them
 * and delegates to UserService::changeUserPassword(), whose result is
 * returned as JSON. Responds with code 0 and a message when the user is not
 * logged in, a password is missing, or an exception occurs.
 */
public function changePassword()
{
    try {
        $username = Session::get('username', '');
        if (empty($username)) {
            return json(['code' => 0, 'msg' => '未登录']);
        }

        $old = $this->request->param('old_password', '');
        $new = $this->request->param('new_password', '');

        // Reject arrays/null so only plain values ever reach the service.
        if (!is_scalar($old) || !is_scalar($new)) {
            return json(['code' => 0, 'msg' => '旧密码和新密码不能为空']);
        }

        $old = (string) $old;
        $new = (string) $new;

        // Compare against '' instead of using empty(): empty('0') is true,
        // which would wrongly refuse the legitimate password "0".
        if ($old === '' || $new === '') {
            return json(['code' => 0, 'msg' => '旧密码和新密码不能为空']);
        }

        $service = new UserService();
        $result = $service->changeUserPassword($username, $old, $new);

        return json($result);
    } catch (\Exception $e) {
        return json(['code' => 0, 'msg' => '修改失败: ' . $e->getMessage()]);
    }
}
/**
 * Persist the crawler settings submitted by the logged-in user.
 *
 * Collects the six configuration fields from the request (each defaulting
 * to an empty string) and hands them to UserConfigService::saveUserConfig();
 * the service result is returned as JSON.
 */
public function saveUserConfig()
{
    try {
        $currentUser = Session::get('username', '');
        if (empty($currentUser)) {
            return json([
                'code' => 0,
                'msg' => '未登录',
            ]);
        }

        // Field names double as request parameter names and config keys.
        $fields = ['jsessionid1', 'jsessionid2', 'serverid', 'examid', 'bmid', 'userid'];

        $settings = [];
        foreach ($fields as $field) {
            $settings[$field] = $this->request->param($field, '');
        }

        $outcome = (new UserConfigService())->saveUserConfig($currentUser, $settings);

        return json($outcome);
    } catch (\Exception $e) {
        $failure = '保存失败: ' . $e->getMessage();

        return json([
            'code' => 0,
            'msg' => $failure,
        ]);
    }
}
/**
 * Fetch the region (dsdm) options by downloading the position-selection
 * page and extracting them from its HTML.
 *
 * Request parameters: `examid`, `bmid`, `userid` (all required), `cookies`
 * (parsed by CrawlerService::parseCookies()) and `aa`, which defaults to the
 * current time in milliseconds.
 *
 * Responds with code 1 and the extracted options under `data`, or code 0
 * with a message on validation, transport, HTTP-status or parsing failure.
 */
public function getDsdmOptions()
{
    try {
        $examid = $this->request->param('examid', '');
        $bmid = $this->request->param('bmid', '');
        $userid = $this->request->param('userid', '');
        $cookiesParam = $this->request->param('cookies', '');
        // '%.0f' always yields plain digits, independent of the float-to-string
        // "precision" ini setting.
        $aa = $this->request->param('aa', sprintf('%.0f', microtime(true) * 1000));

        // The ids end up in a URL, so they must be present and scalar.
        foreach ([$examid, $bmid, $userid] as $required) {
            if (empty($required) || !is_scalar($required)) {
                return json([
                    'code' => 0,
                    'msg' => '请先填写examid、bmid和userid',
                ]);
            }
        }

        $service = new CrawlerService();
        try {
            $cookies = $service->parseCookies($cookiesParam);
        } catch (\Exception $e) {
            // Cookie parsing errors are reported verbatim, without a prefix.
            return json([
                'code' => 0,
                'msg' => $e->getMessage(),
            ]);
        }
        $cookieString = $service->buildCookieString($cookies);

        // Build the GET URL with proper percent-encoding so request values
        // cannot inject extra query parameters or break the URL.
        $baseUrl = $service->getBaseUrl();
        $query = http_build_query([
            'examstupid' => '1015',
            'userid' => $userid,
            'bmid' => $bmid,
            'examid' => $examid,
            'aa' => $aa,
        ], '', '&', PHP_QUERY_RFC3986);
        $url = "{$baseUrl}/stuchooseexam/selectPosition.htm?{$query}";

        // Referer sent with the request (the site's input page).
        $refererUrl = "{$baseUrl}/stuchooseexam/input.htm";

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // NOTE(review): TLS peer/host verification is disabled, which allows
        // man-in-the-middle interception of the session cookies. Left as-is
        // because the target site's certificate setup is unknown — confirm
        // and enable verification if possible.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_ENCODING, ''); // accept and decode any supported compression
        curl_setopt($ch, CURLOPT_HTTPHEADER, $service->buildHtmlHeaders($cookieString, $refererUrl));

        $html = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // curl_exec() returns false on failure; never pass that on to the parser.
        if ($error || $html === false) {
            return json([
                'code' => 0,
                'msg' => '获取网页失败: ' . $error,
            ]);
        }

        if ($httpCode !== 200) {
            return json([
                'code' => 0,
                'msg' => '获取网页失败HTTP状态码: ' . $httpCode . '请检查cookie是否正确',
            ]);
        }

        $options = $service->extractDsdmOptions($html);

        return json([
            'code' => 1,
            'data' => $options,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        return json([
            'code' => 0,
            'msg' => '获取失败: ' . $e->getMessage(),
        ]);
    }
}
/**
 * List the position codes (zwdm) available for a region.
 *
 * Requires `dsdm`, `examid`, `bmid`, `userid` and `aa`; `cookies` is parsed
 * by CrawlerService::parseCookies(). Every tree entry with a non-empty
 * `CODE` becomes a `{zwdm, title}` pair, the title falling back to the code
 * when `TITLE` is absent.
 */
public function getZwdmList()
{
    try {
        $request = $this->request;
        $dsdm = $request->param('dsdm', '');
        $examid = $request->param('examid', '');
        $bmid = $request->param('bmid', '');
        $userid = $request->param('userid', '');
        $aa = $request->param('aa', '');
        $cookiesParam = $request->param('cookies', '');

        // Validation is ordered: region first, then ids, then the aa token.
        $problem = null;
        if (empty($dsdm)) {
            $problem = '请选择地区';
        } elseif (empty($examid) || empty($bmid) || empty($userid)) {
            $problem = '请填写examid、bmid和userid';
        } elseif (empty($aa)) {
            $problem = '缺少aa参数请先获取地区选项再获取职位代码';
        }
        if ($problem !== null) {
            return json([
                'code' => 0,
                'msg' => $problem,
            ]);
        }

        $crawler = new CrawlerService();
        try {
            $cookies = $crawler->parseCookies($cookiesParam);
        } catch (\Exception $cookieError) {
            // Cookie parsing errors are reported verbatim, without a prefix.
            return json([
                'code' => 0,
                'msg' => $cookieError->getMessage(),
            ]);
        }

        $tree = $crawler->getPositionTree($dsdm, $examid, $bmid, $userid, (string)$aa, $cookies);

        // Keep every node that carries a non-empty CODE.
        $entries = [];
        foreach (is_array($tree) ? $tree : [] as $node) {
            if (empty($node['CODE'])) {
                continue;
            }
            $entries[] = [
                'zwdm' => $node['CODE'],
                'title' => $node['TITLE'] ?? $node['CODE'],
            ];
        }

        return json([
            'code' => 1,
            'data' => $entries,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        $failure = '获取失败: ' . $e->getMessage();

        return json([
            'code' => 0,
            'msg' => $failure,
        ]);
    }
}
/**
 * Fetch every selectable position code for a region and return the details
 * of all of them in one call (no manual selection needed).
 *
 * Requires `dsdm`, `examid`, `bmid`, `userid` and `aa`. Tree entries flagged
 * with a truthy `nocheck` (the original comment describes these as
 * non-selectable parent/group nodes) are skipped.
 */
public function fetchAllPositions()
{
    try {
        $request = $this->request;
        $dsdm = $request->param('dsdm', '');
        $examid = $request->param('examid', '');
        $bmid = $request->param('bmid', '');
        $userid = $request->param('userid', '');
        $aa = $request->param('aa', '');
        $cookiesParam = $request->param('cookies', '');

        // All five values are mandatory; the first empty one aborts.
        foreach ([$dsdm, $examid, $bmid, $userid, $aa] as $mandatory) {
            if (empty($mandatory)) {
                return json([
                    'code' => 0,
                    'msg' => '请填写examid、bmid、userid、dsdm并先获取地区选项生成aa',
                ]);
            }
        }

        $crawler = new CrawlerService();
        try {
            $cookies = $crawler->parseCookies($cookiesParam);
        } catch (\Exception $cookieError) {
            // Cookie parsing errors are reported verbatim, without a prefix.
            return json([
                'code' => 0,
                'msg' => $cookieError->getMessage(),
            ]);
        }

        $tree = $crawler->getPositionTree($dsdm, $examid, $bmid, $userid, (string)$aa, $cookies);

        $codes = [];
        foreach (is_array($tree) ? $tree : [] as $node) {
            // Needs a non-empty CODE and must not be marked nocheck.
            $selectable = !empty($node['CODE']) && empty($node['nocheck']);
            if ($selectable) {
                $codes[] = $node['CODE'];
            }
        }

        if (empty($codes)) {
            return json([
                'code' => 0,
                'msg' => '未获取到职位代码',
            ]);
        }

        $details = $crawler->batchGetPositionInfo($codes, $examid, $cookies);

        return json([
            'code' => 1,
            'data' => $details,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        $failure = '获取失败: ' . $e->getMessage();

        return json([
            'code' => 0,
            'msg' => $failure,
        ]);
    }
}
/**
 * Return the formatted details of a single position.
 *
 * Requires `zwdm` and `examid`; `cookies` is parsed by
 * CrawlerService::parseCookies(). Responds with code 0 when parameters are
 * missing, the upstream lookup yields nothing, or an exception occurs.
 */
public function getPositionInfo()
{
    try {
        $zwdm = $this->request->param('zwdm', '');
        $examid = $this->request->param('examid', '');
        $cookiesParam = $this->request->param('cookies', '');

        $complete = !empty($zwdm) && !empty($examid);
        if (!$complete) {
            return json([
                'code' => 0,
                'msg' => '参数不完整',
            ]);
        }

        $crawler = new CrawlerService();
        try {
            $cookies = $crawler->parseCookies($cookiesParam);
        } catch (\Exception $cookieError) {
            // Cookie parsing errors are reported verbatim, without a prefix.
            return json([
                'code' => 0,
                'msg' => $cookieError->getMessage(),
            ]);
        }

        $raw = $crawler->getPositionInfo($zwdm, $examid, $cookies);
        if (empty($raw)) {
            return json([
                'code' => 0,
                'msg' => '未获取到数据',
            ]);
        }

        $formatted = $crawler->formatPositionInfo($raw, $zwdm);

        return json([
            'code' => 1,
            'data' => $formatted,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        $failure = '获取失败: ' . $e->getMessage();

        return json([
            'code' => 0,
            'msg' => $failure,
        ]);
    }
}
/**
 * Return the details of several positions in one call.
 *
 * `zwdm_list` may arrive either as a JSON-encoded string or as an array;
 * `examid` is required and `cookies` is parsed by
 * CrawlerService::parseCookies(). Responds with code 1 and the service
 * results under `data`, or code 0 with a message on failure.
 */
public function batchGetPositionInfo()
{
    try {
        $zwdmListParam = $this->request->param('zwdm_list', '');
        $examid = $this->request->param('examid', '');
        $cookiesParam = $this->request->param('cookies', '');

        // Only consult json_last_error() when a decode actually ran here;
        // otherwise it reports the state of some earlier, unrelated decode
        // and could reject a perfectly valid array.
        if (is_string($zwdmListParam)) {
            $zwdmList = json_decode($zwdmListParam, true);
            $decodedOk = json_last_error() === JSON_ERROR_NONE;
        } else {
            $zwdmList = $zwdmListParam;
            $decodedOk = true;
        }

        if (!$decodedOk || empty($zwdmList) || !is_array($zwdmList)) {
            return json([
                'code' => 0,
                'msg' => '请选择职位代码',
            ]);
        }

        if (empty($examid)) {
            return json([
                'code' => 0,
                'msg' => '请填写examid',
            ]);
        }

        $service = new CrawlerService();
        try {
            $cookies = $service->parseCookies($cookiesParam);
        } catch (\Exception $e) {
            // Cookie parsing errors are reported verbatim, without a prefix.
            return json([
                'code' => 0,
                'msg' => $e->getMessage(),
            ]);
        }

        $results = $service->batchGetPositionInfo($zwdmList, $examid, $cookies);

        return json([
            'code' => 1,
            'data' => $results,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        return json([
            'code' => 0,
            'msg' => '获取失败: ' . $e->getMessage(),
        ]);
    }
}
}