456 lines
14 KiB
PHP
456 lines
14 KiB
PHP
<?php
|
||
declare (strict_types = 1);
|
||
|
||
namespace app\controller;
|
||
|
||
use app\BaseController;
|
||
use app\middleware\Auth;
|
||
use app\service\CrawlerService;
|
||
use app\service\UserConfigService;
|
||
use think\facade\Session;
|
||
use think\facade\View;
|
||
|
||
/**
 * Crawler controller: renders the crawler tool page and serves its JSON endpoints.
 */
|
||
class Crawler extends BaseController
|
||
{
|
||
protected $middleware = [Auth::class];
|
||
|
||
/**
 * Controller initialisation hook.
 *
 * Runs the parent initialisation and then, when the current action is one of
 * the JSON API actions listed below, sends a JSON Content-Type header and
 * turns off automatic view rendering.
 *
 * NOTE(review): the action name is requested with action(true). If that flag
 * lower-cases the name, the camelCase entries in the list can never match and
 * the branch below is dead code. Confirm against the framework's
 * Request::action() before normalising both sides or removing the branch; the
 * API handlers in this class already return json() responses themselves.
 */
protected function initialize()
{
    parent::initialize();

    $action = $this->request->action(true);
    $apiActions = [
        'getUserConfig',
        'saveUserConfig',
        'getDsdmOptions',
        'getZwdmList',
        'getPositionInfo',
        'batchGetPositionInfo',
        'fetchAllPositions',
    ];

    // Strict comparison: both sides are plain strings, so no type juggling is wanted.
    if (in_array($action, $apiActions, true)) {
        // Announce a JSON body for API actions.
        header('Content-Type: application/json; charset=utf-8');
        // Switch off automatic view output for API actions.
        $this->app->view->config('auto_render', false);
    }
}
|
||
|
||
/**
 * Render the crawler tool landing page.
 *
 * Looks up the user name stored in the session; when one is present, that
 * user's saved configuration is loaded through UserConfigService, otherwise
 * an empty array is used. The result is exposed to the template as
 * "userConfig".
 *
 * @return mixed Rendered output of View::fetch().
 */
public function index()
{
    $currentUser = Session::get('username', '');

    // Only hit the config service when the session actually carries a user name.
    $userConfig = $currentUser
        ? (new UserConfigService())->getUserConfig($currentUser)
        : [];

    View::assign('userConfig', $userConfig);

    return View::fetch();
}
|
||
|
||
/**
 * Return the saved configuration of the user stored in the session.
 *
 * Responds with code 0 when no user name is in the session or when an
 * exception is raised, and with code 1 plus the configuration in "data"
 * otherwise.
 *
 * @return mixed Response built by the json() helper.
 */
public function getUserConfig()
{
    try {
        $currentUser = Session::get('username', '');

        // Guard: nothing to load without a logged-in user.
        if (empty($currentUser)) {
            return json(['code' => 0, 'msg' => '未登录']);
        }

        $storedConfig = (new UserConfigService())->getUserConfig($currentUser);

        return json([
            'code' => 1,
            'data' => $storedConfig,
            'msg'  => '获取成功',
        ]);
    } catch (\Exception $failure) {
        return json(['code' => 0, 'msg' => '获取失败: ' . $failure->getMessage()]);
    }
}
|
||
|
||
/**
 * Persist the crawler configuration of the user stored in the session.
 *
 * Reads jsessionid1, jsessionid2, serverid, examid, bmid and userid from the
 * request (each defaulting to an empty string) and hands them to
 * UserConfigService::saveUserConfig(). The service result is returned as-is.
 *
 * @return mixed Response built by the json() helper.
 */
public function saveUserConfig()
{
    try {
        $currentUser = Session::get('username', '');

        // Guard: saving requires a logged-in user.
        if (empty($currentUser)) {
            return json(['code' => 0, 'msg' => '未登录']);
        }

        // Collect the configurable fields in their fixed order.
        $fieldNames = ['jsessionid1', 'jsessionid2', 'serverid', 'examid', 'bmid', 'userid'];
        $submitted = [];
        foreach ($fieldNames as $fieldName) {
            $submitted[$fieldName] = $this->request->param($fieldName, '');
        }

        $outcome = (new UserConfigService())->saveUserConfig($currentUser, $submitted);

        return json($outcome);
    } catch (\Exception $failure) {
        return json(['code' => 0, 'msg' => '保存失败: ' . $failure->getMessage()]);
    }
}
|
||
|
||
/**
 * Fetch the region (dsdm) options by downloading the position-selection page
 * and extracting them from its HTML.
 *
 * Request parameters: examid, bmid and userid (all required), cookies (passed
 * to CrawlerService::parseCookies()) and aa (optional; defaults to the current
 * time in milliseconds).
 *
 * Responds with code 1 and the extracted options in "data" on success, and
 * with code 0 plus a message when validation, cookie parsing, the HTTP
 * request or the extraction fails.
 *
 * @return mixed Response built by the json() helper.
 */
public function getDsdmOptions()
{
    try {
        $examid = $this->request->param('examid', '');
        $bmid = $this->request->param('bmid', '');
        $userid = $this->request->param('userid', '');
        $cookiesParam = $this->request->param('cookies', '');
        $aa = $this->request->param('aa', (string)round(microtime(true) * 1000));

        if (empty($examid) || empty($bmid) || empty($userid)) {
            return json([
                'code' => 0,
                'msg' => '请先填写examid、bmid和userid',
            ]);
        }

        $service = new CrawlerService();
        try {
            $cookies = $service->parseCookies($cookiesParam);
        } catch (\Exception $e) {
            // Cookie parsing errors are reported verbatim, without the generic prefix.
            return json([
                'code' => 0,
                'msg' => $e->getMessage(),
            ]);
        }

        $cookieString = $service->buildCookieString($cookies);
        $baseUrl = $service->getBaseUrl();

        // Encode every caller-supplied value so characters such as "&", "=" or
        // "#" cannot inject or truncate query parameters of the upstream request.
        // NOTE(review): examstupid=1015 is a hard-coded value carried over
        // unchanged; its meaning is not visible from this file.
        $query = http_build_query([
            'examstupid' => '1015',
            'userid' => $userid,
            'bmid' => $bmid,
            'examid' => $examid,
            'aa' => $aa,
        ], '', '&');
        $url = "{$baseUrl}/stuchooseexam/selectPosition.htm?{$query}";

        // Referer sent along with the page request.
        $refererUrl = "{$baseUrl}/stuchooseexam/input.htm";
        $headers = $service->buildHtmlHeaders($cookieString, $refererUrl);

        $ch = curl_init();
        if ($ch === false) {
            return json([
                'code' => 0,
                'msg' => '获取网页失败: cURL初始化失败',
            ]);
        }

        try {
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            // NOTE(review): TLS peer/host verification is disabled, which exposes
            // the forwarded session cookies to interception. Left unchanged because
            // the upstream certificate setup is unknown; enable if it validates.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
            // Empty string lets cURL negotiate and decode any supported encoding (e.g. gzip).
            curl_setopt($ch, CURLOPT_ENCODING, '');
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

            $html = curl_exec($ch);
            $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $error = curl_error($ch);
        } finally {
            // Release the handle even if an exception interrupts the request.
            curl_close($ch);
        }

        // curl_exec() returns false on failure; never pass that on as HTML.
        if ($error || $html === false) {
            return json([
                'code' => 0,
                'msg' => '获取网页失败: ' . ($error ?: '未知错误'),
            ]);
        }

        if ($httpCode !== 200) {
            return json([
                'code' => 0,
                'msg' => '获取网页失败,HTTP状态码: ' . $httpCode . '请检查cookie是否正确',
            ]);
        }

        $options = $service->extractDsdmOptions($html);

        return json([
            'code' => 1,
            'data' => $options,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        return json([
            'code' => 0,
            'msg' => '获取失败: ' . $e->getMessage(),
        ]);
    }
}
|
||
|
||
|
||
/**
 * List the position codes (zwdm) available for a region.
 *
 * Validates dsdm, then examid/bmid/userid, then aa, parses the supplied
 * cookies and asks CrawlerService::getPositionTree() for the tree data.
 * Every tree entry with a non-empty CODE becomes a {zwdm, title} pair, with
 * title falling back to the code when TITLE is absent.
 *
 * @return mixed Response built by the json() helper.
 */
public function getZwdmList()
{
    try {
        $request = $this->request;

        $dsdm = $request->param('dsdm', '');
        $examid = $request->param('examid', '');
        $bmid = $request->param('bmid', '');
        $userid = $request->param('userid', '');
        $aa = $request->param('aa', '');
        $rawCookies = $request->param('cookies', '');

        // Validation order matters: the first failing check decides the message.
        if (empty($dsdm)) {
            return json(['code' => 0, 'msg' => '请选择地区']);
        }

        if (empty($examid) || empty($bmid) || empty($userid)) {
            return json(['code' => 0, 'msg' => '请填写examid、bmid和userid']);
        }

        if (empty($aa)) {
            return json(['code' => 0, 'msg' => '缺少aa参数,请先获取地区选项再获取职位代码']);
        }

        $crawler = new CrawlerService();

        try {
            $cookieJar = $crawler->parseCookies($rawCookies);
        } catch (\Exception $cookieError) {
            // Cookie parsing errors are reported verbatim, without the generic prefix.
            return json(['code' => 0, 'msg' => $cookieError->getMessage()]);
        }

        $tree = $crawler->getPositionTree($dsdm, $examid, $bmid, $userid, (string)$aa, $cookieJar);

        // Keep only entries that carry a usable CODE.
        $positions = [];
        foreach (is_array($tree) ? $tree : [] as $node) {
            if (empty($node['CODE'])) {
                continue;
            }

            $positions[] = [
                'zwdm'  => $node['CODE'],
                'title' => $node['TITLE'] ?? $node['CODE'],
            ];
        }

        return json([
            'code' => 1,
            'data' => $positions,
            'msg'  => '获取成功',
        ]);
    } catch (\Exception $failure) {
        return json(['code' => 0, 'msg' => '获取失败: ' . $failure->getMessage()]);
    }
}
|
||
|
||
/**
 * Fetch every position code for a region and return the details of all of
 * them in one call, without a manual selection step.
 *
 * Requires dsdm, examid, bmid, userid and aa. Codes are taken from the CODE
 * field of the tree returned by CrawlerService::getPositionTree(); codes that
 * start with "152" are skipped. The remaining codes are passed to
 * CrawlerService::batchGetPositionInfo().
 *
 * @return mixed Response built by the json() helper.
 */
public function fetchAllPositions()
{
    try {
        $dsdm = $this->request->param('dsdm', '');
        $examid = $this->request->param('examid', '');
        $bmid = $this->request->param('bmid', '');
        $userid = $this->request->param('userid', '');
        $aa = $this->request->param('aa', '');
        $cookiesParam = $this->request->param('cookies', '');

        if (empty($dsdm) || empty($examid) || empty($bmid) || empty($userid) || empty($aa)) {
            return json([
                'code' => 0,
                'msg' => '请填写examid、bmid、userid、dsdm,并先获取地区选项生成aa',
            ]);
        }

        $service = new CrawlerService();
        try {
            $cookies = $service->parseCookies($cookiesParam);
        } catch (\Exception $e) {
            // Cookie parsing errors are reported verbatim, without the generic prefix.
            return json([
                'code' => 0,
                'msg' => $e->getMessage(),
            ]);
        }

        $treeData = $service->getPositionTree($dsdm, $examid, $bmid, $userid, (string)$aa, $cookies);

        $zwdmList = [];
        if (is_array($treeData)) {
            foreach ($treeData as $item) {
                if (empty($item['CODE'])) {
                    continue;
                }

                $code = $item['CODE'];

                // Skip codes beginning with "152". The cast keeps the prefix test
                // from raising a TypeError under strict_types when a code arrives
                // as a number; such an error would bypass the catch block below.
                if (strncmp((string)$code, '152', 3) === 0) {
                    continue;
                }

                $zwdmList[] = $code;
            }
        }

        if (empty($zwdmList)) {
            return json([
                'code' => 0,
                'msg' => '未获取到职位代码',
            ]);
        }

        $results = $service->batchGetPositionInfo($zwdmList, $examid, $cookies);

        return json([
            'code' => 1,
            'data' => $results,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        return json([
            'code' => 0,
            'msg' => '获取失败: ' . $e->getMessage(),
        ]);
    }
}
|
||
|
||
/**
 * Return the details of a single position.
 *
 * Requires zwdm and examid. The supplied cookies are parsed, the raw details
 * are fetched through CrawlerService::getPositionInfo() and, when non-empty,
 * formatted with CrawlerService::formatPositionInfo() before being returned.
 *
 * @return mixed Response built by the json() helper.
 */
public function getPositionInfo()
{
    try {
        $request = $this->request;

        $zwdm = $request->param('zwdm', '');
        $examid = $request->param('examid', '');
        $rawCookies = $request->param('cookies', '');

        if (empty($zwdm) || empty($examid)) {
            return json(['code' => 0, 'msg' => '参数不完整']);
        }

        $crawler = new CrawlerService();

        try {
            $cookieJar = $crawler->parseCookies($rawCookies);
        } catch (\Exception $cookieError) {
            // Cookie parsing errors are reported verbatim, without the generic prefix.
            return json(['code' => 0, 'msg' => $cookieError->getMessage()]);
        }

        $rawInfo = $crawler->getPositionInfo($zwdm, $examid, $cookieJar);

        // Guard: an empty result is reported as "no data" without formatting.
        if (empty($rawInfo)) {
            return json(['code' => 0, 'msg' => '未获取到数据']);
        }

        return json([
            'code' => 1,
            'data' => $crawler->formatPositionInfo($rawInfo, $zwdm),
            'msg'  => '获取成功',
        ]);
    } catch (\Exception $failure) {
        return json(['code' => 0, 'msg' => '获取失败: ' . $failure->getMessage()]);
    }
}
|
||
|
||
/**
 * Return the details of several positions at once.
 *
 * Request parameters: zwdm_list (a JSON-encoded array string, or an array),
 * examid (required) and cookies. The list is validated, the cookies are
 * parsed and the work is delegated to CrawlerService::batchGetPositionInfo().
 *
 * @return mixed Response built by the json() helper.
 */
public function batchGetPositionInfo()
{
    try {
        $zwdmListParam = $this->request->param('zwdm_list', '');
        $examid = $this->request->param('examid', '');
        $cookiesParam = $this->request->param('cookies', '');

        // Decode only when the list arrived as a JSON string.
        $zwdmList = is_string($zwdmListParam)
            ? json_decode($zwdmListParam, true)
            : $zwdmListParam;

        // A failed decode yields null, so the array check alone rejects malformed
        // JSON. json_last_error() is deliberately not consulted: when no decode
        // ran it would report a stale error from an earlier, unrelated decode.
        if (!is_array($zwdmList) || $zwdmList === []) {
            return json([
                'code' => 0,
                'msg' => '请选择职位代码',
            ]);
        }

        if (empty($examid)) {
            return json([
                'code' => 0,
                'msg' => '请填写examid',
            ]);
        }

        $service = new CrawlerService();
        try {
            $cookies = $service->parseCookies($cookiesParam);
        } catch (\Exception $e) {
            // Cookie parsing errors are reported verbatim, without the generic prefix.
            return json([
                'code' => 0,
                'msg' => $e->getMessage(),
            ]);
        }

        $results = $service->batchGetPositionInfo($zwdmList, $examid, $cookies);

        return json([
            'code' => 1,
            'data' => $results,
            'msg' => '获取成功',
        ]);
    } catch (\Exception $e) {
        return json([
            'code' => 0,
            'msg' => '获取失败: ' . $e->getMessage(),
        ]);
    }
}
|
||
}
|