From 871aefc33e0188b41402e569590007d4f07871fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E5=BF=97?= Date: Tue, 20 Jan 2026 14:12:19 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E7=89=88=E6=9C=AC=E5=8F=91?= =?UTF-8?q?=E5=B8=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/controller/Crawler.php | 285 +++++++++++++++++ app/service/CrawlerService.php | 216 +++++++++++++ route/app.php | 7 + view/crawler/index.html | 543 +++++++++++++++++++++++++++++++++ 4 files changed, 1051 insertions(+) create mode 100644 app/controller/Crawler.php create mode 100644 app/service/CrawlerService.php create mode 100644 view/crawler/index.html diff --git a/app/controller/Crawler.php b/app/controller/Crawler.php new file mode 100644 index 0000000..097f018 --- /dev/null +++ b/app/controller/Crawler.php @@ -0,0 +1,285 @@ +request->param('examid', ''); + $bmid = $this->request->param('bmid', ''); + $userid = $this->request->param('userid', ''); + + if (empty($examid) || empty($bmid) || empty($userid)) { + return json([ + 'code' => 0, + 'msg' => '请先填写examid、bmid和userid', + ]); + } + + // 构建URL获取HTML + $url = "http://gzrsks.oumakspt.com:62/tyzpwb/stuchooseexam/selectPosition.htm?examstupid=1015&userid={$userid}&bmid={$bmid}&examid={$examid}&aa=" . time() . '000'; + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_TIMEOUT, 30); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10); + + $html = curl_exec($ch); + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $error = curl_error($ch); + curl_close($ch); + + if ($error) { + return json([ + 'code' => 0, + 'msg' => '获取网页失败: ' . $error, + ]); + } + + if ($httpCode !== 200) { + return json([ + 'code' => 0, + 'msg' => '获取网页失败,HTTP状态码: ' . $httpCode, + ]); + } + + $service = new CrawlerService(); + $options = $service->extractDsdmOptions($html); + + return json([ + 'code' => 1, + 'data' => $options, + 'msg' => '获取成功', + ]); + + } catch (\Exception $e) { + return json([ + 'code' => 0, + 'msg' => '获取失败: ' . $e->getMessage(), + ]); + } + } + + /** + * 获取职位代码列表 + */ + public function getZwdmList() + { + try { + $dsdm = $this->request->param('dsdm', ''); + $cookiesParam = $this->request->param('cookies', ''); + + if (empty($dsdm)) { + return json([ + 'code' => 0, + 'msg' => '请选择地区', + ]); + } + + if (empty($cookiesParam)) { + return json([ + 'code' => 0, + 'msg' => '请填写Cookie数据', + ]); + } + + // 解析JSON格式的cookies + $cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam; + if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) { + return json([ + 'code' => 0, + 'msg' => 'Cookie数据格式错误,请检查JSON格式', + ]); + } + + $service = new CrawlerService(); + $treeData = $service->getPositionTree($dsdm, $cookies); + + // 提取所有CODE作为zwdm + $zwdmList = []; + if (is_array($treeData)) { + foreach ($treeData as $item) { + if (isset($item['CODE']) && !empty($item['CODE'])) { + $zwdmList[] = [ + 'zwdm' => $item['CODE'], + 'title' => $item['TITLE'] ?? $item['CODE'], + ]; + } + } + } + + return json([ + 'code' => 1, + 'data' => $zwdmList, + 'msg' => '获取成功', + ]); + + } catch (\Exception $e) { + return json([ + 'code' => 0, + 'msg' => '获取失败: ' . $e->getMessage(), + ]); + } + } + + /** + * 获取职位详细信息 + */ + public function getPositionInfo() + { + try { + $zwdm = $this->request->param('zwdm', ''); + $examid = $this->request->param('examid', ''); + $cookiesParam = $this->request->param('cookies', ''); + + if (empty($zwdm) || empty($examid)) { + return json([ + 'code' => 0, + 'msg' => '参数不完整', + ]); + } + + if (empty($cookiesParam)) { + return json([ + 'code' => 0, + 'msg' => '请填写Cookie数据', + ]); + } + + // 解析JSON格式的cookies + $cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam; + if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) { + return json([ + 'code' => 0, + 'msg' => 'Cookie数据格式错误,请检查JSON格式', + ]); + } + + $service = new CrawlerService(); + $info = $service->getPositionInfo($zwdm, $examid, $cookies); + + if (!empty($info)) { + // 处理单条数据或数组数据 + if (isset($info[0])) { + $item = $info[0]; + } else { + $item = $info; + } + + // 计算竞争比 + $zprs = isset($item['zprs']) ? intval($item['zprs']) : 0; + $bkrs = isset($item['bkrs']) ? intval($item['bkrs']) : 0; + $competitionRatio = $zprs > 0 ? ($bkrs / $zprs) : 0; + + $result = [ + 'sbmc' => $item['sbmc'] ?? '', // 省份 + 'dsmc' => $item['dsmc'] ?? '', // 地区 + 'zpdwmc' => $item['zpdwmc'] ?? '', // 招聘单位/用人司局 + 'zwmc' => $item['zwmc'] ?? '', // 职位名称 + 'zwdm' => $item['zwdm'] ?? $zwdm, // 职位代码 + 'zprs' => $zprs, // 招聘人数 + 'bkrs' => $bkrs, // 审核通过人数 + 'competition_ratio' => number_format($competitionRatio, 2), // 竞争比 + ]; + + return json([ + 'code' => 1, + 'data' => $result, + 'msg' => '获取成功', + ]); + } else { + return json([ + 'code' => 0, + 'msg' => '未获取到数据', + ]); + } + + } catch (\Exception $e) { + return json([ + 'code' => 0, + 'msg' => '获取失败: ' . $e->getMessage(), + ]); + } + } + + /** + * 批量获取职位信息 + */ + public function batchGetPositionInfo() + { + try { + $zwdmListParam = $this->request->param('zwdm_list', ''); + $examid = $this->request->param('examid', ''); + $cookiesParam = $this->request->param('cookies', ''); + + // 解析JSON格式的zwdm_list + $zwdmList = is_string($zwdmListParam) ? json_decode($zwdmListParam, true) : $zwdmListParam; + if (json_last_error() !== JSON_ERROR_NONE || empty($zwdmList) || !is_array($zwdmList)) { + return json([ + 'code' => 0, + 'msg' => '请选择职位代码', + ]); + } + + if (empty($examid)) { + return json([ + 'code' => 0, + 'msg' => '请填写examid', + ]); + } + + if (empty($cookiesParam)) { + return json([ + 'code' => 0, + 'msg' => '请填写Cookie数据', + ]); + } + + // 解析JSON格式的cookies + $cookies = is_string($cookiesParam) ? json_decode($cookiesParam, true) : $cookiesParam; + if (json_last_error() !== JSON_ERROR_NONE || empty($cookies)) { + return json([ + 'code' => 0, + 'msg' => 'Cookie数据格式错误,请检查JSON格式', + ]); + } + + $service = new CrawlerService(); + $results = $service->batchGetPositionInfo($zwdmList, $examid, $cookies); + + return json([ + 'code' => 1, + 'data' => $results, + 'msg' => '获取成功', + ]); + + } catch (\Exception $e) { + return json([ + 'code' => 0, + 'msg' => '获取失败: ' . $e->getMessage(), + ]); + } + } +} diff --git a/app/service/CrawlerService.php b/app/service/CrawlerService.php new file mode 100644 index 0000000..95f3ac0 --- /dev/null +++ b/app/service/CrawlerService.php @@ -0,0 +1,216 @@ +baseUrl . '/tree/getPositionTree.htm'; + + $cookieString = $this->buildCookieString($cookies); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_HTTPHEADER, [ + 'Cookie: ' . $cookieString, + 'Content-Type: application/x-www-form-urlencoded', + ]); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query(['dsdm' => $dsdm])); + curl_setopt($ch, CURLOPT_TIMEOUT, 30); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10); + + $response = curl_exec($ch); + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $error = curl_error($ch); + curl_close($ch); + + if ($error) { + throw new \Exception('请求失败: ' . $error); + } + + if ($httpCode !== 200) { + throw new \Exception('请求失败,HTTP状态码: ' . $httpCode); + } + + $data = json_decode($response, true); + + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \Exception('JSON解析失败: ' . json_last_error_msg()); + } + + return $data ?: []; + } + + /** + * 获取职位详细信息 + * @param string $zwdm 职位代码 + * @param string $examid 考试ID + * @param array $cookies Cookie数据 + * @return array + */ + public function getPositionInfo(string $zwdm, string $examid, array $cookies): array + { + $url = $this->baseUrl . '/stuchooseexam/getPositionInfo.htm'; + + $cookieString = $this->buildCookieString($cookies); + + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_HTTPHEADER, [ + 'Cookie: ' . $cookieString, + 'Content-Type: application/x-www-form-urlencoded', + ]); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query([ + 'zwdm' => $zwdm, + 'examid' => $examid + ])); + curl_setopt($ch, CURLOPT_TIMEOUT, 30); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10); + + $response = curl_exec($ch); + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $error = curl_error($ch); + curl_close($ch); + + if ($error) { + throw new \Exception('请求失败: ' . $error); + } + + if ($httpCode !== 200) { + throw new \Exception('请求失败,HTTP状态码: ' . $httpCode); + } + + $data = json_decode($response, true); + + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \Exception('JSON解析失败: ' . json_last_error_msg()); + } + + return $data ?: []; + } + + /** + * 批量获取职位信息 + * @param array $zwdmList 职位代码列表 + * @param string $examid 考试ID + * @param array $cookies Cookie数据 + * @return array + */ + public function batchGetPositionInfo(array $zwdmList, string $examid, array $cookies): array + { + $results = []; + + foreach ($zwdmList as $zwdm) { + try { + $info = $this->getPositionInfo($zwdm, $examid, $cookies); + + if (!empty($info)) { + // 处理单条数据或数组数据 + if (isset($info[0])) { + $item = $info[0]; + } else { + $item = $info; + } + + // 计算竞争比 + $zprs = isset($item['zprs']) ? intval($item['zprs']) : 0; + $bkrs = isset($item['bkrs']) ? intval($item['bkrs']) : 0; + $competitionRatio = $zprs > 0 ? ($bkrs / $zprs) : 0; + + $results[] = [ + 'sbmc' => $item['sbmc'] ?? '', // 省份 + 'dsmc' => $item['dsmc'] ?? '', // 地区 + 'zpdwmc' => $item['zpdwmc'] ?? '', // 招聘单位/用人司局 + 'zwmc' => $item['zwmc'] ?? '', // 职位名称 + 'zwdm' => $item['zwdm'] ?? $zwdm, // 职位代码 + 'zprs' => $zprs, // 招聘人数 + 'bkrs' => $bkrs, // 审核通过人数 + 'competition_ratio' => number_format($competitionRatio, 2), // 竞争比 + ]; + } + + // 避免请求过快,添加小延迟 + usleep(200000); // 0.2秒 + + } catch (\Exception $e) { + // 记录错误但继续处理其他数据 + $results[] = [ + 'zwdm' => $zwdm, + 'error' => $e->getMessage(), + ]; + } + } + + return $results; + } + + /** + * 构建Cookie字符串 + * @param array $cookies Cookie数组 + * @return string + */ + private function buildCookieString(array $cookies): string + { + $cookieArray = []; + + // 处理嵌套的Cookie结构 + if (isset($cookies['请求 Cookie'])) { + $cookieData = $cookies['请求 Cookie']; + } else { + $cookieData = $cookies; + } + + foreach ($cookieData as $key => $value) { + $cookieArray[] = $key . '=' . $value; + } + + return implode('; ', $cookieArray); + } + + /** + * 从HTML中提取地区选项 + * @param string $html HTML内容 + * @return array + */ + public function extractDsdmOptions(string $html): array + { + $options = []; + + // 使用正则表达式提取option标签 + preg_match_all('/]*>([^<]+)<\/option>/i', $html, $matches); + + if (!empty($matches[1])) { + foreach ($matches[1] as $index => $value) { + if (!empty($value)) { // 跳过"请选择"等空值 + $options[] = [ + 'value' => $value, + 'text' => $matches[2][$index] ?? $value, + ]; + } + } + } + + return $options; + } +} diff --git a/route/app.php b/route/app.php index 69071f7..d8467ca 100644 --- a/route/app.php +++ b/route/app.php @@ -15,3 +15,10 @@ Route::get('think', function () { }); Route::get('hello/:name', 'index/hello'); + +// 爬虫工具路由 +Route::get('crawler', 'crawler/index'); +Route::post('crawler/getDsdmOptions', 'crawler/getDsdmOptions'); +Route::post('crawler/getZwdmList', 'crawler/getZwdmList'); +Route::post('crawler/getPositionInfo', 'crawler/getPositionInfo'); +Route::post('crawler/batchGetPositionInfo', 'crawler/batchGetPositionInfo'); diff --git a/view/crawler/index.html b/view/crawler/index.html new file mode 100644 index 0000000..3fda856 --- /dev/null +++ b/view/crawler/index.html @@ -0,0 +1,543 @@ + + + + + + 职位信息爬虫工具 + + + +
+

职位信息爬虫工具

+ + +
+

第一步:填写基础信息

+ +
+ + + 请填写完整的Cookie数据,格式为JSON +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ +
+
+ + +
+

第二步:选择地区

+
+
+ + +
+
+ +
+
+ + +
+

第三步:选择职位代码

+
+
+
+ +
+
+
请先获取职位代码列表
+
+
+
+ +
+
+ + +
+

职位信息结果

+
+ +
+
+ + + +