Files
shengkao_pachong/view/crawler/index.html
杨志 a962e06a18 up
2026-01-21 08:53:45 +08:00

716 lines
26 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>职位信息爬虫工具</title>
<style>
/* Global reset: strip default spacing and size every element by its border box. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Page shell: system font stack on a light grey background. */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
background: #f5f5f5;
padding: 20px;
line-height: 1.6;
}
/* Centered white card that wraps the whole tool. */
.container {
max-width: 1200px;
margin: 0 auto;
background: #fff;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
padding: 30px;
}
h1 {
color: #333;
margin-bottom: 30px;
text-align: center;
font-size: 24px;
}
/* One panel per workflow step (basic info, region selection, results). */
.form-section {
margin-bottom: 30px;
padding: 20px;
background: #f9f9f9;
border-radius: 6px;
border: 1px solid #e0e0e0;
}
/* Step heading with a green underline. */
.form-section h2 {
font-size: 18px;
color: #555;
margin-bottom: 15px;
padding-bottom: 10px;
border-bottom: 2px solid #4CAF50;
}
/* A label stacked above its full-width control. */
.form-group {
margin-bottom: 15px;
}
.form-group label {
display: block;
margin-bottom: 5px;
color: #333;
font-weight: 500;
}
.form-group input,
.form-group select,
.form-group textarea {
width: 100%;
padding: 10px;
border: 1px solid #ddd;
border-radius: 4px;
font-size: 14px;
}
.form-group textarea {
min-height: 100px;
font-family: monospace;
resize: vertical;
}
/* Helper text shown under a control. */
.form-group small {
display: block;
margin-top: 5px;
color: #666;
font-size: 12px;
}
/* Buttons: green by default; .btn-secondary is blue, .btn-danger is red. */
.btn {
padding: 10px 20px;
background: #4CAF50;
color: #fff;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 14px;
transition: background 0.3s;
}
.btn:hover {
background: #45a049;
}
.btn:disabled {
background: #ccc;
cursor: not-allowed;
}
.btn-secondary {
background: #2196F3;
}
.btn-secondary:hover {
background: #0b7dda;
}
.btn-danger {
background: #f44336;
}
.btn-danger:hover {
background: #da190b;
}
/* Scrollable bordered box for a list of checkboxes. */
.checkbox-group {
max-height: 300px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
padding: 10px;
background: #fff;
}
/* One checkbox row; the script creates these for each position code. */
.checkbox-item {
padding: 8px;
border-bottom: 1px solid #f0f0f0;
}
.checkbox-item:last-child {
border-bottom: none;
}
.checkbox-item label {
display: flex;
align-items: center;
cursor: pointer;
}
/* Undo the 100% width that .form-group input would otherwise apply to checkboxes. */
.checkbox-item input[type="checkbox"] {
width: auto;
margin-right: 8px;
}
/* Status banners rendered by showMessage(): .success, .error and .info variants. */
.message {
padding: 12px;
border-radius: 4px;
margin-bottom: 15px;
}
.message.success {
background: #d4edda;
color: #155724;
border: 1px solid #c3e6cb;
}
.message.error {
background: #f8d7da;
color: #721c24;
border: 1px solid #f5c6cb;
}
.message.info {
background: #d1ecf1;
color: #0c5460;
border: 1px solid #bee5eb;
}
/* Results table: horizontally scrollable wrapper, sticky green header, zebra rows. */
.table-container {
margin-top: 30px;
overflow-x: auto;
}
table {
width: 100%;
border-collapse: collapse;
background: #fff;
}
table th,
table td {
padding: 12px;
text-align: left;
border: 1px solid #ddd;
}
table th {
background: #4CAF50;
color: #fff;
font-weight: 600;
position: sticky;
top: 0;
}
table tr:nth-child(even) {
background: #f9f9f9;
}
table tr:hover {
background: #f0f0f0;
}
/* Placeholder text followed by an animated run of dots. */
.loading {
text-align: center;
padding: 20px;
color: #666;
}
/* NOTE(review): the effect relies on the browser animating the `content` property; verify in the target browsers. */
.loading::after {
content: '...';
animation: dots 1.5s steps(4, end) infinite;
}
@keyframes dots {
0%, 20% { content: '.'; }
40% { content: '..'; }
60%, 100% { content: '...'; }
}
/* Horizontal row of buttons under a form section. */
.action-buttons {
display: flex;
gap: 10px;
margin-top: 15px;
}
.select-all {
margin-bottom: 10px;
}
</style>
</head>
<body>
<div class="container">
  <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
    <h1 style="margin: 0;">职位信息爬虫工具</h1>
    <div style="display: flex; gap: 15px;">
      <a href="/auth/logout" style="color: #f44336; text-decoration: none; padding: 8px 15px; border-radius: 4px; background: #ffebee;">退出登录</a>
    </div>
  </div>
  <!-- Step 1: basic information (session cookies and exam identifiers) -->
  <div class="form-section">
    <h2>第一步:填写基础信息</h2>
    <!-- NOTE(review): the field labelled JSESSIONID1 has id "cookie-jsessionid2" (optional in the script)
         and the one labelled JSESSIONID2 has id "cookie-jsessionid" (required). Confirm this cross-mapping is intended. -->
    <!-- autocomplete="off": these inputs hold session cookies, which should not be remembered by browser autofill. -->
    <div class="form-group">
      <label for="cookie-jsessionid2">JSESSIONID1</label>
      <input type="text" id="cookie-jsessionid2" placeholder="" autocomplete="off">
    </div>
    <div class="form-group">
      <label for="cookie-jsessionid">JSESSIONID2</label>
      <input type="text" id="cookie-jsessionid" placeholder="" autocomplete="off">
    </div>
    <div class="form-group">
      <label for="cookie-serverid">SERVERID</label>
      <input type="text" id="cookie-serverid" placeholder="" autocomplete="off">
    </div>
    <div class="form-group">
      <label for="examid">examid</label>
      <input type="text" id="examid" placeholder="">
    </div>
    <div class="form-group">
      <label for="bmid">bmid</label>
      <input type="text" id="bmid" placeholder="">
    </div>
    <div class="form-group">
      <label for="userid">userid</label>
      <input type="text" id="userid" placeholder="">
    </div>
    <div class="action-buttons">
      <button type="button" class="btn btn-secondary" onclick="getDsdmOptions()">获取地区选项</button>
    </div>
  </div>
  <!-- Step 2: choose a region and crawl every position in it -->
  <div class="form-section">
    <h2>第二步:选择地区并自动抓取</h2>
    <!-- Live region: status text injected by the script is announced to assistive technology. -->
    <div id="dsdm-message" role="status" aria-live="polite"></div>
    <div class="form-group">
      <label for="dsdm">地区代码dsdm</label>
      <select id="dsdm">
        <option value="">请先获取地区选项</option>
      </select>
    </div>
    <div class="action-buttons">
      <button type="button" class="btn" onclick="fetchAllPositions()">自动抓取全部职位</button>
    </div>
  </div>
  <!-- Results: export button, progress messages and the data table -->
  <div class="form-section">
    <h2>职位信息结果</h2>
    <div class="action-buttons" style="margin-bottom: 15px;">
      <button type="button" class="btn btn-secondary" id="export-btn" onclick="exportCsv()" disabled>导出CSV</button>
    </div>
    <div id="result-message" role="status" aria-live="polite"></div>
    <!-- Hidden until the script has rows to show (it sets style.display to "block"). -->
    <div class="table-container" id="result-table" style="display: none;">
      <table id="data-table">
        <thead>
          <tr>
            <th scope="col">省份</th>
            <th scope="col">地区</th>
            <th scope="col">招聘单位/用人司局</th>
            <th scope="col">职位名称</th>
            <th scope="col">职位代码</th>
            <th scope="col">招聘人数</th>
            <th scope="col">审核通过人数</th>
            <th scope="col">竞争比</th>
          </tr>
        </thead>
        <tbody id="data-table-body">
        </tbody>
      </table>
    </div>
  </div>
</div>
<script>
// Base path prepended to API request URLs.
const API_BASE_URL = ''; // Empty string means relative paths are used; for cross-origin use, change it to a full URL such as 'http://your-domain.com'.
// The same `aa` value is reused across requests. Per the original note this keeps the Referer of
// selectPosition and getPositionTree consistent (those calls are not visible in this file; presumably server-side).
let lastAa = '';
// Position records gathered by the most recent fetch; exportCsv() reads them.
let lastResults = [];
let isCrawling = false; // Crawl-in-progress flag.
// Fetch the region (dsdm) options and populate the #dsdm <select>.
//
// Reads examid, bmid and userid from the form plus the cookie fields (via
// buildCookiesPayload), POSTs them form-encoded to /crawler/getDsdmOptions and
// reports progress, success or failure in #dsdm-message.
// When the response has code === 1 and data is an array, the select is rebuilt
// with one <option> per entry, using the entry's `value` and `text`.
function getDsdmOptions() {
    const examid = document.getElementById('examid').value.trim();
    const bmid = document.getElementById('bmid').value.trim();
    const userid = document.getElementById('userid').value.trim();
    const cookieData = buildCookiesPayload('dsdm-message');
    if (!examid || !bmid || !userid) {
        showMessage('dsdm-message', '请先填写examid、bmid和userid', 'error');
        return;
    }
    if (!cookieData) {
        return;
    }
    // Generate and remember `aa` only once a request is really going out, so a
    // click rejected by validation cannot replace the previously stored value.
    const aa = Date.now().toString();
    lastAa = aa;
    showMessage('dsdm-message', '正在获取地区选项...', 'info');
    fetch(API_BASE_URL + '/crawler/getDsdmOptions', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        body: `examid=${encodeURIComponent(examid)}&bmid=${encodeURIComponent(bmid)}&userid=${encodeURIComponent(userid)}&aa=${encodeURIComponent(aa)}&cookies=${encodeURIComponent(JSON.stringify(cookieData))}`
    })
    // A JSON body is still honoured on non-2xx responses (it may carry `msg`);
    // a body that is not JSON is reported with the HTTP status instead of an
    // opaque parser error.
    .then(response => response.json().catch(() => {
        throw new Error('HTTP ' + response.status + ' 响应不是有效的JSON');
    }))
    .then(data => {
        if (data && data.code === 1 && Array.isArray(data.data)) {
            const select = document.getElementById('dsdm');
            select.innerHTML = '<option value="">请选择</option>';
            data.data.forEach(option => {
                const opt = document.createElement('option');
                opt.value = option.value;
                opt.textContent = option.text;
                select.appendChild(opt);
            });
            showMessage('dsdm-message', '获取地区选项成功,请选择地区', 'success');
        } else {
            showMessage('dsdm-message', (data && data.msg) || '获取失败', 'error');
        }
    })
    .catch(error => {
        showMessage('dsdm-message', '请求失败: ' + error.message, 'error');
    });
}
// Fetch the position-code (zwdm) list for the selected region and render it as
// a checkbox list inside #zwdm-list, reporting status in #zwdm-message.
//
// NOTE(review): neither #zwdm-list nor #zwdm-message appears in the markup
// visible in this file, and no visible caller invokes this function — confirm
// whether it is still in use.
function getZwdmList() {
    const dsdm = document.getElementById('dsdm').value;
    const examid = document.getElementById('examid').value.trim();
    const bmid = document.getElementById('bmid').value.trim();
    const userid = document.getElementById('userid').value.trim();
    const cookieData = buildCookiesPayload('zwdm-message');
    // Reuse the aa generated by getDsdmOptions when available.
    const aa = lastAa || Date.now().toString();
    if (!dsdm) {
        showMessage('zwdm-message', '请先选择地区', 'error');
        return;
    }
    if (!examid || !bmid || !userid) {
        showMessage('zwdm-message', '请先填写examid、bmid和userid', 'error');
        return;
    }
    if (!cookieData) {
        return;
    }
    showMessage('zwdm-message', '正在获取职位代码列表...', 'info');
    fetch(API_BASE_URL + '/crawler/getZwdmList', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        body: `dsdm=${encodeURIComponent(dsdm)}&examid=${encodeURIComponent(examid)}&bmid=${encodeURIComponent(bmid)}&userid=${encodeURIComponent(userid)}&aa=${encodeURIComponent(aa)}&cookies=${encodeURIComponent(JSON.stringify(cookieData))}`
    })
    // Report a non-JSON body with the HTTP status rather than a parser error.
    .then(response => response.json().catch(() => {
        throw new Error('HTTP ' + response.status + ' 响应不是有效的JSON');
    }))
    .then(data => {
        if (data && data.code === 1 && Array.isArray(data.data)) {
            const container = document.getElementById('zwdm-list');
            if (container) {
                container.innerHTML = '';
            }
            if (data.data.length === 0) {
                if (container) {
                    container.innerHTML = '<div class="loading">未找到职位代码</div>';
                }
                showMessage('zwdm-message', '未找到职位代码', 'info');
                return;
            }
            if (container) {
                data.data.forEach(item => {
                    // Build the row with DOM APIs so server-supplied zwdm/title
                    // values are treated as text, never parsed as HTML.
                    const row = document.createElement('div');
                    row.className = 'checkbox-item';
                    const label = document.createElement('label');
                    const box = document.createElement('input');
                    box.type = 'checkbox';
                    box.value = String(item.zwdm);
                    box.className = 'zwdm-checkbox';
                    label.appendChild(box);
                    label.appendChild(document.createTextNode(' ' + String(item.title)));
                    row.appendChild(label);
                    container.appendChild(row);
                });
            }
            showMessage('zwdm-message', `获取成功,共找到 ${data.data.length} 个职位代码`, 'success');
        } else {
            showMessage('zwdm-message', (data && data.msg) || '获取失败', 'error');
        }
    })
    .catch(error => {
        showMessage('zwdm-message', '请求失败: ' + error.message, 'error');
    });
}
// Assemble the cookie payload sent with every crawler request.
//
// Reads #cookie-jsessionid, #cookie-jsessionid2 and #cookie-serverid (trimmed).
// Returns null, after showing an error in `messageContainerId`, when the
// primary JSESSIONID or SERVERID is empty. Otherwise returns
// { "请求 Cookie": { JSESSIONID, SERVERID } }, where JSESSIONID is a two-element
// array [primary, secondary] if the second session id was filled in and a plain
// string if it was not.
function buildCookiesPayload(messageContainerId) {
    const readTrimmed = fieldId => document.getElementById(fieldId).value.trim();
    const primarySession = readTrimmed('cookie-jsessionid');
    const secondarySession = readTrimmed('cookie-jsessionid2');
    const serverId = readTrimmed('cookie-serverid');

    const missingRequired = primarySession === '' || serverId === '';
    if (missingRequired) {
        showMessage(messageContainerId, '请填写JSESSIONID和SERVERID', 'error');
        return null;
    }

    // Dual JSESSIONID support: send both values when the second one is present.
    let sessionValue = primarySession;
    if (secondarySession) {
        sessionValue = [primarySession, secondarySession];
    }
    return {
        "请求 Cookie": {
            "JSESSIONID": sessionValue,
            "SERVERID": serverId
        }
    };
}
// Select-all / deselect-all: copy the state of the #select-all-zwdm checkbox
// onto every .zwdm-checkbox on the page.
function toggleAllZwdm() {
    const masterChecked = document.getElementById('select-all-zwdm').checked;
    for (const box of document.querySelectorAll('.zwdm-checkbox')) {
        box.checked = masterChecked;
    }
}
// Fetch details for every checked position code in a single request.
//
// Collects the values of all checked .zwdm-checkbox inputs, POSTs them as a
// JSON array (zwdm_list) with examid and the cookie payload to
// /crawler/batchGetPositionInfo, then stores the returned rows in lastResults
// and renders them with displayResults. Status is shown in #result-message.
function batchGetPositionInfo() {
    const examid = document.getElementById('examid').value.trim();
    const cookieData = buildCookiesPayload('result-message');
    if (!examid) {
        showMessage('result-message', '请先填写examid', 'error');
        return;
    }
    if (!cookieData) {
        return;
    }
    const checkboxes = document.querySelectorAll('.zwdm-checkbox:checked');
    if (checkboxes.length === 0) {
        showMessage('result-message', '请至少选择一个职位代码', 'error');
        return;
    }
    const zwdmList = Array.from(checkboxes, cb => cb.value);
    showMessage('result-message', `正在获取 ${zwdmList.length} 个职位的信息,请稍候...`, 'info');
    fetch(API_BASE_URL + '/crawler/batchGetPositionInfo', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        body: `examid=${encodeURIComponent(examid)}&zwdm_list=${encodeURIComponent(JSON.stringify(zwdmList))}&cookies=${encodeURIComponent(JSON.stringify(cookieData))}`
    })
    // Report a non-JSON body with the HTTP status rather than a parser error.
    .then(response => response.json().catch(() => {
        throw new Error('HTTP ' + response.status + ' 响应不是有效的JSON');
    }))
    .then(data => {
        // Require an array before storing/rendering: lastResults is later
        // iterated by exportCsv, and displayResults iterates it immediately.
        if (data && data.code === 1 && Array.isArray(data.data)) {
            lastResults = data.data;
            displayResults(data.data);
            showMessage('result-message', `成功获取 ${data.data.length} 条职位信息`, 'success');
        } else {
            showMessage('result-message', (data && data.msg) || '获取失败', 'error');
        }
    })
    .catch(error => {
        showMessage('result-message', '请求失败: ' + error.message, 'error');
    });
}
// Replace the contents of the results table with `results` and reveal it.
//
// Each entry is either an error record ({ zwdm, error }), rendered as a single
// red cell spanning all 8 columns, or a position record rendered as 8 cells:
// sbmc, dsmc, zpdwmc, zwmc, zwdm (default ''), zprs, bkrs (default 0) and
// competition_ratio (default '0:0').
//
// Cells are filled through textContent so the scraped field values are shown
// as text and can never be parsed as HTML.
function displayResults(results) {
    const tbody = document.getElementById('data-table-body');
    tbody.innerHTML = '';
    results.forEach(item => {
        const tr = document.createElement('tr');
        if (item.error) {
            const td = document.createElement('td');
            td.colSpan = 8;
            td.style.color = 'red';
            td.textContent = `职位代码 ${item.zwdm}: ${item.error}`;
            tr.appendChild(td);
        } else {
            const values = [
                item.sbmc || '',
                item.dsmc || '',
                item.zpdwmc || '',
                item.zwmc || '',
                item.zwdm || '',
                item.zprs || 0,
                item.bkrs || 0,
                item.competition_ratio || '0:0'
            ];
            values.forEach(value => {
                const td = document.createElement('td');
                td.textContent = String(value);
                tr.appendChild(td);
            });
        }
        tbody.appendChild(tr);
    });
    document.getElementById('result-table').style.display = 'block';
}
// Append one successful position record to the results table and reveal it.
//
// Renders 8 cells: sbmc, dsmc, zpdwmc, zwmc, zwdm (default ''), zprs, bkrs
// (default 0) and competition_ratio (default '0.00').
// Cells are filled through textContent so the scraped field values are shown
// as text and can never be parsed as HTML.
function appendResultRow(item) {
    const tbody = document.getElementById('data-table-body');
    const tr = document.createElement('tr');
    const values = [
        item.sbmc || '',
        item.dsmc || '',
        item.zpdwmc || '',
        item.zwmc || '',
        item.zwdm || '',
        item.zprs || 0,
        item.bkrs || 0,
        item.competition_ratio || '0.00'
    ];
    values.forEach(value => {
        const td = document.createElement('td');
        td.textContent = String(value);
        tr.appendChild(td);
    });
    tbody.appendChild(tr);
    document.getElementById('result-table').style.display = 'block';
}
// Append a failure row for position code `code` to the results table and
// reveal it. The row is one red cell spanning all 8 columns.
//
// `msg` can come from the server or from a caught exception, so it is written
// through textContent and never parsed as HTML.
function appendErrorRow(code, msg) {
    const tbody = document.getElementById('data-table-body');
    const tr = document.createElement('tr');
    const td = document.createElement('td');
    td.colSpan = 8;
    td.style.color = 'red';
    td.textContent = `职位代码 ${code}: ${msg}`;
    tr.appendChild(td);
    tbody.appendChild(tr);
    document.getElementById('result-table').style.display = 'block';
}
// Crawl every position of the selected region: fetch the position-code list,
// then fetch each position's details one request at a time, appending rows as
// they arrive (streaming display, which avoids one long request timing out).
//
// Requires a selected region, examid/bmid/userid, the cookie fields and an
// `aa` previously generated by getDsdmOptions. Codes starting with '152' are
// skipped. Successful records accumulate in lastResults; failures are shown as
// error rows. While running, isCrawling is true and the export button is
// disabled; both are always restored when the crawl ends, including on error.
async function fetchAllPositions() {
    // Re-entrancy guard: a second click while a crawl is running would reset
    // lastResults and interleave two loops into the same table.
    if (isCrawling) {
        showMessage('result-message', '爬取进行中,请等待完成', 'error');
        return;
    }
    const dsdm = document.getElementById('dsdm').value;
    const examid = document.getElementById('examid').value.trim();
    const bmid = document.getElementById('bmid').value.trim();
    const userid = document.getElementById('userid').value.trim();
    const cookieData = buildCookiesPayload('result-message');
    const aa = lastAa;
    if (!dsdm) {
        showMessage('dsdm-message', '请先选择地区', 'error');
        return;
    }
    if (!examid || !bmid || !userid) {
        showMessage('dsdm-message', '请先填写examid、bmid和userid', 'error');
        return;
    }
    if (!aa) {
        showMessage('result-message', '请先点击“获取地区选项”生成aa', 'error');
        return;
    }
    if (!cookieData) {
        return;
    }

    // POST a form-encoded body and resolve to the parsed JSON; network and
    // parse failures resolve to a { code: 0, msg } object instead of rejecting.
    const postForm = (path, body) => fetch(API_BASE_URL + path, {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: body
    }).then(r => r.json()).catch(e => ({ code: 0, msg: e.message }));
    const encodedCookies = encodeURIComponent(JSON.stringify(cookieData));
    const exportBtn = document.getElementById('export-btn');

    // Crawl starts: block exporting until it ends.
    isCrawling = true;
    exportBtn.disabled = true;
    exportBtn.textContent = '爬取中...';
    try {
        // Clear previous data.
        lastResults = [];
        const tbody = document.getElementById('data-table-body');
        tbody.innerHTML = '';
        document.getElementById('result-table').style.display = 'block';
        showMessage('result-message', '正在获取职位代码列表...', 'info');

        // Fetch all position codes (through API_BASE_URL, like every other request).
        const listResp = await postForm(
            '/crawler/getZwdmList',
            `dsdm=${encodeURIComponent(dsdm)}&examid=${encodeURIComponent(examid)}&bmid=${encodeURIComponent(bmid)}&userid=${encodeURIComponent(userid)}&aa=${encodeURIComponent(aa)}&cookies=${encodedCookies}`
        );
        if (!listResp || listResp.code !== 1 || !Array.isArray(listResp.data) || listResp.data.length === 0) {
            showMessage('result-message', (listResp && listResp.msg) || '未获取到职位代码', 'error');
            return;
        }

        // Drop entries without a code, normalise to strings (a numeric zwdm
        // has no startsWith), then filter out codes starting with 152.
        const codes = listResp.data
            .map(it => (it ? it.zwdm : undefined))
            .filter(code => code !== undefined && code !== null && code !== '')
            .map(String)
            .filter(code => !code.startsWith('152'));
        showMessage('result-message', `${codes.length} 个职位已跳过152开头开始逐条获取...`, 'info');

        // Fetch the details one position at a time.
        for (let i = 0; i < codes.length; i++) {
            const code = codes[i];
            const infoResp = await postForm(
                '/crawler/getPositionInfo',
                `zwdm=${encodeURIComponent(code)}&examid=${encodeURIComponent(examid)}&cookies=${encodedCookies}`
            );
            if (infoResp && infoResp.code === 1 && infoResp.data) {
                const item = infoResp.data;
                lastResults.push(item);
                appendResultRow(item);
            } else {
                appendErrorRow(code, (infoResp && infoResp.msg) || '获取失败');
            }
            showMessage('result-message', `进度:${i + 1}/${codes.length}`, 'info');
        }
        showMessage('result-message', `完成,共 ${lastResults.length} 条成功,失败 ${codes.length - lastResults.length}`, 'success');
    } catch (error) {
        showMessage('result-message', '抓取失败: ' + error.message, 'error');
    } finally {
        // Crawl ended (success, early return or exception): restore the export button.
        isCrawling = false;
        exportBtn.disabled = false;
        exportBtn.textContent = '导出CSV';
    }
}
// Export lastResults as a CSV download named positions.csv.
//
// Refuses to run while a crawl is in progress or when there is nothing to
// export (an error is shown in #result-message). Every value is quoted and
// embedded double quotes are doubled.
function exportCsv() {
    if (isCrawling) {
        showMessage('result-message', '爬取进行中,请等待完成后再导出', 'error');
        return;
    }
    if (!lastResults || lastResults.length === 0) {
        showMessage('result-message', '暂无数据可导出', 'error');
        return;
    }
    const headers = ['省份','地区','招聘单位/用人司局','职位名称','职位代码','招聘人数','审核通过人数','竞争比'];
    const lines = [headers.join(',')];
    lastResults.forEach(item => {
        const row = [
            item.sbmc || '',
            item.dsmc || '',
            item.zpdwmc || '',
            item.zwmc || '',
            item.zwdm || '',
            item.zprs || 0,
            item.bkrs || 0,
            item.competition_ratio || '0:0'
        ];
        lines.push(row.map(v => `"${String(v).replace(/"/g, '""')}"`).join(','));
    });
    // Leading BOM: without it spreadsheet applications such as Excel do not
    // detect UTF-8 and garble the Chinese headers and values.
    const blob = new Blob(['\ufeff' + lines.join('\n')], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = 'positions.csv';
    // Attach the link while clicking: some browsers ignore click() on a
    // detached anchor.
    link.style.display = 'none';
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    // Revoke on a later task so the download has started before the URL dies.
    setTimeout(() => URL.revokeObjectURL(url), 0);
    showMessage('result-message', '已导出CSV', 'success');
}
// Show a status banner inside the element with id `containerId`, replacing
// whatever it held before.
//
// containerId: id of the element that hosts the banner.
// message:     text to display. It is written through textContent because it
//              can contain server-provided or exception text, which must never
//              be parsed as HTML.
// type:        CSS modifier appended after "message" ('success', 'error', 'info').
//
// If the container does not exist, a warning is logged and nothing is rendered
// (instead of throwing a TypeError in the middle of the caller's flow).
function showMessage(containerId, message, type) {
    const container = document.getElementById(containerId);
    if (!container) {
        console.warn('showMessage: container not found: ' + containerId, message);
        return;
    }
    const banner = document.createElement('div');
    banner.className = `message ${type}`;
    banner.textContent = String(message);
    container.textContent = '';
    container.appendChild(banner);
}
</script>
</body>
</html>