抓取懂车帝车型数据
抓这个数据的原因有两个
- 做数字钥匙相关的业务需要建立一个标准车型库
- 对接的是三方接口,每拉取一次要花费100多块钱
所以为了节省成本,以及最大程度的确保数据的准确性,选择抓取懂车帝数据。

排查思路
懂车帝车型数据
- 是否通过 ajax 来获取的数据
- 是否拿到数据之后存在了 localStorage 中
localStorage

很显然没有,压根找不到车型的一条数据。
Ajax 来获取数据
找了一圈发现也没有完整的数据 
模拟用户点击获取数据
通过找规律发现
- 用户选择完品牌后,会立马把品牌列表清空,
- 当前输入框下拉框出现车系选择框
- 然后才能点击车型选择框。
具体实现
STEP1找到品牌+车系输入框

STEP2模拟用户点击品牌、车系
js
// Index of the brand to click in the brand list.
let brandIndex = 0;
// Delay passed to sleep() between simulated UI actions.
let SLEEP_TIME = 200
// The first ".car-selector_selector__2E02F" element is clicked to open the brand panel.
const brandInput = document.querySelectorAll(
".tw-relative.car-selector_selector__2E02F",
)[0];
brandInput.click(); // open the first panel
await sleep(SLEEP_TIME); // wait a while for the selection panel to appear
const brandlistLength = document.querySelectorAll(
".tool-tip_wrapper__1lKlz .jsx-2401233222.brand-list",
)[0];
if (!brandlistLength) {
await sleep(SLEEP_TIME * 2);
} // wait for the brand list to finish appearing
// ----------------divider----------------
const brandList = document
.querySelectorAll(".tool-tip_wrapper__1lKlz .jsx-2401233222.brand-list")[0]
.querySelectorAll(".jsx-2401233222.brand");
// Simulate clicking a brand
const brandDomObj = brandList[brandIndex];
if (!brandDomObj) {
// No brand at this index: save what was collected and stop.
await saveJson(vehicleDatabase);
return;
}
brandList[brandIndex].click(); // simulate the brand click
await sleep(SLEEP_TIME);
while (
document
.querySelector(".tool-tip_wrapper__1lKlz .jsx-232230372.series-selector")
?.textContent?.includes("加载中")
) {
await sleep(SLEEP_TIME); // 等待一段时间,等车系列表出现完成
}
STEP3模拟用户点击车型
js
let SLEEP_TIME = 200;
// Open the model panel: it is the second ".car-selector_selector__2E02F" element.
const mockModelInput = document.querySelectorAll(
  ".tw-relative.car-selector_selector__2E02F",
)[1];
mockModelInput.click();
await sleep(SLEEP_TIME);
// Read the "brand + series" text already filled into the first selector's input.
const brandSeries = document
  .querySelectorAll(".tw-relative.car-selector_selector__2E02F")[0]
  .querySelector("input").value;
vehicleDatabase[brandSeries] = [];
// Wait until the model popup no longer shows the "加载中" (loading) placeholder
// BEFORE collecting the <li> nodes. querySelectorAll returns a static snapshot,
// so querying first would record the list as it looked while still loading.
while (
  document
    .querySelectorAll(".tool-tip_wrapper__1lKlz")[1]
    .innerText.includes("加载中")
) {
  await sleep(SLEEP_TIME);
}
// Collect the model list now that loading has finished.
const modelList = document
  .querySelectorAll(".tool-tip_wrapper__1lKlz")[1]
  .querySelectorAll("li");
for (const modelItem of modelList) {
  // Each entry's text spans several lines; join them with "/".
  const modelNamePrice = modelItem.innerText.replace(/\n/g, "/");
  vehicleDatabase[brandSeries].push(modelNamePrice);
}
console.log(Object.keys(vehicleDatabase).length);
STEP4保存数据到文件(会导致浏览器崩溃)
js
// Serialize the collected data as JSON with 2-space indentation.
const json = JSON.stringify(data, null, 2);
// Wrap the text in a Blob and create an object URL pointing at it.
const blob = new Blob([json], { type: "application/json" });
const url = URL.createObjectURL(blob);
// Build an <a> element whose href is the object URL and whose download
// attribute names the file.
const a = document.createElement("a");
a.href = url;
a.download = "vehicle-model-data.json";
a.click();
上述代码是跑在 chrome 控制台上,最后爬取下来的数据都会存到一个变量里,所以第一次抓取的时候导致浏览器直接崩溃了。

使用 HTTP 请求来保存数据
server.js 用来保存数据到本地起的服务
js
const http = require('http');
const fs = require('fs');
const path = require('path');
const PORT = 3000;
const DATA_DIR = path.join(__dirname, 'data');
/**
 * Turn an arbitrary value into a string usable as a file name.
 *
 * Each of \ / : * ? " < > | and every ASCII control character (U+0000-U+001F,
 * including NUL, which `fs` rejects in paths) is replaced with "_". The result
 * is then truncated to at most 120 UTF-16 code units.
 *
 * @param {*} name - value to sanitize; coerced with String().
 * @returns {string} the sanitized name (no extension is added).
 */
const sanitizeFilename = (name) =>
  String(name).replace(/[\\/:*?"<>|\u0000-\u001f]/g, "_").slice(0, 120);
// Make sure the data directory exists before the server starts writing into it.
if (!fs.existsSync(DATA_DIR)) {
fs.mkdirSync(DATA_DIR, { recursive: true });
}
/**
 * HTTP server that persists posted JSON to disk.
 *
 * Only `POST /save-data` is handled; every other request gets a 404 JSON body.
 * The payload must be a JSON object (or array). It is written, pretty-printed,
 * to `<DATA_DIR>/<sanitized first key>.json`.
 *
 * Responses (all JSON):
 *   200 { success, filename, message }  - file written
 *   400 { error, ... }                  - body is not valid JSON, is not an
 *                                         object, or has no (non-empty) first key
 *   500 { error, message, raw }         - writing the file failed
 */
const server = http.createServer((req, res) => {
  // Every response is JSON with the same header, so share the boilerplate.
  const reply = (statusCode, payload) => {
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(payload));
  };

  if (req.method !== 'POST' || req.url !== '/save-data') {
    reply(404, { error: 'Not found' });
    return;
  }

  // Keep the raw Buffers and decode once at the end. Decoding chunk by chunk
  // corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
  const chunks = [];
  req.on('data', (chunk) => {
    chunks.push(chunk);
  });
  req.on('error', (error) => {
    console.error('Error reading request:', error);
  });
  req.on('end', () => {
    const body = Buffer.concat(chunks).toString('utf8');

    let data;
    try {
      data = JSON.parse(body);
    } catch (error) {
      // A malformed body is the client's fault, so answer 400 rather than 500.
      console.error('Error processing data:', error);
      console.error('Raw body:', body);
      reply(400, { error: 'Failed to process data', message: error.message, raw: body });
      return;
    }

    // JSON.parse also accepts null, numbers and strings; Object.keys(null)
    // would throw, so only look for keys on real objects.
    const isObject = data !== null && typeof data === 'object';
    const firstKeyRaw = isObject ? Object.keys(data)[0] : undefined;
    if (!firstKeyRaw) {
      reply(400, { error: 'No data keys found' });
      return;
    }

    const filename = `${sanitizeFilename(firstKeyRaw)}.json`;
    const filepath = path.join(DATA_DIR, filename);
    try {
      // recursive mkdir is a no-op when the directory already exists.
      fs.mkdirSync(path.dirname(filepath), { recursive: true });
      // Save the data to the file.
      fs.writeFileSync(filepath, JSON.stringify(data, null, 2));
    } catch (error) {
      console.error('Error processing data:', error);
      console.error('Raw body:', body);
      reply(500, { error: 'Failed to process data', message: error.message, raw: body });
      return;
    }

    console.log(`Data saved to ${filename}`);
    reply(200, {
      success: true,
      filename: filename,
      message: `Data saved to ${filename}`
    });
  });
});
// Start listening on PORT and log where data will be written.
// NOTE(review): the host in the first log line is a hard-coded string, not
// derived from the actual listening address — confirm it matches the machine.
server.listen(PORT, () => {
console.log(`Server running at http://10.171.211.77:${PORT}`);
console.log(`Data will be saved to: ${DATA_DIR}`);
});
module.exports = server;
index.js 模拟用户完整的操作
js
/**
 * Return a promise that resolves (with no value) once a `setTimeout` of `ms`
 * milliseconds fires.
 *
 * @param {number} ms - delay handed to setTimeout.
 * @returns {Promise<void>}
 */
const sleep = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
// Scraped data for the brand currently being processed, keyed by the text of
// the brand/series input; task() resets it to {} after each brand is saved.
let vehicleDatabase = {};
// Delay in milliseconds between simulated UI actions.
const SLEEP_TIME = 200;
// Index (into the page's brand list) of the brand to process next.
// NOTE(review): starts at 566 rather than 0 — presumably to resume an
// interrupted run; confirm before reusing.
let brandIndex = 566;
// Index of the next series to click within the current brand.
let seriesIndex = 0;
/**
 * POST `data` as JSON to the local save service.
 *
 * The response is read as text first and parsed separately, so a non-JSON
 * reply is reported with its raw text instead of a bare parse error. This
 * function never rejects: every failure is logged with console.error.
 *
 * @param {object} data - value to serialize and send.
 * @returns {Promise<void>}
 */
const saveJson = async (data) => {
  const endpoint = 'http://10.171.211.77:3000/save-data';
  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(data),
    });

    // Take the plain text first so a bad body cannot blow up the parse silently.
    const rawText = await response.text();
    let result;
    try {
      result = JSON.parse(rawText);
    } catch (parseError) {
      console.error('后端返回非 JSON:', rawText);
      throw new Error(`后端返回非 JSON: ${rawText}`);
    }

    if (!response.ok) {
      console.error('发送到云端失败:', response.status, result);
      return;
    }
    console.log('数据已发送到云端:', result.message || result);
  } catch (error) {
    console.error('发送到云端出错:', error);
  }
};
/**
 * Drive the page's selectors the way a user would and record the model list
 * of every series of every brand, starting at the module-level `brandIndex` /
 * `seriesIndex`.
 *
 * Each pass of the loop handles one series: open the brand panel, click the
 * current brand, click the current series, open the model panel and store its
 * entries in `vehicleDatabase` under the brand/series input text. When a
 * brand has no series left, its data is sent with saveJson(), the buffer is
 * reset and the next brand starts. The function returns when `brandIndex`
 * runs past the brand list.
 *
 * Reads and writes the module-level `vehicleDatabase`, `brandIndex` and
 * `seriesIndex`.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the brand list never appears after opening the panel.
 */
const task = async () => {
  // ".car-selector_selector__2E02F" boxes: [0] brand/series, [1] model.
  const SELECTOR_BOX = ".tw-relative.car-selector_selector__2E02F";
  // ".tool-tip_wrapper__1lKlz" popups: [0] brand + series, [1] model.
  const POPUP = ".tool-tip_wrapper__1lKlz";
  const BRAND_LIST = `${POPUP} .jsx-2401233222.brand-list`;
  const SERIES_PANEL = `${POPUP} .jsx-232230372.series-selector`;
  const LOADING_TEXT = "加载中";

  // Only post when something was collected; the buffer is empty right after a
  // brand has been flushed, and the save service rejects an empty object.
  const flush = async () => {
    if (Object.keys(vehicleDatabase).length > 0) {
      await saveJson(vehicleDatabase);
    }
  };

  // A loop instead of un-awaited recursion: one promise covers the whole run,
  // so errors surface on the caller's promise rather than on detached ones.
  while (true) {
    document.querySelectorAll(SELECTOR_BOX)[0].click(); // open the first panel
    await sleep(SLEEP_TIME);

    // Get the brand list, allowing one longer wait if it is not rendered yet.
    let brandListEl = document.querySelector(BRAND_LIST);
    if (!brandListEl) {
      await sleep(SLEEP_TIME * 2);
      brandListEl = document.querySelector(BRAND_LIST);
    }
    if (!brandListEl) {
      throw new Error("brand list did not appear after opening the brand panel");
    }
    const brandList = brandListEl.querySelectorAll(".jsx-2401233222.brand");

    // Simulate the brand click; no brand at this index means we are done.
    const brandDomObj = brandList[brandIndex];
    if (!brandDomObj) {
      await flush();
      return;
    }
    brandDomObj.click();
    await sleep(SLEEP_TIME);
    while (document.querySelector(SERIES_PANEL)?.textContent?.includes(LOADING_TEXT)) {
      await sleep(SLEEP_TIME);
    }

    // Series available under the current brand.
    const seriesList = document.querySelectorAll(`${SERIES_PANEL} p.jsx-232230372`);
    const seriesObj = seriesList[seriesIndex];
    if (!seriesObj) {
      // No series left for this brand: switch to the next brand.
      brandIndex++;
      seriesIndex = 0;
      // The original flow clicks the first series here before restarting; keep
      // that, but tolerate a brand whose series list is empty.
      seriesList[0]?.click();
      console.log(seriesIndex, vehicleDatabase);
      await flush();
      vehicleDatabase = {};
      continue;
    }
    seriesObj.click(); // simulate the series click
    seriesIndex++;
    await sleep(SLEEP_TIME);

    // Simulate clicking the model selector.
    document.querySelectorAll(SELECTOR_BOX)[1].click();
    await sleep(SLEEP_TIME);

    // Text already filled into the brand/series input; used as the record key.
    const brandSeries = document
      .querySelectorAll(SELECTOR_BOX)[0]
      .querySelector("input").value;

    // Wait for loading to finish BEFORE collecting <li> nodes: querySelectorAll
    // returns a static snapshot, so an earlier query would miss the real list.
    while (document.querySelectorAll(POPUP)[1].innerText.includes(LOADING_TEXT)) {
      await sleep(SLEEP_TIME);
    }
    const modelList = document.querySelectorAll(POPUP)[1].querySelectorAll("li");

    // Each entry's text spans several lines; join them with "/".
    vehicleDatabase[brandSeries] = Array.from(modelList, (item) =>
      item.innerText.replace(/\n/g, "/"),
    );
    console.log(Object.keys(vehicleDatabase).length);
  }
};
task();
执行之后,不出意外就报错了,触发了浏览器的CSP策略。
解决方案
方案一:通过修改浏览器CSP + mixed content来解决
- 关闭浏览器mixed content策略
text
open -na "Google Chrome" --args \
--user-data-dir=/tmp/chrome-insecure-profile \
--disable-web-security \
--allow-running-insecure-content
- 修改浏览器网站的CSP策略,修改 文件 content
STEP1 打开浏览器的开发者工具,新建文件夹

STEP2 搜索 Content-Security-Policy 关键词,删除 (ps: 当文件真正加载到本地,再去删除这段代码是无效的,由于浏览器已将CSP规则加载至渲染进程内存,通过开发者工具手动删除HTML中的meta标签无法撤销已生效的安全策略。为绕过限制,可使用 --disable-web-security 启动浏览器,或利用Charles/Fiddler代理技术。)

STEP3 将代码写到控制台测试,一把过

方案二:通过正向代理来解决
做一个代理服务,将 懂车帝 所有资源都代理到本地
proxy.js
js
const http = require("http");
const https = require("https");
const { URL } = require("url");
const PORT = 8089;
const API_TARGET_HOST = "10.171.211.77";
const API_TARGET_PORT = 3000;
let lastTargetOrigin = null;
// Lower-cased header names that filterRequestHeaders / filterResponseHeaders
// drop instead of copying between the client and the upstream server.
const HOP_BY_HOP_HEADERS = new Set([
"connection",
"keep-alive",
"proxy-authenticate",
"proxy-authorization",
"te",
"trailers",
"transfer-encoding",
"upgrade",
]);
/**
 * Return a shallow copy of `headers` without the two Content-Security-Policy
 * headers. Keys are matched exactly (lower-case); the input is not modified.
 *
 * @param {object} headers - header map to copy.
 * @returns {object} a new object without the CSP entries.
 */
const removeCspHeaders = (headers) => {
  const {
    "content-security-policy": _enforced,
    "content-security-policy-report-only": _reportOnly,
    ...remaining
  } = headers ?? {};
  return remaining;
};
/**
 * Parse a `Cookie` request header into a name -> value map.
 *
 * Pairs are split on ";" and on their first "="; names and values are trimmed
 * but not URL-decoded. Pairs without "=" or with an empty name are skipped,
 * and a later duplicate name overwrites an earlier one.
 *
 * @param {string|undefined} cookieHeader - raw header value.
 * @returns {object} parsed cookies ({} for an empty or missing header).
 */
const parseCookies = (cookieHeader) => {
  const jar = {};
  const header = String(cookieHeader || "");
  if (header === "") return jar;
  header.split(";").forEach((pair) => {
    const eq = pair.indexOf("=");
    if (eq < 0) return;
    const name = pair.slice(0, eq).trim();
    if (name === "") return;
    jar[name] = pair.slice(eq + 1).trim();
  });
  return jar;
};
/**
 * Add one cookie string to `headers["set-cookie"]`, mutating `headers`.
 *
 * A missing (or otherwise falsy) value becomes a one-element array, an
 * existing array is copied with the cookie appended, and a single existing
 * string is turned into a two-element array.
 *
 * @param {object} headers - header map to update in place.
 * @param {string} cookie - Set-Cookie value to append.
 * @returns {void}
 */
const appendSetCookie = (headers, cookie) => {
  const current = headers["set-cookie"];
  let previous;
  if (!current) {
    previous = [];
  } else if (Array.isArray(current)) {
    previous = current;
  } else {
    previous = [current];
  }
  headers["set-cookie"] = [...previous, cookie];
};
/**
 * Copy request headers for forwarding upstream, leaving out `host` and every
 * header listed in HOP_BY_HOP_HEADERS (names compared case-insensitively).
 * Original key casing and values are kept.
 *
 * @param {object|null|undefined} headers - incoming request headers.
 * @returns {object} a new object holding the headers to forward.
 */
const filterRequestHeaders = (headers) => {
  const kept = {};
  Object.entries(headers || {}).forEach(([name, value]) => {
    const lower = name.toLowerCase();
    const dropped = HOP_BY_HOP_HEADERS.has(lower) || lower === "host";
    if (!dropped) kept[name] = value;
  });
  return kept;
};
/**
 * Copy response headers for sending back to the client, leaving out every
 * header listed in HOP_BY_HOP_HEADERS (names compared case-insensitively).
 *
 * @param {object|null|undefined} headers - upstream response headers.
 * @returns {object} a new object holding the headers to send.
 */
const filterResponseHeaders = (headers) => {
  const kept = {};
  Object.entries(headers || {}).forEach(([name, value]) => {
    const isHopByHop = HOP_BY_HOP_HEADERS.has(String(name).toLowerCase());
    if (!isHopByHop) kept[name] = value;
  });
  return kept;
};
/**
 * Recover the proxied target URL from a `Referer` header that points back at
 * this proxy (host `localhost`, same port).
 *
 * The referer's path is handed to parseTargetUrl(), so a referer of
 * `http://localhost:<port>/https://site/path` yields the URL for
 * `https://site/path`.
 *
 * @param {string|undefined} referer - raw Referer header value.
 * @param {number|string} port - port this proxy listens on.
 * @returns {URL|null} the target URL, or null when the referer is missing,
 *   unparsable, not this proxy, or does not embed a target.
 */
const getTargetBaseFromReferer = (referer, port) => {
  if (!referer) return null;
  try {
    const { hostname, protocol, port: explicitPort, pathname } = new URL(referer);
    // An empty port means the scheme's default port was used.
    const effectivePort = String(explicitPort || (protocol === "https:" ? 443 : 80));
    const isThisProxy = hostname === "localhost" && effectivePort === String(port);
    return isThisProxy ? parseTargetUrl(pathname) : null;
  } catch {
    return null;
  }
};
/**
 * Rewrite a redirect `Location` so the browser stays on the proxy.
 *
 * The location is resolved against the target's origin. When it stays on that
 * origin it becomes `http://localhost:<port>/<absolute url>`; a redirect to a
 * different origin, an unparsable one, or a missing argument is returned
 * unchanged.
 *
 * @param {string|undefined} location - Location header from upstream.
 * @param {{origin: string}|null} currentTargetBase - the proxied target.
 * @param {number|string} port - port this proxy listens on.
 * @returns {string|undefined} the rewritten or original location.
 */
const rewriteLocationHeader = (location, currentTargetBase, port) => {
  if (!(location && currentTargetBase)) return location;
  try {
    const resolved = new URL(location, currentTargetBase.origin);
    const staysOnTarget = resolved.origin === currentTargetBase.origin;
    return staysOnTarget ? `http://localhost:${port}/${resolved.href}` : location;
  } catch {
    return location;
  }
};
/**
 * Send `payload` as a UTF-8 JSON response and end it.
 *
 * @param {object} res - response object exposing writeHead() and end().
 * @param {number} statusCode - HTTP status to send.
 * @param {*} payload - value serialized with JSON.stringify.
 * @returns {void}
 */
const sendJson = (res, statusCode, payload) => {
  const headers = { "Content-Type": "application/json; charset=utf-8" };
  res.writeHead(statusCode, headers);
  const serialized = JSON.stringify(payload);
  res.end(serialized);
};
/**
 * Forward a client request to the save service as
 * `POST http://API_TARGET_HOST:API_TARGET_PORT/save-data` and stream the
 * upstream response back to the client.
 *
 * Request headers go through filterRequestHeaders() and `host` is set to the
 * save service; response headers go through filterResponseHeaders().
 *
 * @param {http.IncomingMessage} clientReq - request received by the proxy.
 * @param {http.ServerResponse} clientRes - response to the client.
 * @returns {void}
 */
const forwardToSaveServer = (clientReq, clientRes) => {
  const options = {
    hostname: API_TARGET_HOST,
    port: API_TARGET_PORT,
    path: "/save-data",
    method: "POST",
    headers: {
      ...filterRequestHeaders(clientReq.headers),
      host: `${API_TARGET_HOST}:${API_TARGET_PORT}`,
    },
  };
  const upstreamReq = http.request(options, (upstreamRes) => {
    const headers = filterResponseHeaders(upstreamRes.headers);
    clientRes.writeHead(upstreamRes.statusCode || 500, headers);
    upstreamRes.pipe(clientRes);
  });
  upstreamReq.on("error", (error) => {
    // If the upstream fails after its headers were relayed, a second
    // writeHead() would throw and take the proxy down. Abort the response.
    if (clientRes.headersSent) {
      clientRes.destroy(error);
      return;
    }
    sendJson(clientRes, 502, { error: `save-data upstream error: ${error.message}` });
  });
  clientReq.pipe(upstreamReq);
};
/**
 * Extract the target URL embedded in a proxy request path such as
 * `/https://example.com/path?query=1`.
 *
 * Leading slashes are stripped; the remainder must start with `http://` or
 * `https://` (case-insensitive) and parse as a URL.
 *
 * @param {string|undefined} urlPath - request path (may include a query).
 * @returns {URL|null} the parsed target, or null when none is embedded.
 */
const parseTargetUrl = (urlPath) => {
  const candidate = String(urlPath || "/").replace(/^\/+/, "");
  const looksAbsolute = candidate !== "" && /^https?:\/\//i.test(candidate);
  if (!looksAbsolute) return null;
  try {
    return new URL(candidate);
  } catch {
    return null;
  }
};
/**
 * Fetch `targetUrl` on behalf of the client and stream the response back.
 *
 * On the way back the CSP headers and the HOP_BY_HOP_HEADERS are dropped,
 * same-origin redirects are rewritten to stay on the proxy, a `proxy_target`
 * cookie recording the target origin is added, and
 * `access-control-allow-origin: *` is set. The target origin is also stored in
 * the module-level `lastTargetOrigin`.
 *
 * @param {http.IncomingMessage} clientReq - request received by the proxy.
 * @param {http.ServerResponse} clientRes - response to the client.
 * @param {URL} targetUrl - absolute URL to fetch.
 * @returns {void}
 */
const proxyWebPage = (clientReq, clientRes, targetUrl) => {
  lastTargetOrigin = targetUrl.origin;
  const isHttps = targetUrl.protocol === "https:";
  const requester = isHttps ? https : http;
  const options = {
    protocol: targetUrl.protocol,
    hostname: targetUrl.hostname,
    port: targetUrl.port || (isHttps ? 443 : 80),
    method: clientReq.method,
    path: `${targetUrl.pathname}${targetUrl.search}`,
    headers: {
      ...filterRequestHeaders(clientReq.headers),
      host: targetUrl.host,
    },
  };
  const upstreamReq = requester.request(options, (upstreamRes) => {
    const stripped = removeCspHeaders(upstreamRes.headers);
    const headers = filterResponseHeaders(stripped);
    if (headers.location) {
      const targetBase = new URL(targetUrl.origin);
      headers.location = rewriteLocationHeader(headers.location, targetBase, PORT);
    }
    // Remember the target so later relative requests can be resolved against it.
    appendSetCookie(
      headers,
      `proxy_target=${encodeURIComponent(targetUrl.origin)}; Path=/; SameSite=Lax`,
    );
    headers["access-control-allow-origin"] = "*";
    clientRes.writeHead(upstreamRes.statusCode || 500, headers);
    upstreamRes.pipe(clientRes);
  });
  upstreamReq.on("error", (error) => {
    // If the upstream fails after its headers were relayed, a second
    // writeHead() would throw and take the proxy down. Abort the response.
    if (clientRes.headersSent) {
      clientRes.destroy(error);
      return;
    }
    sendJson(clientRes, 502, { error: `proxy upstream error: ${error.message}` });
  });
  clientReq.pipe(upstreamReq);
};
/**
 * Proxy entry point. Requests are handled in this order:
 *   1. OPTIONS            -> 204 with permissive CORS headers
 *   2. "/"                -> usage text as JSON
 *   3. POST "/save-data"  -> forwarded to the save service
 *   4. anything else      -> proxied to a target URL, resolved from (first
 *      match wins) the path itself (`/https://site/...`), the Referer, the
 *      `proxy_target` cookie, then the last target origin seen.
 * A request whose target cannot be resolved gets a 400 JSON error.
 */
const server = http.createServer((req, res) => {
  const { method, url } = req;

  if (method === "OPTIONS") {
    res.writeHead(204, {
      "Access-Control-Allow-Origin": "*",
      "Access-Control-Allow-Methods": "GET,POST,OPTIONS",
      "Access-Control-Allow-Headers": "Content-Type, Authorization",
    });
    res.end();
    return;
  }

  if (url === "/") {
    sendJson(res, 200, {
      usage: [
        "打开目标网页: http://localhost:8089/https://example.com/path?query=1",
        "页面内上报接口: POST /save-data (会转发到 10.171.211.77:3000/save-data)",
      ],
    });
    return;
  }

  if (url === "/save-data" && method === "POST") {
    forwardToSaveServer(req, res);
    return;
  }

  // Resolve the request path against an origin; null when that fails.
  const resolveAgainst = (origin) => {
    try {
      return new URL(url, origin);
    } catch {
      return null;
    }
  };

  // 1) The target is embedded in the path itself.
  let targetUrl = parseTargetUrl(url);

  // 2) Relative request from a proxied page: take the origin from the Referer.
  if (!targetUrl) {
    const refererTarget = getTargetBaseFromReferer(req.headers.referer, PORT);
    if (refererTarget) {
      targetUrl = resolveAgainst(refererTarget.origin);
    }
  }

  // 3) Fall back to the origin remembered in the proxy_target cookie.
  if (!targetUrl) {
    const { proxy_target: encodedOrigin } = parseCookies(req.headers.cookie);
    if (encodedOrigin) {
      try {
        targetUrl = new URL(url, decodeURIComponent(encodedOrigin));
      } catch {
        targetUrl = null;
      }
    }
  }

  // 4) Last resort: the most recent target origin this process proxied.
  if (!targetUrl && lastTargetOrigin) {
    targetUrl = resolveAgainst(lastTargetOrigin);
  }

  if (!targetUrl) {
    sendJson(res, 400, {
      error: "invalid target url. open page via /https://your-target-url first",
      hint: `example: http://localhost:${PORT}/https://example.com/path`,
    });
    return;
  }

  proxyWebPage(req, res, targetUrl);
});
server.listen(PORT, () => {
console.log(`Proxy running at http://localhost:${PORT}`);
console.log("Open page via: http://localhost:8089/https://目标网址");
});
这样就可以通过正向代理来解决CSP和mixed content策略问题了。
方案三,抓取懂车帝接口
我擦,这个是后来才知道,shit 💩~