175 lines
5.0 KiB
TypeScript
175 lines
5.0 KiB
TypeScript
// Exports a module-level `runtime` constant with the value 'nodejs'.
// NOTE(review): presumably a Next.js runtime selector (the Playwright helpers below
// need the Node.js runtime rather than Edge) — confirm it has an effect in this module.
export const runtime = 'nodejs'
|
||
|
||
import type { BrowserContext, Response } from 'playwright';
|
||
|
||
/**
 * Reads a response body as JSON without ever throwing.
 *
 * When the `content-type` header advertises `application/json` the body is
 * parsed through `res.json()`; otherwise the raw text is read and passed to
 * `JSON.parse`, so JSON served with a different content type is still accepted.
 *
 * @param res - Response whose body should be parsed.
 * @returns The parsed value cast to `T` (not validated at runtime), or `null`
 *          when the body cannot be read or is not valid JSON.
 */
export async function safeJson<T>(res: Response): Promise<T | null> {
  try {
    const contentType = res.headers()['content-type'] ?? '';
    if (contentType.includes('application/json')) {
      // `res.json()` throws on a malformed body even when the header claims JSON,
      // so it must live inside the same try block as the fallback path.
      return (await res.json()) as T;
    }
    return JSON.parse(await res.text()) as T;
  } catch {
    // Unreadable body or invalid JSON: report "no data" instead of failing the caller.
    return null;
  }
}
|
||
|
||
/**
 * Downloads binary content through the browser context's APIRequestContext
 * (`context.request`).
 *
 * - Sends a fixed `referer: https://www.douyin.com/` header with the request.
 * - Follows at most 3 redirects, waits up to 240 s and asks Playwright to fail
 *   on error status codes.
 * - Disposes the response once the body has been copied out, so the body does
 *   not stay in memory until the context is closed.
 *
 * @param context - Browser context whose `request` client performs the download.
 * @param url - URL of the resource to download.
 * @returns The body bytes, the `content-type` header value (or
 *          `application/octet-stream` when the header is absent) and a file
 *          extension derived from that content type.
 * @throws Error when the request fails or the final response is not OK.
 */
export async function downloadBinary(
  context: BrowserContext,
  url: string,
): Promise<{ buffer: Buffer; contentType: string; ext: string }> {
  console.log('下载:', url);

  const headers: Record<string, string> = {
    referer: 'https://www.douyin.com/',
  };

  const res = await context.request.get(url, {
    headers,
    maxRedirects: 3,
    timeout: 240_000,
    failOnStatusCode: true,
  });

  try {
    // Kept in addition to `failOnStatusCode`: `ok()` is stricter (2xx only).
    if (!res.ok()) {
      throw new Error(`下载内容失败: ${res.status()} ${res.statusText()}`);
    }

    const buffer = await res.body();
    const contentType = res.headers()['content-type'] || 'application/octet-stream';
    return { buffer, contentType, ext: extensionFromContentType(contentType) };
  } finally {
    try {
      await res.dispose();
    } catch {
      // Best effort: a failed dispose must not mask the download result or error.
    }
  }
}

/**
 * Derives a file extension from a content-type value: the subtype without
 * parameters, trimmed and lower-cased. An empty subtype or the generic
 * `octet-stream` maps to `bin`.
 */
function extensionFromContentType(contentType: string): string {
  const mediaType = contentType.split(';')[0] ?? '';
  const subtype = (mediaType.split('/')[1] ?? '').trim().toLowerCase();
  return subtype === '' || subtype === 'octet-stream' ? 'bin' : subtype;
}
|
||
|
||
/**
 * Resolves with the first context response accepted by any of the candidates
 * ("first come, first served").
 *
 * - Candidates are not given individual long timeouts; one overall fallback
 *   timeout is used instead, which avoids pointless waiting.
 *
 * @param context - Browser context whose `response` events are observed.
 * @param candidates - Keyed predicates, checked in order for every response.
 * @param timeoutMs - Overall timeout in milliseconds; no timer is started when
 *                    the value is not greater than 0.
 * @returns The key of the matching candidate together with the response, or
 *          `null` when the timeout elapses first.
 */
export function waitForFirstResponse(
  context: BrowserContext,
  candidates: { key: string; test: (r: Response) => boolean }[],
  timeoutMs = 20_000
): Promise<{ key: string; response: Response } | null> {
  return new Promise((resolve) => {
    let settled = false;
    let fallbackTimer: NodeJS.Timeout | undefined;

    // Single exit point: mark as done, detach the listener, stop the timer, resolve.
    const settle = (outcome: { key: string; response: Response } | null): void => {
      if (settled) return;
      settled = true;
      context.off('response', onResponse);
      if (fallbackTimer) clearTimeout(fallbackTimer);
      resolve(outcome);
    };

    // A predicate that throws counts as "no match".
    const accepts = (candidate: (typeof candidates)[number], response: Response): boolean => {
      try {
        return candidate.test(response);
      } catch {
        return false;
      }
    };

    function onResponse(response: Response): void {
      if (settled) return;
      const winner = candidates.find((candidate) => accepts(candidate, response));
      if (winner) settle({ key: winner.key, response });
    }

    context.on('response', onResponse);

    if (timeoutMs > 0) {
      fallbackTimer = setTimeout(() => settle(null), timeoutMs);
    }
  });
}
|
||
|
||
/**
 * Gathers every context response accepted by `predicate` during a fixed time
 * window. Intended for data that arrives through scroll-triggered loading,
 * such as comments.
 *
 * @param context - Browser context whose `response` events are observed.
 * @param predicate - Decides whether a response should be collected; a
 *                    predicate that throws is treated as "not collected".
 * @param durationMs - Length of the collection window in milliseconds.
 * @returns The collected responses in arrival order, at most one per URL.
 */
export function collectResponsesWithinTime(
  context: BrowserContext,
  predicate: (r: Response) => boolean,
  durationMs: number
): Promise<Response[]> {
  return new Promise((resolve) => {
    // Keyed by URL so the same request is not collected twice; a Map keeps
    // insertion order, which is the arrival order of the first occurrence.
    const firstByUrl = new Map<string, Response>();

    const onResponse = (response: Response): void => {
      try {
        if (!predicate(response)) return;
        const address = response.url();
        if (firstByUrl.has(address)) return;
        firstByUrl.set(address, response);
      } catch {
        // ignore predicate errors
      }
    };

    context.on('response', onResponse);

    setTimeout(() => {
      context.off('response', onResponse);
      resolve(Array.from(firstByUrl.values()));
    }, durationMs);
  });
}
|
||
|
||
/**
 * Waits for a single context response accepted by `predicate`, using a short
 * timeout; meant for optional data such as comments.
 *
 * @param context - Browser context whose `response` events are observed.
 * @param predicate - Decides whether a response is the awaited one; a
 *                    predicate that throws is treated as "no match".
 * @param timeoutMs - Timeout in milliseconds; no timer is started when the
 *                    value is not greater than 0.
 * @returns The first accepted response.
 * @throws Error with message `timeout` (as a rejection) when the timeout
 *         elapses before any response is accepted.
 */
export function waitForResponseWithTimeout(
  context: BrowserContext,
  predicate: (r: Response) => boolean,
  timeoutMs = 5_000
): Promise<Response> {
  return new Promise<Response>((resolve, reject) => {
    let deadline: NodeJS.Timeout | undefined;

    // Stop listening and cancel the pending timeout, if any.
    const detach = (): void => {
      context.off('response', onResponse);
      if (deadline) clearTimeout(deadline);
    };

    function onResponse(candidate: Response): void {
      try {
        if (!predicate(candidate)) return;
        detach();
        resolve(candidate);
      } catch {
        // ignore predicate errors
      }
    }

    context.on('response', onResponse);

    if (timeoutMs <= 0) return;
    deadline = setTimeout(() => {
      detach();
      reject(new Error('timeout'));
    }, timeoutMs);
  });
}
|