175 lines
5.0 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Module-level export pinning `runtime` to the string 'nodejs'.
// NOTE(review): presumably a framework runtime hint (e.g. a route/segment
// config read by the hosting framework) — confirm against the consumer.
export const runtime = 'nodejs'
import type { BrowserContext, Response } from 'playwright';
/**
 * Reads a response body and parses it as JSON without throwing on bad payloads.
 *
 * The body is always read as text and parsed inside a guard, so a response that
 * advertises `application/json` but carries an empty or malformed body (for
 * example an HTML block page) yields `null` instead of an exception. This is
 * the same outcome the function already produced for non-JSON content types.
 *
 * @param res - Playwright response whose body should be decoded.
 * @returns The parsed value cast to `T` (not validated), or `null` when the
 *   body is not valid JSON.
 * @throws Whatever `res.text()` throws when the body itself cannot be read;
 *   only parse failures are converted to `null`.
 */
export async function safeJson<T>(res: Response): Promise<T | null> {
  const raw = await res.text();
  try {
    return JSON.parse(raw) as T;
  } catch {
    // Not JSON (or empty): callers treat this as "no usable payload".
    return null;
  }
}
/**
 * Downloads binary content through the browser context's APIRequestContext.
 *
 * - Sends a fixed `referer: https://www.douyin.com/` header (not the URL itself).
 * - Follows at most 3 redirects and allows up to 240 seconds for the request.
 * - `failOnStatusCode` makes Playwright throw for statuses outside 2xx/3xx; the
 *   explicit `ok()` check below additionally rejects anything that is not 2xx.
 * - The response is disposed once the body has been copied out, so the payload
 *   does not stay in Playwright's memory until the context closes.
 *
 * @param context - Browser context whose `request` client performs the download.
 * @param url - Absolute URL of the resource to fetch.
 * @returns The body bytes, the `content-type` header (defaulting to
 *   `application/octet-stream`), and an extension derived from the MIME subtype
 *   (`bin` when no subtype can be extracted).
 * @throws Error when the request fails or the final status is not 2xx.
 */
export async function downloadBinary(
  context: BrowserContext,
  url: string,
): Promise<{ buffer: Buffer; contentType: string; ext: string }> {
  console.log('下载:', url);
  const headers = {
    referer: 'https://www.douyin.com/',
  } as Record<string, string>;
  const res = await context.request.get(url, {
    headers,
    maxRedirects: 3,
    timeout: 240_000,
    failOnStatusCode: true,
  });
  try {
    if (!res.ok()) {
      throw new Error(`下载内容失败: ${res.status()} ${res.statusText()}`);
    }
    const buffer = await res.body();
    const contentType = res.headers()['content-type'] || 'application/octet-stream';
    // "video/mp4; charset=binary" -> "mp4"; parameters and stray whitespace are dropped.
    const subtype = contentType.split('/')[1]?.split(';')[0]?.trim();
    const ext = subtype || 'bin';
    return { buffer, contentType, ext };
  } finally {
    // Best-effort release of the stored body; a dispose failure must not mask
    // the download result or the original error.
    await res.dispose().catch(() => undefined);
  }
}
/**
 * Resolves with the first context response matched by any candidate
 * ("first come, first served").
 *
 * Candidates are evaluated in array order for every response. A single overall
 * fallback timeout is used instead of one long timeout per candidate, so the
 * caller never waits longer than `timeoutMs` in total.
 *
 * @param context - Browser context whose `response` events are observed.
 * @param candidates - Keyed predicates; a predicate that throws is treated as
 *   a non-match.
 * @param timeoutMs - Overall wait limit in milliseconds; no timer is armed
 *   unless the value is greater than 0.
 * @returns The matching key and response, or `null` when the timeout elapses.
 */
export function waitForFirstResponse(
  context: BrowserContext,
  candidates: { key: string; test: (r: Response) => boolean }[],
  timeoutMs = 20_000
): Promise<{ key: string; response: Response } | null> {
  return new Promise((resolve) => {
    let settled = false;
    let deadline: ReturnType<typeof setTimeout> | undefined;

    // Stops listening and cancels the fallback timer.
    function detach(): void {
      context.off('response', onResponse);
      if (deadline) clearTimeout(deadline);
    }

    // Marks the wait as finished, tears down, then hands the outcome to the caller.
    function finish(outcome: { key: string; response: Response } | null): void {
      settled = true;
      detach();
      resolve(outcome);
    }

    function onResponse(incoming: Response): void {
      if (settled) return;
      for (const candidate of candidates) {
        try {
          if (!candidate.test(incoming)) continue;
          finish({ key: candidate.key, response: incoming });
          return;
        } catch {
          // ignore predicate errors
        }
      }
    }

    context.on('response', onResponse);
    if (timeoutMs > 0) {
      deadline = setTimeout(() => {
        if (settled) return;
        finish(null);
      }, timeoutMs);
    }
  });
}
/**
 * Gathers every matching context response seen during a fixed time window.
 * Intended for data that arrives through repeated loads while scrolling, such
 * as comments.
 *
 * Responses are de-duplicated by URL: only the first response for a given URL
 * is kept, and results preserve arrival order.
 *
 * @param context - Browser context whose `response` events are observed.
 * @param predicate - Selects the responses to keep; a predicate that throws is
 *   treated as a non-match.
 * @param durationMs - Length of the collection window in milliseconds.
 * @returns The collected responses once the window has elapsed.
 */
export function collectResponsesWithinTime(
  context: BrowserContext,
  predicate: (r: Response) => boolean,
  durationMs: number
): Promise<Response[]> {
  return new Promise((resolve) => {
    // Insertion-ordered map doubles as the "seen" set and the result list.
    const firstByUrl = new Map<string, Response>();

    const onResponse = (incoming: Response): void => {
      try {
        if (!predicate(incoming)) return;
        const address = incoming.url();
        if (firstByUrl.has(address)) return;
        firstByUrl.set(address, incoming);
      } catch {
        // ignore predicate errors
      }
    };

    context.on('response', onResponse);
    setTimeout(() => {
      context.off('response', onResponse);
      resolve([...firstByUrl.values()]);
    }, durationMs);
  });
}
/**
 * Waits for a single context response that satisfies `predicate`, with a short
 * timeout. Meant for optional, nice-to-have data such as comments.
 *
 * @param context - Browser context whose `response` events are observed.
 * @param predicate - Selects the response to wait for; a predicate that throws
 *   is treated as a non-match.
 * @param timeoutMs - Wait limit in milliseconds; no timer is armed unless the
 *   value is greater than 0.
 * @returns The first matching response.
 * @throws Error with message `timeout` (as a rejection) when no response
 *   matches before the limit.
 */
export function waitForResponseWithTimeout(
  context: BrowserContext,
  predicate: (r: Response) => boolean,
  timeoutMs = 5_000
): Promise<Response> {
  return new Promise<Response>((resolve, reject) => {
    let deadline: ReturnType<typeof setTimeout> | undefined;

    // Stops listening and cancels the pending timeout, if any.
    function detach(): void {
      context.off('response', onResponse);
      if (deadline) clearTimeout(deadline);
    }

    function onResponse(incoming: Response): void {
      try {
        if (!predicate(incoming)) return;
        detach();
        resolve(incoming);
      } catch {
        // ignore predicate errors
      }
    }

    context.on('response', onResponse);
    if (!(timeoutMs > 0)) return;
    deadline = setTimeout(() => {
      detach();
      reject(new Error('timeout'));
    }, timeoutMs);
  });
}