douyin-archive/fix-asset-urls.ts

133 lines
4.5 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// scripts/fix-asset-urls.ts
import { PrismaClient } from '@prisma/client';
// Shared Prisma client used for every query issued by this script.
const prisma = new PrismaClient();
// Old prefix to look for in stored asset URLs.
const FROM = 'douyin-archive/';
// Replacement for the old prefix; it is empty, so the prefix is simply removed.
const TO = '';
/**
 * Backslash-escapes every regular-expression metacharacter in `s` so the
 * result can be embedded in a regex pattern and match `s` literally.
 *
 * @param s Literal text to escape.
 * @returns `s` with each of . * + ? ^ $ { } ( ) | [ ] \ preceded by a backslash.
 */
function escapeForPgRegex(s: string) {
  const metacharacters = new Set('.*+?^${}()|[]\\');
  let escaped = '';
  for (const ch of s) {
    escaped += metacharacters.has(ch) ? '\\' + ch : ch;
  }
  return escaped;
}
const FROM_RE = `^${escapeForPgRegex(FROM)}`; // anchored at the start: only strings that begin with the old prefix are rewritten
const dryRun = false; // true: only count affected rows, modify nothing
/** Text columns whose values may begin with the old prefix, as [table, column] pairs. */
const URL_COLUMNS: ReadonlyArray<readonly [string, string]> = [
  ['CommentUser', 'avatar_url'],
  ['CommentImage', 'url'],
  ['Author', 'avatar_url'],
  ['Video', 'cover_url'],
  ['Video', 'video_url'],
  ['ImageFile', 'url'],
  ['ImageFile', 'animated'],
  ['ImagePost', 'music_url'],
];

/** Tables whose raw_json column may contain the old prefix anywhere in its serialized text. */
const RAW_JSON_TABLES: readonly string[] = ['Video', 'ImagePost'];

/**
 * Time budget for the update transaction. Prisma's interactive transactions
 * default to a 5 second timeout, which a bulk rewrite can easily exceed.
 */
const UPDATE_TX_TIMEOUT_MS = 10 * 60 * 1000;

/**
 * Rewrites stored asset URLs from the old prefix (FROM) to the new one (TO).
 *
 * - With `dryRun` set, prints how many rows per column would be affected and
 *   returns without modifying anything.
 * - Otherwise, inside one transaction, replaces a leading FROM with TO in every
 *   column listed in URL_COLUMNS, replaces every occurrence of FROM in the
 *   raw_json documents of RAW_JSON_TABLES, and finally logs a small sample.
 *
 * FROM, TO and FROM_RE are interpolated directly into the SQL text, so they
 * must stay trusted constants free of single quotes and LIKE wildcards.
 */
async function main(): Promise<void> {
  if (dryRun) {
    // One COUNT(*) per target column, generated from the same lists the
    // update uses so the preview cannot drift from the real run.
    const countQueries = [
      ...URL_COLUMNS.map(
        ([table, column]) =>
          `SELECT '${table}.${column}' AS col, COUNT(*) AS n FROM "${table}" WHERE ${column} LIKE '${FROM}%'`,
      ),
      ...RAW_JSON_TABLES.map(
        (table) =>
          `SELECT '${table}.raw_json' AS col, COUNT(*) AS n FROM "${table}" WHERE raw_json::text LIKE '%${FROM}%'`,
      ),
    ];
    const rows = await prisma.$queryRawUnsafe<Array<{ col: string; n: bigint }>>(`
      WITH c AS (
        ${countQueries.join('\n        UNION ALL\n        ')}
      )
      SELECT * FROM c ORDER BY col;
    `);
    console.table(rows);
    return;
  }

  await prisma.$transaction(
    async (tx) => {
      // Plain text columns: replace the prefix only where the value starts with it.
      for (const [table, column] of URL_COLUMNS) {
        await tx.$executeRawUnsafe(`
          UPDATE "${table}"
          SET ${column} = regexp_replace(${column}, '${FROM_RE}', '${TO}')
          WHERE ${column} LIKE '${FROM}%'
        `);
      }

      // Optional: raw_json gets a simple whole-text replacement; a recursive
      // JSONB walk could be used instead if finer control is needed.
      // The replaced text is cast back with ::jsonb so it is re-parsed as a
      // document. to_jsonb(text) would instead store the whole serialized
      // document as a single JSON string value.
      for (const table of RAW_JSON_TABLES) {
        await tx.$executeRawUnsafe(`
          UPDATE "${table}"
          SET raw_json = replace(raw_json::text, '${FROM}', '${TO}')::jsonb
          WHERE raw_json::text LIKE '%${FROM}%'
        `);
      }
    },
    { timeout: UPDATE_TX_TIMEOUT_MS },
  );

  // Quick spot check. When TO is empty the LIKE pattern is just '%', so this
  // simply shows a couple of non-null values per column.
  const sample = await prisma.$queryRawUnsafe<Array<{ col: string; v: string }>>(`
    (
      SELECT 'CommentImage.url' AS col, url AS v FROM "CommentImage" WHERE url LIKE '${TO}%' LIMIT 2
    ) UNION ALL (
      SELECT 'CommentUser.avatar_url', avatar_url FROM "CommentUser" WHERE avatar_url LIKE '${TO}%' LIMIT 2
    ) UNION ALL (
      SELECT 'Video.video_url', video_url FROM "Video" WHERE video_url LIKE '${TO}%' LIMIT 2
    )
  `);
  console.log('Sample after update:', sample);
}
// Script entry point: run the migration, report any failure, and always
// release the database connection.
main()
  .catch((e: unknown) => {
    console.error(e);
    // Set the exit code instead of calling process.exit(1): exiting here would
    // terminate the process before the finally handler below can disconnect.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
  });