diff --git a/package.json b/package.json index f4d5683..cbb9c75 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,7 @@ "main": "index.js", "scripts": { "dev": "npx ts-node src/index.ts", + "migrate": "npx ts-node src/mannual/migrate.ts", "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], diff --git a/src/index.ts b/src/index.ts index 96591f2..ac25705 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,13 +7,25 @@ export type FetchType = { const fetchList = [ { name: 'AI写作工具', - typeId: '67908fc33de33b392c0330af', + typeId: '6790aae23de33b392c0330b2', url: 'https://ai-bot.cn/favorites/ai-writing-tools/' }, { - name: '', - typeId: '' - } + name: 'AI图像网站', + typeId: '67908fc33de33b392c0330af', + url: 'https://ai-bot.cn/favorites/best-ai-image-tools/' + }, + { + name: 'AI视频网站', + typeId: '67b6f0b7b139d1d6aa14cd06', + url: 'https://ai-bot.cn/favorites/ai-video-tools/' + }, + { + name: 'AI音频网站', + typeId: '6791a98fc058e55ed0a094ca', + url: 'https://ai-bot.cn/favorites/ai-audio-tools/' + }, + ] function main() { console.log("Hello, this is the main function!"); diff --git a/src/lib/mongodb.ts b/src/lib/mongodb.ts index 54b8495..14d7754 100644 --- a/src/lib/mongodb.ts +++ b/src/lib/mongodb.ts @@ -23,8 +23,8 @@ if (process.env.NODE_ENV === 'development') { clientPromise = client.connect(); } -export const getDb = async () => { - return (await clientPromise).db('crawler'); +export const getDb = async (dbName?: string) => { + return (await clientPromise).db(dbName || 'crawler'); }; export const getCollection = async (collection: string) => { const ins = await getDb(); diff --git a/src/link/index.ts b/src/link/index.ts index 536e11f..a7e61b9 100644 --- a/src/link/index.ts +++ b/src/link/index.ts @@ -4,7 +4,6 @@ import { downloadImage } from "../share/tools" import { getCollection } from '../lib/mongodb'; import { FetchType } from '..'; // 要抓取的网页 URL -const url = 'https://ai-bot.cn/favorites/ai-writing-tools/'; async function getPageData(url: string, name: string) { try { @@ -20,10 +19,13 @@ async function getPageData(url: string, name: string) { } } export async function queryListData(list: FetchType[]) { - const promiseList = list.map(item => fetchData(item.typeId)) - await Promise.all(promiseList) + const col = await getCollection('link'); + await col.deleteMany({}) + for (const item of list) { + await fetchData(item.typeId, item.url) + } } -export async function fetchData(typeName: string) { +export async function fetchData(typeName: string, url: string) { try { // 请求目标页面 const { data } = await axios.get(url); @@ -69,7 +71,10 @@ export async function fetchData(typeName: string) { toolsData.push(toolData); i++ - console.log(`进度:${i}/${length}`); + console.clear() + console.log(`正在爬取${typeName}类别的数据,共${length}条数据`); + + console.log(`${typeName}:进度:${i}/${length}`); } console.log(toolsData); diff --git a/src/mannual/migrate.ts b/src/mannual/migrate.ts new file mode 100644 index 0000000..45c0982 --- /dev/null +++ b/src/mannual/migrate.ts @@ -0,0 +1,12 @@ +import { getCollection, getDb } from "../lib/mongodb"; + +async function migrateLink() { + const botDb = await getDb('ai-bot'); + const botCol = botDb.collection('link'); + const col = await getCollection('link') + const links = await col.find().toArray(); + await botCol.deleteMany({}); + await botCol.insertMany(links); + console.log('Migrate link success'); +} +migrateLink(); \ No newline at end of file diff --git a/src/share/tools.ts b/src/share/tools.ts index 55a935d..5e08598 100644 --- a/src/share/tools.ts +++ b/src/share/tools.ts @@ -38,7 +38,7 @@ export async function downloadImage(url: string) { // 写入文件 await fs.writeFile(filePath, response.data); - return id; + return filename; } catch (error) { console.error('下载失败:', error); throw error;