From e8e249de353f9f52b82ddfd2807d66c8247f7e79 Mon Sep 17 00:00:00 2001
From: expdsn <18111002318@163.com>
Date: Mon, 24 Feb 2025 19:06:27 +0800
Subject: [PATCH] save

---
Review notes (text between "---" and the diffstat is ignored by git am):
 - src/index.ts: the 'AI编程工具' entry pointed at the same URL as
   'AI学习工具' (websites-to-learn-ai), so the learning page would be scraped
   twice and stored under the programming type id. It now points at
   ai-programming-tools; please confirm the slug against the site.
 - src/link/index.ts: `typeName + ',' + subTitle || ''` never reached the
   `|| ''` fallback because `+` binds tighter than `||`; the progress label
   now appends the sub-title only when one exists.
 - src/link/index.ts: the duplicate-name merge scanned toolsData twice with
   findIndex; it now does a single find().
 - src/link/index.ts: `name` is optional in FetchType, so typeName falls back
   to typeId for the progress logs; addTime is stored as whole seconds.
 - Line breaks/indentation of this copy were restored by hand (4-space indent
   assumed). If context does not match, apply with
   `git am --ignore-whitespace`. Hunk line counts are unchanged.

 src/index.ts      | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 src/link/index.ts | 36 +++++++++++++++++++++--------
 2 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index ac25705..a249e6f 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -3,8 +3,10 @@ export type FetchType = {
     typeId: string;
     url: string;
     name?: string;
+    hasSubType?: boolean;
 }
 const fetchList = [
+
     {
         name: 'AI写作工具',
         typeId: '6790aae23de33b392c0330b2',
@@ -25,6 +27,63 @@ const fetchList = [
         typeId: '6791a98fc058e55ed0a094ca',
         url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
     },
+    {
+        name: 'AI办公网站',
+        typeId: '6790ab4f3de33b392c0330b3',
+        url: 'https://ai-bot.cn/favorites/ai-office-tools/',
+        hasSubType: true
+    },
+    {
+        name: 'AI搜索工具',
+        typeId: '6790dc6b3de33b392c0330bb',
+        url: 'https://ai-bot.cn/favorites/ai-search-engines/'
+    },
+    {
+        name: 'AI对话网站',
+        typeId: '6790c2f93de33b392c0330b6',
+        url: 'https://ai-bot.cn/favorites/ai-chatbots/'
+    },
+    {
+        name: 'AI内容检测',
+        typeId: '67b707c9b139d1d6aa14cd07',
+        url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
+    },
+    {
+        name: 'AI学习工具',
+        typeId: '67b7080fb139d1d6aa14cd08',
+        url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
+    },
+    {
+        name: 'AI开发平台',
+        typeId: '67b7eb3de0cf2993700b1186',
+        url: 'https://ai-bot.cn/favorites/ai-frameworks/'
+    },
+    {
+        name: 'AI提示工具',
+        typeId: '67b7e9bce0cf2993700b1184',
+        url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
+    },
+    {
+        name: 'AI法律助手',
+        typeId: '67b7eae0e0cf2993700b1185',
+        url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
+    },
+    {
+        name: 'AI训练模型',
+        typeId: '67b7eb84e0cf2993700b1187',
+        url: 'https://ai-bot.cn/favorites/ai-models/'
+    },
+    {
+        name: 'AI设计工具',
+        typeId: '6790ab9d3de33b392c0330b4',
+        url: 'https://ai-bot.cn/favorites/ai-design-tools/'
+    },
+
+    {
+        name: 'AI编程工具',
+        typeId: '6790dc2c3de33b392c0330ba',
+        url: 'https://ai-bot.cn/favorites/ai-programming-tools/'
+    }
 ]
 
 function main() {
diff --git a/src/link/index.ts b/src/link/index.ts
index a7e61b9..169c545 100644
--- a/src/link/index.ts
+++ b/src/link/index.ts
@@ -22,10 +22,10 @@ export async function queryListData(list: FetchType[]) {
     const col = await getCollection('link');
     await col.deleteMany({})
     for (const item of list) {
-        await fetchData(item.typeId, item.url)
+        await fetchData(item)
     }
 }
-export async function fetchData(typeName: string, url: string) {
+export async function fetchData({ typeId, name: typeName = typeId, url, hasSubType = false }: FetchType) {
     try {
         // 请求目标页面
         const { data } = await axios.get(url);
@@ -35,25 +35,29 @@ export async function fetchData(typeName: string, url: string) {
         const $ = cheerio.load(data);
 
         // 提取工具卡片数据
-        const toolsData = [] as any;
+        const toolsData = [] as any[];
         const length = $('.url-card').length
-        console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
         let i = 0
         for (const element of $('.url-card')) {
+            const subTitle = $(element).parent().prev().find('h4').text().trim();
             const name = $(element).find('.url-info strong').text().trim();
             const tempLink = $(element).find('a').attr('href') as string;
             const description = $(element).find('.url-info p').text().trim();
             const _id = $(element).attr('data-id');
             const _originLink = $(element).find('img').attr('data-src');
             let link = tempLink
-            if (tempLink.startsWith('https://ai-bot.cn')) {
+
+
+            console.log(subTitle);
+
+            if (tempLink.startsWith('https://ai-bot')) {
                 link = await getPageData(tempLink, name) || ''
             }
 
             // 假设工具的类别是固定的,比如 "AI写作工具"
-            const type = typeName;
+            const type = typeId;
             const priority = 1; // 根据索引来定义优先级
-            const addTime = Date.now();
+            const addTime = Math.floor(Date.now() / 1000);
             const logoLink = await downloadImage(_originLink)
             // console.log(logoLink);
             // const logoLink = ''
@@ -67,13 +71,25 @@ export async function fetchData(typeName: string, url: string) {
                 priority,
                 logoLink,
                 addTime,
+                subLinkType: hasSubType ? [subTitle] : undefined
             };
+            if (hasSubType) {
+                // Same tool under several sub-headings: keep one record and merge the headings.
+                const existing = toolsData.find(val => val.name === name);
+                if (existing) {
+                    console.log('发现相同的name:' + name);
+                    existing.subLinkType.push(subTitle)
+                } else {
+                    toolsData.push(toolData);
+                }
+            } else {
+                toolsData.push(toolData);
+            }
 
-            toolsData.push(toolData);
             i++
             console.clear()
-            console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
-
+            console.log(`正在爬取${subTitle ? typeName + ',' + subTitle : typeName}类别的数据,共${length}条数据`);
+
             console.log(`${typeName}:进度:${i}/${length}`);
         }
 