diff --git a/src/index.ts b/src/index.ts index 454939e..a249e6f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,78 +12,78 @@ const fetchList = [ typeId: '6790aae23de33b392c0330b2', url: 'https://ai-bot.cn/favorites/ai-writing-tools/' }, - // { - // name: 'AI图像网站', - // typeId: '67908fc33de33b392c0330af', - // url: 'https://ai-bot.cn/favorites/best-ai-image-tools/' - // }, - // { - // name: 'AI视频网站', - // typeId: '67b6f0b7b139d1d6aa14cd06', - // url: 'https://ai-bot.cn/favorites/ai-video-tools/' - // }, - // { - // name: 'AI音频网站', - // typeId: '6791a98fc058e55ed0a094ca', - // url: 'https://ai-bot.cn/favorites/ai-audio-tools/' - // }, - // { - // name: 'AI办公网站', - // typeId: '6790ab4f3de33b392c0330b3', - // url: 'https://ai-bot.cn/favorites/ai-office-tools/', - // hasSubType: true - // }, - // { - // name: 'AI搜索工具', - // typeId: '6790dc6b3de33b392c0330bb', - // url: 'https://ai-bot.cn/favorites/ai-search-engines/' - // }, - // { - // name: 'AI对话网站', - // typeId: '6790c2f93de33b392c0330b6', - // url: 'https://ai-bot.cn/favorites/ai-chatbots/' - // }, - // { - // name: 'AI内容检测', - // typeId: '67b707c9b139d1d6aa14cd07', - // url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/' - // }, - // { - // name: 'AI学习工具', - // typeId: '67b7080fb139d1d6aa14cd08', - // url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/' - // }, - // { - // name: 'AI开发平台', - // typeId: '67b7eb3de0cf2993700b1186', - // url: 'https://ai-bot.cn/favorites/ai-frameworks/' - // }, - // { - // name: 'AI提示工具', - // typeId: '67b7e9bce0cf2993700b1184', - // url: 'https://ai-bot.cn/favorites/ai-prompt-tools/' - // }, - // { - // name: 'AI法律助手', - // typeId: '67b7eae0e0cf2993700b1185', - // url: 'https://ai-bot.cn/favorites/ai-legal-assistants/' - // }, - // { - // name: 'AI训练模型', - // typeId: '67b7eb84e0cf2993700b1187', - // url: 'https://ai-bot.cn/favorites/ai-models/' - // }, - // { - // name: 'AI设计工具', - // typeId: '6790ab9d3de33b392c0330b4', - // url: 'https://ai-bot.cn/favorites/ai-design-tools/' - // }, + { + name: 'AI图像网站', + typeId: '67908fc33de33b392c0330af', + url: 'https://ai-bot.cn/favorites/best-ai-image-tools/' + }, + { + name: 'AI视频网站', + typeId: '67b6f0b7b139d1d6aa14cd06', + url: 'https://ai-bot.cn/favorites/ai-video-tools/' + }, + { + name: 'AI音频网站', + typeId: '6791a98fc058e55ed0a094ca', + url: 'https://ai-bot.cn/favorites/ai-audio-tools/' + }, + { + name: 'AI办公网站', + typeId: '6790ab4f3de33b392c0330b3', + url: 'https://ai-bot.cn/favorites/ai-office-tools/', + hasSubType: true + }, + { + name: 'AI搜索工具', + typeId: '6790dc6b3de33b392c0330bb', + url: 'https://ai-bot.cn/favorites/ai-search-engines/' + }, + { + name: 'AI对话网站', + typeId: '6790c2f93de33b392c0330b6', + url: 'https://ai-bot.cn/favorites/ai-chatbots/' + }, + { + name: 'AI内容检测', + typeId: '67b707c9b139d1d6aa14cd07', + url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/' + }, + { + name: 'AI学习工具', + typeId: '67b7080fb139d1d6aa14cd08', + url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/' + }, + { + name: 'AI开发平台', + typeId: '67b7eb3de0cf2993700b1186', + url: 'https://ai-bot.cn/favorites/ai-frameworks/' + }, + { + name: 'AI提示工具', + typeId: '67b7e9bce0cf2993700b1184', + url: 'https://ai-bot.cn/favorites/ai-prompt-tools/' + }, + { + name: 'AI法律助手', + typeId: '67b7eae0e0cf2993700b1185', + url: 'https://ai-bot.cn/favorites/ai-legal-assistants/' + }, + { + name: 'AI训练模型', + typeId: '67b7eb84e0cf2993700b1187', + url: 'https://ai-bot.cn/favorites/ai-models/' + }, + { + name: 'AI设计工具', + typeId: '6790ab9d3de33b392c0330b4', + url: 'https://ai-bot.cn/favorites/ai-design-tools/' + }, - // { - // name: 'AI编程工具', - // typeId: '6790dc2c3de33b392c0330ba', - // url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/' - // } + { + name: 'AI编程工具', + typeId: '6790dc2c3de33b392c0330ba', + url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/' + } ] function main() { diff --git a/src/link/index.ts b/src/link/index.ts index 43a2ecb..8da8ba3 100644 --- a/src/link/index.ts +++ b/src/link/index.ts @@ -4,7 +4,13 @@ import { downloadImage } from "../share/tools" import { getCollection } from '../lib/mongodb'; import Turndown from 'turndown'; import { FetchType } from '..'; +import { v4 as uuid } from 'uuid'; // 要抓取的网页 URL +function removeQueryParams(url: string): string { + const urlObj = new URL(url); + urlObj.search = ''; // 清空查询参数 + return urlObj.toString(); +} async function getPageData(url: string, name: string) { try { @@ -12,7 +18,7 @@ async function getPageData(url: string, name: string) { const { data } = await axios.get(url); const $ = cheerio.load(data); const element = $(`a[title="${name}"]`) - const href = element.attr('href') as string + const link = removeQueryParams(element.attr('href') as string) const panelBodyHtml = $('.panel-body').html(); // 2. 使用Turndown将HTML转换为Markdown @@ -30,12 +36,15 @@ async function getPageData(url: string, name: string) { } }); // 执行转换 - const markdown = turndown.turndown(panelBodyHtml); + const content = turndown.turndown(panelBodyHtml); + const cover = await downloadImage($('.img-cover').attr('data-src')) const title = $('.site-name').text().trim() return { - href, - markdown, + link, title, + cover, + content, + _id: uuid() } } catch (error) { @@ -61,6 +70,7 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals // 提取工具卡片数据 const toolsData = [] as any[]; + const articleDataList = [] as any[]; const length = $('.url-card').length let i = 0 for (const element of $('.url-card')) { @@ -70,28 +80,24 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals const description = $(element).find('.url-info p').text().trim(); const _id = $(element).attr('data-id'); const _originLink = $(element).find('img').attr('data-src'); - let link = tempLink + let link = removeQueryParams(tempLink) - const articleData = {} as any + let articleData; console.log(subTitle); if (tempLink.startsWith('https://ai-bot')) { const pageData = await getPageData(tempLink, name) if (pageData) { - link = pageData.href - articleData.markdown = pageData.markdown - articleData.title = pageData.title - - + link = pageData.link + articleData = pageData } - console.log(pageData); - + } // 假设工具的类别是固定的,比如 "AI写作工具" const type = typeId; const priority = 1; // 根据索引来定义优先级 - const addTime = Date.now() / 1000; + const addTime = Math.floor(Date.now() / 1000); const logoLink = await downloadImage(_originLink) // console.log(logoLink); // const logoLink = '' @@ -105,8 +111,17 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals priority, logoLink, addTime, + articleId: articleData?._id ? articleData._id : undefined, subLinkType: hasSubType ? [subTitle] : undefined }; + if (articleData) { + articleDataList.push({ + ...articleData, + addTime: new Date().getTime() / 1000, + priority: 0 + + }) + } if (hasSubType) { if (toolsData.findIndex(val => val.name === name) !== -1) { console.log('发现相同的name:' + name); @@ -128,9 +143,12 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals } console.log(toolsData); + console.log(articleDataList); const col = await getCollection('link'); - col.insertMany(toolsData); + await col.insertMany(toolsData); + const articleCol = await getCollection('article'); + await articleCol.insertMany(articleDataList) console.log('数据插入成功'); } catch (error) { console.error('Error fetching data:', error); diff --git a/src/mannual/migrate.ts b/src/mannual/migrate.ts index 45c0982..0385777 100644 --- a/src/mannual/migrate.ts +++ b/src/mannual/migrate.ts @@ -3,10 +3,32 @@ import { getCollection, getDb } from "../lib/mongodb"; async function migrateLink() { const botDb = await getDb('ai-bot'); const botCol = botDb.collection('link'); + const botArticleCol = botDb.collection('link-article'); + const col = await getCollection('link') const links = await col.find().toArray(); + const articleCol = await getCollection('article') await botCol.deleteMany({}); await botCol.insertMany(links); + await botArticleCol.deleteMany({}); + await botArticleCol.insertMany(await articleCol.find().toArray()); console.log('Migrate link success'); } -migrateLink(); \ No newline at end of file +async function move() { + const botDb = await getDb('ai-bot'); + const linkCol = botDb.collection('link'); + const articleCol = botDb.collection('link-article'); + const links = await linkCol.find().toArray(); + const articles = await articleCol.find().toArray(); + articles.forEach(async (article) => { + const link = await linkCol.findOne({ articleId: article._id + '' }); + if (link) { + console.log(link); + + await articleCol.updateOne({ _id: article._id }, { $set: { description: link.description } }); + } + }); + +} +// migrateLink(); +move() \ No newline at end of file diff --git a/src/mannual/move.ts b/src/mannual/move.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/share/tools.ts b/src/share/tools.ts index 5e08598..55d89b4 100644 --- a/src/share/tools.ts +++ b/src/share/tools.ts @@ -21,6 +21,9 @@ export function askQuestion(query: string) { export async function downloadImage(url: string) { try { // 获取图片响应 + if (!url.startsWith('https:')) { + url = 'https:' + url; + } const response = await axios.get(url, { responseType: 'arraybuffer', headers: { @@ -29,7 +32,7 @@ export async function downloadImage(url: string) { }); // 获取文件扩展名 - + const ext = response.headers['content-type']?.split('/')[1] || 'jpg'; const id = uuid() // 生成最终路径