This commit is contained in:
expdsn 2025-02-28 16:01:37 +08:00
parent 141bf45123
commit 3f17fdf673
5 changed files with 131 additions and 88 deletions

View File

@ -12,78 +12,78 @@ const fetchList = [
typeId: '6790aae23de33b392c0330b2', typeId: '6790aae23de33b392c0330b2',
url: 'https://ai-bot.cn/favorites/ai-writing-tools/' url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
}, },
// { {
// name: 'AI图像网站', name: 'AI图像网站',
// typeId: '67908fc33de33b392c0330af', typeId: '67908fc33de33b392c0330af',
// url: 'https://ai-bot.cn/favorites/best-ai-image-tools/' url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
// }, },
// { {
// name: 'AI视频网站', name: 'AI视频网站',
// typeId: '67b6f0b7b139d1d6aa14cd06', typeId: '67b6f0b7b139d1d6aa14cd06',
// url: 'https://ai-bot.cn/favorites/ai-video-tools/' url: 'https://ai-bot.cn/favorites/ai-video-tools/'
// }, },
// { {
// name: 'AI音频网站', name: 'AI音频网站',
// typeId: '6791a98fc058e55ed0a094ca', typeId: '6791a98fc058e55ed0a094ca',
// url: 'https://ai-bot.cn/favorites/ai-audio-tools/' url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
// }, },
// { {
// name: 'AI办公网站', name: 'AI办公网站',
// typeId: '6790ab4f3de33b392c0330b3', typeId: '6790ab4f3de33b392c0330b3',
// url: 'https://ai-bot.cn/favorites/ai-office-tools/', url: 'https://ai-bot.cn/favorites/ai-office-tools/',
// hasSubType: true hasSubType: true
// }, },
// { {
// name: 'AI搜索工具', name: 'AI搜索工具',
// typeId: '6790dc6b3de33b392c0330bb', typeId: '6790dc6b3de33b392c0330bb',
// url: 'https://ai-bot.cn/favorites/ai-search-engines/' url: 'https://ai-bot.cn/favorites/ai-search-engines/'
// }, },
// { {
// name: 'AI对话网站', name: 'AI对话网站',
// typeId: '6790c2f93de33b392c0330b6', typeId: '6790c2f93de33b392c0330b6',
// url: 'https://ai-bot.cn/favorites/ai-chatbots/' url: 'https://ai-bot.cn/favorites/ai-chatbots/'
// }, },
// { {
// name: 'AI内容检测', name: 'AI内容检测',
// typeId: '67b707c9b139d1d6aa14cd07', typeId: '67b707c9b139d1d6aa14cd07',
// url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/' url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
// }, },
// { {
// name: 'AI学习工具', name: 'AI学习工具',
// typeId: '67b7080fb139d1d6aa14cd08', typeId: '67b7080fb139d1d6aa14cd08',
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/' url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
// }, },
// { {
// name: 'AI开发平台', name: 'AI开发平台',
// typeId: '67b7eb3de0cf2993700b1186', typeId: '67b7eb3de0cf2993700b1186',
// url: 'https://ai-bot.cn/favorites/ai-frameworks/' url: 'https://ai-bot.cn/favorites/ai-frameworks/'
// }, },
// { {
// name: 'AI提示工具', name: 'AI提示工具',
// typeId: '67b7e9bce0cf2993700b1184', typeId: '67b7e9bce0cf2993700b1184',
// url: 'https://ai-bot.cn/favorites/ai-prompt-tools/' url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
// }, },
// { {
// name: 'AI法律助手', name: 'AI法律助手',
// typeId: '67b7eae0e0cf2993700b1185', typeId: '67b7eae0e0cf2993700b1185',
// url: 'https://ai-bot.cn/favorites/ai-legal-assistants/' url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
// }, },
// { {
// name: 'AI训练模型', name: 'AI训练模型',
// typeId: '67b7eb84e0cf2993700b1187', typeId: '67b7eb84e0cf2993700b1187',
// url: 'https://ai-bot.cn/favorites/ai-models/' url: 'https://ai-bot.cn/favorites/ai-models/'
// }, },
// { {
// name: 'AI设计工具', name: 'AI设计工具',
// typeId: '6790ab9d3de33b392c0330b4', typeId: '6790ab9d3de33b392c0330b4',
// url: 'https://ai-bot.cn/favorites/ai-design-tools/' url: 'https://ai-bot.cn/favorites/ai-design-tools/'
// }, },
// { {
// name: 'AI编程工具', name: 'AI编程工具',
// typeId: '6790dc2c3de33b392c0330ba', typeId: '6790dc2c3de33b392c0330ba',
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/' url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
// } }
] ]
function main() { function main() {

View File

@ -4,7 +4,13 @@ import { downloadImage } from "../share/tools"
import { getCollection } from '../lib/mongodb'; import { getCollection } from '../lib/mongodb';
import Turndown from 'turndown'; import Turndown from 'turndown';
import { FetchType } from '..'; import { FetchType } from '..';
import { v4 as uuid } from 'uuid';
// 要抓取的网页 URL // 要抓取的网页 URL
/**
 * Returns `url` with its query string removed.
 *
 * Absolute URLs are parsed with the WHATWG `URL` class, so the result is the
 * normalized serialization (for example a bare origin gains a trailing `/`)
 * and any `#fragment` is preserved.
 *
 * Input that `URL` cannot parse (empty, relative, or protocol-relative such as
 * `//host/x.png?v=1`) does not throw: the `?query` part is stripped textually
 * and the rest of the string is returned untouched.
 *
 * @param url - The link to clean.
 * @returns The link without its query parameters.
 */
function removeQueryParams(url: string): string {
  // Nothing to strip; also protects against a missing scraped attribute.
  if (!url) {
    return url;
  }
  try {
    const urlObj = new URL(url);
    urlObj.search = ''; // clear the query parameters
    return urlObj.toString();
  } catch {
    // Not an absolute URL: fall back to plain string surgery.
    const queryStart = url.indexOf('?');
    const hashStart = url.indexOf('#');
    // No query at all, or the '?' belongs to the fragment.
    if (queryStart === -1 || (hashStart !== -1 && hashStart < queryStart)) {
      return url;
    }
    const fragment = hashStart === -1 ? '' : url.slice(hashStart);
    return url.slice(0, queryStart) + fragment;
  }
}
async function getPageData(url: string, name: string) { async function getPageData(url: string, name: string) {
try { try {
@ -12,7 +18,7 @@ async function getPageData(url: string, name: string) {
const { data } = await axios.get(url); const { data } = await axios.get(url);
const $ = cheerio.load(data); const $ = cheerio.load(data);
const element = $(`a[title="${name}"]`) const element = $(`a[title="${name}"]`)
const href = element.attr('href') as string const link = removeQueryParams(element.attr('href') as string)
const panelBodyHtml = $('.panel-body').html(); const panelBodyHtml = $('.panel-body').html();
// 2. 使用Turndown将HTML转换为Markdown // 2. 使用Turndown将HTML转换为Markdown
@ -30,12 +36,15 @@ async function getPageData(url: string, name: string) {
} }
}); });
// 执行转换 // 执行转换
const markdown = turndown.turndown(panelBodyHtml); const content = turndown.turndown(panelBodyHtml);
const cover = await downloadImage($('.img-cover').attr('data-src'))
const title = $('.site-name').text().trim() const title = $('.site-name').text().trim()
return { return {
href, link,
markdown,
title, title,
cover,
content,
_id: uuid()
} }
} catch (error) { } catch (error) {
@ -61,6 +70,7 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
// 提取工具卡片数据 // 提取工具卡片数据
const toolsData = [] as any[]; const toolsData = [] as any[];
const articleDataList = [] as any[];
const length = $('.url-card').length const length = $('.url-card').length
let i = 0 let i = 0
for (const element of $('.url-card')) { for (const element of $('.url-card')) {
@ -70,28 +80,24 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
const description = $(element).find('.url-info p').text().trim(); const description = $(element).find('.url-info p').text().trim();
const _id = $(element).attr('data-id'); const _id = $(element).attr('data-id');
const _originLink = $(element).find('img').attr('data-src'); const _originLink = $(element).find('img').attr('data-src');
let link = tempLink let link = removeQueryParams(tempLink)
const articleData = {} as any let articleData;
console.log(subTitle); console.log(subTitle);
if (tempLink.startsWith('https://ai-bot')) { if (tempLink.startsWith('https://ai-bot')) {
const pageData = await getPageData(tempLink, name) const pageData = await getPageData(tempLink, name)
if (pageData) { if (pageData) {
link = pageData.href link = pageData.link
articleData.markdown = pageData.markdown articleData = pageData
articleData.title = pageData.title
} }
console.log(pageData);
} }
// 假设工具的类别是固定的,比如 "AI写作工具" // 假设工具的类别是固定的,比如 "AI写作工具"
const type = typeId; const type = typeId;
const priority = 1; // 根据索引来定义优先级 const priority = 1; // 根据索引来定义优先级
const addTime = Date.now() / 1000; const addTime = Math.floor(Date.now() / 1000);
const logoLink = await downloadImage(_originLink) const logoLink = await downloadImage(_originLink)
// console.log(logoLink); // console.log(logoLink);
// const logoLink = '' // const logoLink = ''
@ -105,8 +111,17 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
priority, priority,
logoLink, logoLink,
addTime, addTime,
articleId: articleData?._id ? articleData._id : undefined,
subLinkType: hasSubType ? [subTitle] : undefined subLinkType: hasSubType ? [subTitle] : undefined
}; };
if (articleData) {
articleDataList.push({
...articleData,
addTime: new Date().getTime() / 1000,
priority: 0
})
}
if (hasSubType) { if (hasSubType) {
if (toolsData.findIndex(val => val.name === name) !== -1) { if (toolsData.findIndex(val => val.name === name) !== -1) {
console.log('发现相同的name:' + name); console.log('发现相同的name:' + name);
@ -128,9 +143,12 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
} }
console.log(toolsData); console.log(toolsData);
console.log(articleDataList);
const col = await getCollection('link'); const col = await getCollection('link');
col.insertMany(toolsData); await col.insertMany(toolsData);
const articleCol = await getCollection('article');
await articleCol.insertMany(articleDataList)
console.log('数据插入成功'); console.log('数据插入成功');
} catch (error) { } catch (error) {
console.error('Error fetching data:', error); console.error('Error fetching data:', error);

View File

@ -3,10 +3,32 @@ import { getCollection, getDb } from "../lib/mongodb";
/**
 * Replaces the contents of the `link` and `link-article` collections in the
 * `ai-bot` database with the documents currently held in the source `link`
 * and `article` collections (as resolved by `getCollection`).
 *
 * Both source collections are read before anything is deleted, so a failed
 * read cannot leave a target collection wiped but not refilled.
 */
async function migrateLink() {
  const botDb = await getDb('ai-bot');
  const botCol = botDb.collection('link');
  const botArticleCol = botDb.collection('link-article');
  const col = await getCollection('link');
  const articleCol = await getCollection('article');

  // Load everything up front; deletions only start once both reads succeeded.
  const links = await col.find().toArray();
  const articles = await articleCol.find().toArray();

  await botCol.deleteMany({});
  // NOTE(review): assumes the official MongoDB driver, whose insertMany
  // rejects an empty batch - skip the call when there is nothing to copy.
  if (links.length > 0) {
    await botCol.insertMany(links);
  }

  await botArticleCol.deleteMany({});
  if (articles.length > 0) {
    await botArticleCol.insertMany(articles);
  }
  console.log('Migrate link success');
}
/**
 * Copies `description` from each `link` document onto the `link-article`
 * document it references, inside the `ai-bot` database.
 *
 * A link references an article through `articleId`, which is matched against
 * the stringified article `_id`. Articles without a matching link are left
 * unchanged.
 *
 * Updates run one after another and are awaited, so the returned promise only
 * resolves once every update has completed, and any database error rejects it
 * instead of surfacing as an unhandled rejection.
 */
async function move() {
  const botDb = await getDb('ai-bot');
  const linkCol = botDb.collection('link');
  const articleCol = botDb.collection('link-article');
  const articles = await articleCol.find().toArray();

  // for...of (not forEach) so each async step is actually awaited.
  for (const article of articles) {
    const link = await linkCol.findOne({ articleId: article._id + '' });
    if (link) {
      console.log(link);
      await articleCol.updateOne({ _id: article._id }, { $set: { description: link.description } });
    }
  }
}
// migrateLink();
// Script entry point: run the backfill and report a failure explicitly
// instead of leaving the returned promise unhandled.
move().catch((error: unknown) => {
  console.error('move failed:', error);
  process.exitCode = 1;
});

0
src/mannual/move.ts Normal file
View File

View File

@ -21,6 +21,9 @@ export function askQuestion(query: string) {
export async function downloadImage(url: string) { export async function downloadImage(url: string) {
try { try {
// 获取图片响应 // 获取图片响应
if (!url.startsWith('https:')) {
url = 'https:' + url;
}
const response = await axios.get(url, { const response = await axios.get(url, {
responseType: 'arraybuffer', responseType: 'arraybuffer',
headers: { headers: {
@ -29,7 +32,7 @@ export async function downloadImage(url: string) {
}); });
// 获取文件扩展名 // 获取文件扩展名
const ext = response.headers['content-type']?.split('/')[1] || 'jpg'; const ext = response.headers['content-type']?.split('/')[1] || 'jpg';
const id = uuid() const id = uuid()
// 生成最终路径 // 生成最终路径