This commit is contained in:
expdsn 2025-02-28 16:01:37 +08:00
parent 141bf45123
commit 3f17fdf673
5 changed files with 131 additions and 88 deletions

View File

@ -12,78 +12,78 @@ const fetchList = [
typeId: '6790aae23de33b392c0330b2',
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
},
// {
// name: 'AI图像网站',
// typeId: '67908fc33de33b392c0330af',
// url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
// },
// {
// name: 'AI视频网站',
// typeId: '67b6f0b7b139d1d6aa14cd06',
// url: 'https://ai-bot.cn/favorites/ai-video-tools/'
// },
// {
// name: 'AI音频网站',
// typeId: '6791a98fc058e55ed0a094ca',
// url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
// },
// {
// name: 'AI办公网站',
// typeId: '6790ab4f3de33b392c0330b3',
// url: 'https://ai-bot.cn/favorites/ai-office-tools/',
// hasSubType: true
// },
// {
// name: 'AI搜索工具',
// typeId: '6790dc6b3de33b392c0330bb',
// url: 'https://ai-bot.cn/favorites/ai-search-engines/'
// },
// {
// name: 'AI对话网站',
// typeId: '6790c2f93de33b392c0330b6',
// url: 'https://ai-bot.cn/favorites/ai-chatbots/'
// },
// {
// name: 'AI内容检测',
// typeId: '67b707c9b139d1d6aa14cd07',
// url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
// },
// {
// name: 'AI学习工具',
// typeId: '67b7080fb139d1d6aa14cd08',
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
// },
// {
// name: 'AI开发平台',
// typeId: '67b7eb3de0cf2993700b1186',
// url: 'https://ai-bot.cn/favorites/ai-frameworks/'
// },
// {
// name: 'AI提示工具',
// typeId: '67b7e9bce0cf2993700b1184',
// url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
// },
// {
// name: 'AI法律助手',
// typeId: '67b7eae0e0cf2993700b1185',
// url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
// },
// {
// name: 'AI训练模型',
// typeId: '67b7eb84e0cf2993700b1187',
// url: 'https://ai-bot.cn/favorites/ai-models/'
// },
// {
// name: 'AI设计工具',
// typeId: '6790ab9d3de33b392c0330b4',
// url: 'https://ai-bot.cn/favorites/ai-design-tools/'
// },
{
name: 'AI图像网站',
typeId: '67908fc33de33b392c0330af',
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
},
{
name: 'AI视频网站',
typeId: '67b6f0b7b139d1d6aa14cd06',
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
},
{
name: 'AI音频网站',
typeId: '6791a98fc058e55ed0a094ca',
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
},
{
name: 'AI办公网站',
typeId: '6790ab4f3de33b392c0330b3',
url: 'https://ai-bot.cn/favorites/ai-office-tools/',
hasSubType: true
},
{
name: 'AI搜索工具',
typeId: '6790dc6b3de33b392c0330bb',
url: 'https://ai-bot.cn/favorites/ai-search-engines/'
},
{
name: 'AI对话网站',
typeId: '6790c2f93de33b392c0330b6',
url: 'https://ai-bot.cn/favorites/ai-chatbots/'
},
{
name: 'AI内容检测',
typeId: '67b707c9b139d1d6aa14cd07',
url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
},
{
name: 'AI学习工具',
typeId: '67b7080fb139d1d6aa14cd08',
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
},
{
name: 'AI开发平台',
typeId: '67b7eb3de0cf2993700b1186',
url: 'https://ai-bot.cn/favorites/ai-frameworks/'
},
{
name: 'AI提示工具',
typeId: '67b7e9bce0cf2993700b1184',
url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
},
{
name: 'AI法律助手',
typeId: '67b7eae0e0cf2993700b1185',
url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
},
{
name: 'AI训练模型',
typeId: '67b7eb84e0cf2993700b1187',
url: 'https://ai-bot.cn/favorites/ai-models/'
},
{
name: 'AI设计工具',
typeId: '6790ab9d3de33b392c0330b4',
url: 'https://ai-bot.cn/favorites/ai-design-tools/'
},
// {
// name: 'AI编程工具',
// typeId: '6790dc2c3de33b392c0330ba',
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
// }
{
name: 'AI编程工具',
typeId: '6790dc2c3de33b392c0330ba',
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
}
]
function main() {

View File

@ -4,7 +4,13 @@ import { downloadImage } from "../share/tools"
import { getCollection } from '../lib/mongodb';
import Turndown from 'turndown';
import { FetchType } from '..';
import { v4 as uuid } from 'uuid';
// 要抓取的网页 URL
/**
 * Returns `url` with its query string removed.
 *
 * Only the search component is cleared; every other part of the URL is kept
 * exactly as the `URL` class serializes it.
 *
 * @param url - URL string handed to the `URL` constructor.
 * @returns The serialized URL without its query string.
 * @throws TypeError if `url` cannot be parsed by the `URL` constructor.
 */
function removeQueryParams(url: string): string {
  const parsed = new URL(url);
  // Assigning an empty string drops the whole "?key=value" section.
  parsed.search = '';
  return parsed.href;
}
async function getPageData(url: string, name: string) {
try {
@ -12,7 +18,7 @@ async function getPageData(url: string, name: string) {
const { data } = await axios.get(url);
const $ = cheerio.load(data);
const element = $(`a[title="${name}"]`)
const href = element.attr('href') as string
const link = removeQueryParams(element.attr('href') as string)
const panelBodyHtml = $('.panel-body').html();
// 2. 使用Turndown将HTML转换为Markdown
@ -30,12 +36,15 @@ async function getPageData(url: string, name: string) {
}
});
// 执行转换
const markdown = turndown.turndown(panelBodyHtml);
const content = turndown.turndown(panelBodyHtml);
const cover = await downloadImage($('.img-cover').attr('data-src'))
const title = $('.site-name').text().trim()
return {
href,
markdown,
link,
title,
cover,
content,
_id: uuid()
}
} catch (error) {
@ -61,6 +70,7 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
// 提取工具卡片数据
const toolsData = [] as any[];
const articleDataList = [] as any[];
const length = $('.url-card').length
let i = 0
for (const element of $('.url-card')) {
@ -70,28 +80,24 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
const description = $(element).find('.url-info p').text().trim();
const _id = $(element).attr('data-id');
const _originLink = $(element).find('img').attr('data-src');
let link = tempLink
let link = removeQueryParams(tempLink)
const articleData = {} as any
let articleData;
console.log(subTitle);
if (tempLink.startsWith('https://ai-bot')) {
const pageData = await getPageData(tempLink, name)
if (pageData) {
link = pageData.href
articleData.markdown = pageData.markdown
articleData.title = pageData.title
link = pageData.link
articleData = pageData
}
console.log(pageData);
}
// 假设工具的类别是固定的,比如 "AI写作工具"
const type = typeId;
const priority = 1; // 根据索引来定义优先级
const addTime = Date.now() / 1000;
const addTime = Math.floor(Date.now() / 1000);
const logoLink = await downloadImage(_originLink)
// console.log(logoLink);
// const logoLink = ''
@ -105,8 +111,17 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
priority,
logoLink,
addTime,
articleId: articleData?._id ? articleData._id : undefined,
subLinkType: hasSubType ? [subTitle] : undefined
};
if (articleData) {
articleDataList.push({
...articleData,
addTime: new Date().getTime() / 1000,
priority: 0
})
}
if (hasSubType) {
if (toolsData.findIndex(val => val.name === name) !== -1) {
console.log('发现相同的name:' + name);
@ -128,9 +143,12 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
}
console.log(toolsData);
console.log(articleDataList);
const col = await getCollection('link');
col.insertMany(toolsData);
await col.insertMany(toolsData);
const articleCol = await getCollection('article');
await articleCol.insertMany(articleDataList)
console.log('数据插入成功');
} catch (error) {
console.error('Error fetching data:', error);

View File

@ -3,10 +3,32 @@ import { getCollection, getDb } from "../lib/mongodb";
/**
 * Replaces the contents of the `link` and `link-article` collections in the
 * `ai-bot` database with the documents currently held in the collections
 * returned by `getCollection('link')` and `getCollection('article')`.
 *
 * Both source collections are read in full before any target collection is
 * cleared, so a failing read cannot leave a target emptied. Each target is
 * then wiped and refilled; the refill is skipped when the source is empty
 * because the MongoDB driver rejects `insertMany` with an empty array.
 *
 * @returns A promise that resolves once both targets have been rewritten.
 */
async function migrateLink(): Promise<void> {
  const botDb = await getDb('ai-bot');
  const botCol = botDb.collection('link');
  const botArticleCol = botDb.collection('link-article');
  const col = await getCollection('link');
  const articleCol = await getCollection('article');

  // Load everything up front, before the destructive deleteMany calls.
  const links = await col.find().toArray();
  const articles = await articleCol.find().toArray();

  await botCol.deleteMany({});
  if (links.length > 0) {
    await botCol.insertMany(links);
  }

  await botArticleCol.deleteMany({});
  if (articles.length > 0) {
    await botArticleCol.insertMany(articles);
  }

  console.log('Migrate link success');
}
migrateLink();
/**
 * Copies each link's `description` onto the article it points to.
 *
 * For every document in the `link-article` collection of the `ai-bot`
 * database, looks up a document in `link` whose `articleId` equals the
 * article's `_id` (compared as a string) and, when one is found, writes that
 * link's `description` onto the article.
 *
 * Articles are processed one after another with `await`, so the returned
 * promise resolves only after every update has completed and any database
 * error rejects it instead of becoming an unhandled rejection.
 *
 * @returns A promise that resolves once all matching articles are updated.
 */
async function move(): Promise<void> {
  const botDb = await getDb('ai-bot');
  const linkCol = botDb.collection('link');
  const articleCol = botDb.collection('link-article');
  const articles = await articleCol.find().toArray();

  // A plain for...of loop is required here: Array.prototype.forEach ignores
  // the promises returned by an async callback.
  for (const article of articles) {
    // Links store the article id as a string, so stringify before matching.
    const link = await linkCol.findOne({ articleId: String(article._id) });
    if (!link) {
      continue;
    }
    console.log(link);
    await articleCol.updateOne(
      { _id: article._id },
      { $set: { description: link.description } }
    );
  }
}
// migrateLink();
move()

0
src/mannual/move.ts Normal file
View File

View File

@ -21,6 +21,9 @@ export function askQuestion(query: string) {
export async function downloadImage(url: string) {
try {
// 获取图片响应
if (!url.startsWith('https:')) {
url = 'https:' + url;
}
const response = await axios.get(url, {
responseType: 'arraybuffer',
headers: {
@ -29,7 +32,7 @@ export async function downloadImage(url: string) {
});
// 获取文件扩展名
const ext = response.headers['content-type']?.split('/')[1] || 'jpg';
const id = uuid()
// 生成最终路径