save
This commit is contained in:
parent
141bf45123
commit
3f17fdf673
142
src/index.ts
142
src/index.ts
|
@ -12,78 +12,78 @@ const fetchList = [
|
|||
typeId: '6790aae23de33b392c0330b2',
|
||||
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
||||
},
|
||||
// {
|
||||
// name: 'AI图像网站',
|
||||
// typeId: '67908fc33de33b392c0330af',
|
||||
// url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI视频网站',
|
||||
// typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI音频网站',
|
||||
// typeId: '6791a98fc058e55ed0a094ca',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI办公网站',
|
||||
// typeId: '6790ab4f3de33b392c0330b3',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-office-tools/',
|
||||
// hasSubType: true
|
||||
// },
|
||||
// {
|
||||
// name: 'AI搜索工具',
|
||||
// typeId: '6790dc6b3de33b392c0330bb',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-search-engines/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI对话网站',
|
||||
// typeId: '6790c2f93de33b392c0330b6',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-chatbots/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI内容检测',
|
||||
// typeId: '67b707c9b139d1d6aa14cd07',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI学习工具',
|
||||
// typeId: '67b7080fb139d1d6aa14cd08',
|
||||
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI开发平台',
|
||||
// typeId: '67b7eb3de0cf2993700b1186',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-frameworks/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI提示工具',
|
||||
// typeId: '67b7e9bce0cf2993700b1184',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI法律助手',
|
||||
// typeId: '67b7eae0e0cf2993700b1185',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI训练模型',
|
||||
// typeId: '67b7eb84e0cf2993700b1187',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-models/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI设计工具',
|
||||
// typeId: '6790ab9d3de33b392c0330b4',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-design-tools/'
|
||||
// },
|
||||
{
|
||||
name: 'AI图像网站',
|
||||
typeId: '67908fc33de33b392c0330af',
|
||||
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI视频网站',
|
||||
typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI音频网站',
|
||||
typeId: '6791a98fc058e55ed0a094ca',
|
||||
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI办公网站',
|
||||
typeId: '6790ab4f3de33b392c0330b3',
|
||||
url: 'https://ai-bot.cn/favorites/ai-office-tools/',
|
||||
hasSubType: true
|
||||
},
|
||||
{
|
||||
name: 'AI搜索工具',
|
||||
typeId: '6790dc6b3de33b392c0330bb',
|
||||
url: 'https://ai-bot.cn/favorites/ai-search-engines/'
|
||||
},
|
||||
{
|
||||
name: 'AI对话网站',
|
||||
typeId: '6790c2f93de33b392c0330b6',
|
||||
url: 'https://ai-bot.cn/favorites/ai-chatbots/'
|
||||
},
|
||||
{
|
||||
name: 'AI内容检测',
|
||||
typeId: '67b707c9b139d1d6aa14cd07',
|
||||
url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI学习工具',
|
||||
typeId: '67b7080fb139d1d6aa14cd08',
|
||||
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
},
|
||||
{
|
||||
name: 'AI开发平台',
|
||||
typeId: '67b7eb3de0cf2993700b1186',
|
||||
url: 'https://ai-bot.cn/favorites/ai-frameworks/'
|
||||
},
|
||||
{
|
||||
name: 'AI提示工具',
|
||||
typeId: '67b7e9bce0cf2993700b1184',
|
||||
url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI法律助手',
|
||||
typeId: '67b7eae0e0cf2993700b1185',
|
||||
url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
|
||||
},
|
||||
{
|
||||
name: 'AI训练模型',
|
||||
typeId: '67b7eb84e0cf2993700b1187',
|
||||
url: 'https://ai-bot.cn/favorites/ai-models/'
|
||||
},
|
||||
{
|
||||
name: 'AI设计工具',
|
||||
typeId: '6790ab9d3de33b392c0330b4',
|
||||
url: 'https://ai-bot.cn/favorites/ai-design-tools/'
|
||||
},
|
||||
|
||||
// {
|
||||
// name: 'AI编程工具',
|
||||
// typeId: '6790dc2c3de33b392c0330ba',
|
||||
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
// }
|
||||
{
|
||||
name: 'AI编程工具',
|
||||
typeId: '6790dc2c3de33b392c0330ba',
|
||||
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
}
|
||||
|
||||
]
|
||||
function main() {
|
||||
|
|
|
@ -4,7 +4,13 @@ import { downloadImage } from "../share/tools"
|
|||
import { getCollection } from '../lib/mongodb';
|
||||
import Turndown from 'turndown';
|
||||
import { FetchType } from '..';
|
||||
import { v4 as uuid } from 'uuid';
|
||||
// 要抓取的网页 URL
|
||||
/**
 * Returns `url` with its query string removed; any `#fragment` is kept.
 *
 * Absolute URLs go through the WHATWG `URL` parser, so the result is the
 * parser's serialized form. Strings the parser rejects (relative paths,
 * protocol-relative `//host/...` links, the empty string) are handled
 * textually instead of throwing, so a single odd `href` scraped from a page
 * does not abort the caller.
 *
 * @param url - Absolute or relative URL string.
 * @returns The same URL without the `?query` part.
 */
function removeQueryParams(url: string): string {
  try {
    const parsed = new URL(url);
    parsed.search = ''; // clear the query parameters
    return parsed.toString();
  } catch {
    // Not parseable as an absolute URL: strip the query by hand. The fragment
    // is split off first so a '?' that appears inside it is left alone.
    const hashIndex = url.indexOf('#');
    const beforeHash = hashIndex === -1 ? url : url.slice(0, hashIndex);
    const fragment = hashIndex === -1 ? '' : url.slice(hashIndex);
    const queryIndex = beforeHash.indexOf('?');
    const withoutQuery = queryIndex === -1 ? beforeHash : beforeHash.slice(0, queryIndex);
    return withoutQuery + fragment;
  }
}
|
||||
|
||||
async function getPageData(url: string, name: string) {
|
||||
try {
|
||||
|
@ -12,7 +18,7 @@ async function getPageData(url: string, name: string) {
|
|||
const { data } = await axios.get(url);
|
||||
const $ = cheerio.load(data);
|
||||
const element = $(`a[title="${name}"]`)
|
||||
const href = element.attr('href') as string
|
||||
const link = removeQueryParams(element.attr('href') as string)
|
||||
const panelBodyHtml = $('.panel-body').html();
|
||||
|
||||
// 2. 使用Turndown将HTML转换为Markdown
|
||||
|
@ -30,12 +36,15 @@ async function getPageData(url: string, name: string) {
|
|||
}
|
||||
});
|
||||
// 执行转换
|
||||
const markdown = turndown.turndown(panelBodyHtml);
|
||||
const content = turndown.turndown(panelBodyHtml);
|
||||
const cover = await downloadImage($('.img-cover').attr('data-src'))
|
||||
const title = $('.site-name').text().trim()
|
||||
return {
|
||||
href,
|
||||
markdown,
|
||||
link,
|
||||
title,
|
||||
cover,
|
||||
content,
|
||||
_id: uuid()
|
||||
|
||||
}
|
||||
} catch (error) {
|
||||
|
@ -61,6 +70,7 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
|||
|
||||
// 提取工具卡片数据
|
||||
const toolsData = [] as any[];
|
||||
const articleDataList = [] as any[];
|
||||
const length = $('.url-card').length
|
||||
let i = 0
|
||||
for (const element of $('.url-card')) {
|
||||
|
@ -70,28 +80,24 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
|||
const description = $(element).find('.url-info p').text().trim();
|
||||
const _id = $(element).attr('data-id');
|
||||
const _originLink = $(element).find('img').attr('data-src');
|
||||
let link = tempLink
|
||||
let link = removeQueryParams(tempLink)
|
||||
|
||||
const articleData = {} as any
|
||||
let articleData;
|
||||
console.log(subTitle);
|
||||
|
||||
if (tempLink.startsWith('https://ai-bot')) {
|
||||
const pageData = await getPageData(tempLink, name)
|
||||
if (pageData) {
|
||||
link = pageData.href
|
||||
articleData.markdown = pageData.markdown
|
||||
articleData.title = pageData.title
|
||||
|
||||
|
||||
link = pageData.link
|
||||
articleData = pageData
|
||||
}
|
||||
console.log(pageData);
|
||||
|
||||
|
||||
|
||||
}
|
||||
// 假设工具的类别是固定的,比如 "AI写作工具"
|
||||
const type = typeId;
|
||||
const priority = 1; // 根据索引来定义优先级
|
||||
const addTime = Date.now() / 1000;
|
||||
const addTime = Math.floor(Date.now() / 1000);
|
||||
const logoLink = await downloadImage(_originLink)
|
||||
// console.log(logoLink);
|
||||
// const logoLink = ''
|
||||
|
@ -105,8 +111,17 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
|||
priority,
|
||||
logoLink,
|
||||
addTime,
|
||||
articleId: articleData?._id ? articleData._id : undefined,
|
||||
subLinkType: hasSubType ? [subTitle] : undefined
|
||||
};
|
||||
if (articleData) {
|
||||
articleDataList.push({
|
||||
...articleData,
|
||||
addTime: new Date().getTime() / 1000,
|
||||
priority: 0
|
||||
|
||||
})
|
||||
}
|
||||
if (hasSubType) {
|
||||
if (toolsData.findIndex(val => val.name === name) !== -1) {
|
||||
console.log('发现相同的name:' + name);
|
||||
|
@ -128,9 +143,12 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
|||
|
||||
}
|
||||
console.log(toolsData);
|
||||
console.log(articleDataList);
|
||||
|
||||
const col = await getCollection('link');
|
||||
col.insertMany(toolsData);
|
||||
await col.insertMany(toolsData);
|
||||
const articleCol = await getCollection('article');
|
||||
await articleCol.insertMany(articleDataList)
|
||||
console.log('数据插入成功');
|
||||
} catch (error) {
|
||||
console.error('Error fetching data:', error);
|
||||
|
|
|
@ -3,10 +3,32 @@ import { getCollection, getDb } from "../lib/mongodb";
|
|||
/**
 * Replaces the contents of the `link` and `link-article` collections in the
 * `ai-bot` database with the documents currently held in the collections
 * returned by `getCollection('link')` and `getCollection('article')`.
 *
 * Both source collections are read in full before anything is deleted, so a
 * failing read cannot leave a target collection wiped. Empty sources skip the
 * insert because the MongoDB driver rejects `insertMany([])`; the target is
 * still cleared in that case so it mirrors the (empty) source.
 */
async function migrateLink() {
  const botDb = await getDb('ai-bot');
  const botCol = botDb.collection('link');
  const botArticleCol = botDb.collection('link-article');

  const col = await getCollection('link');
  const articleCol = await getCollection('article');

  // Snapshot the sources up front, before any destructive operation.
  const [links, articles] = await Promise.all([
    col.find().toArray(),
    articleCol.find().toArray(),
  ]);

  await botCol.deleteMany({});
  if (links.length > 0) {
    await botCol.insertMany(links);
  }

  await botArticleCol.deleteMany({});
  if (articles.length > 0) {
    await botArticleCol.insertMany(articles);
  }

  console.log('Migrate link success');
}
|
||||
migrateLink();
|
||||
/**
 * Copies each link's `description` onto the article it references.
 *
 * For every document in the `ai-bot` database's `link-article` collection,
 * looks up the `link` document whose `articleId` equals the article's `_id`
 * (compared as a string) and, when one exists, sets the article's
 * `description` to the link's `description`.
 *
 * Updates run one after another and are awaited, so the returned promise
 * settles only after every article has been processed, and any database
 * error rejects it instead of being lost inside a detached callback.
 */
async function move() {
  const botDb = await getDb('ai-bot');
  const linkCol = botDb.collection('link');
  const articleCol = botDb.collection('link-article');
  const articles = await articleCol.find().toArray();

  for (const article of articles) {
    // articleId is matched against the string form of the article's _id.
    const link = await linkCol.findOne({ articleId: article._id + '' });
    if (!link) {
      continue;
    }
    console.log(link);

    await articleCol.updateOne(
      { _id: article._id },
      { $set: { description: link.description } }
    );
  }
}
|
||||
// migrateLink();
|
||||
move()
|
|
@ -21,6 +21,9 @@ export function askQuestion(query: string) {
|
|||
export async function downloadImage(url: string) {
|
||||
try {
|
||||
// 获取图片响应
|
||||
if (!url.startsWith('https:')) {
|
||||
url = 'https:' + url;
|
||||
}
|
||||
const response = await axios.get(url, {
|
||||
responseType: 'arraybuffer',
|
||||
headers: {
|
||||
|
@ -29,7 +32,7 @@ export async function downloadImage(url: string) {
|
|||
});
|
||||
|
||||
// 获取文件扩展名
|
||||
|
||||
|
||||
const ext = response.headers['content-type']?.split('/')[1] || 'jpg';
|
||||
const id = uuid()
|
||||
// 生成最终路径
|
||||
|
|
Loading…
Reference in New Issue