save
This commit is contained in:
parent
141bf45123
commit
3f17fdf673
142
src/index.ts
142
src/index.ts
|
@ -12,78 +12,78 @@ const fetchList = [
|
||||||
typeId: '6790aae23de33b392c0330b2',
|
typeId: '6790aae23de33b392c0330b2',
|
||||||
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
||||||
},
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI图像网站',
|
name: 'AI图像网站',
|
||||||
// typeId: '67908fc33de33b392c0330af',
|
typeId: '67908fc33de33b392c0330af',
|
||||||
// url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI视频网站',
|
name: 'AI视频网站',
|
||||||
// typeId: '67b6f0b7b139d1d6aa14cd06',
|
typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI音频网站',
|
name: 'AI音频网站',
|
||||||
// typeId: '6791a98fc058e55ed0a094ca',
|
typeId: '6791a98fc058e55ed0a094ca',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI办公网站',
|
name: 'AI办公网站',
|
||||||
// typeId: '6790ab4f3de33b392c0330b3',
|
typeId: '6790ab4f3de33b392c0330b3',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-office-tools/',
|
url: 'https://ai-bot.cn/favorites/ai-office-tools/',
|
||||||
// hasSubType: true
|
hasSubType: true
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI搜索工具',
|
name: 'AI搜索工具',
|
||||||
// typeId: '6790dc6b3de33b392c0330bb',
|
typeId: '6790dc6b3de33b392c0330bb',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-search-engines/'
|
url: 'https://ai-bot.cn/favorites/ai-search-engines/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI对话网站',
|
name: 'AI对话网站',
|
||||||
// typeId: '6790c2f93de33b392c0330b6',
|
typeId: '6790c2f93de33b392c0330b6',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-chatbots/'
|
url: 'https://ai-bot.cn/favorites/ai-chatbots/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI内容检测',
|
name: 'AI内容检测',
|
||||||
// typeId: '67b707c9b139d1d6aa14cd07',
|
typeId: '67b707c9b139d1d6aa14cd07',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI学习工具',
|
name: 'AI学习工具',
|
||||||
// typeId: '67b7080fb139d1d6aa14cd08',
|
typeId: '67b7080fb139d1d6aa14cd08',
|
||||||
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI开发平台',
|
name: 'AI开发平台',
|
||||||
// typeId: '67b7eb3de0cf2993700b1186',
|
typeId: '67b7eb3de0cf2993700b1186',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-frameworks/'
|
url: 'https://ai-bot.cn/favorites/ai-frameworks/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI提示工具',
|
name: 'AI提示工具',
|
||||||
// typeId: '67b7e9bce0cf2993700b1184',
|
typeId: '67b7e9bce0cf2993700b1184',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI法律助手',
|
name: 'AI法律助手',
|
||||||
// typeId: '67b7eae0e0cf2993700b1185',
|
typeId: '67b7eae0e0cf2993700b1185',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
|
url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI训练模型',
|
name: 'AI训练模型',
|
||||||
// typeId: '67b7eb84e0cf2993700b1187',
|
typeId: '67b7eb84e0cf2993700b1187',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-models/'
|
url: 'https://ai-bot.cn/favorites/ai-models/'
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// name: 'AI设计工具',
|
name: 'AI设计工具',
|
||||||
// typeId: '6790ab9d3de33b392c0330b4',
|
typeId: '6790ab9d3de33b392c0330b4',
|
||||||
// url: 'https://ai-bot.cn/favorites/ai-design-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-design-tools/'
|
||||||
// },
|
},
|
||||||
|
|
||||||
// {
|
{
|
||||||
// name: 'AI编程工具',
|
name: 'AI编程工具',
|
||||||
// typeId: '6790dc2c3de33b392c0330ba',
|
typeId: '6790dc2c3de33b392c0330ba',
|
||||||
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||||
// }
|
}
|
||||||
|
|
||||||
]
|
]
|
||||||
function main() {
|
function main() {
|
||||||
|
|
|
@ -4,7 +4,13 @@ import { downloadImage } from "../share/tools"
|
||||||
import { getCollection } from '../lib/mongodb';
|
import { getCollection } from '../lib/mongodb';
|
||||||
import Turndown from 'turndown';
|
import Turndown from 'turndown';
|
||||||
import { FetchType } from '..';
|
import { FetchType } from '..';
|
||||||
|
import { v4 as uuid } from 'uuid';
|
||||||
// 要抓取的网页 URL
|
// 要抓取的网页 URL
|
||||||
|
/**
 * Strips the query string from an absolute URL.
 *
 * Only the `?key=value` portion is removed. Scheme, host, path and any
 * `#fragment` are kept, serialized the way the WHATWG `URL` class
 * normalizes them.
 *
 * @param url - Absolute URL to clean.
 * @returns The URL serialized without its query string.
 * @throws TypeError when `url` cannot be parsed as an absolute URL.
 */
function removeQueryParams(url: string): string {
  const parsed = new URL(url);
  parsed.search = ''; // clear the query parameters
  return parsed.toString();
}
|
||||||
|
|
||||||
async function getPageData(url: string, name: string) {
|
async function getPageData(url: string, name: string) {
|
||||||
try {
|
try {
|
||||||
|
@ -12,7 +18,7 @@ async function getPageData(url: string, name: string) {
|
||||||
const { data } = await axios.get(url);
|
const { data } = await axios.get(url);
|
||||||
const $ = cheerio.load(data);
|
const $ = cheerio.load(data);
|
||||||
const element = $(`a[title="${name}"]`)
|
const element = $(`a[title="${name}"]`)
|
||||||
const href = element.attr('href') as string
|
const link = removeQueryParams(element.attr('href') as string)
|
||||||
const panelBodyHtml = $('.panel-body').html();
|
const panelBodyHtml = $('.panel-body').html();
|
||||||
|
|
||||||
// 2. 使用Turndown将HTML转换为Markdown
|
// 2. 使用Turndown将HTML转换为Markdown
|
||||||
|
@ -30,12 +36,15 @@ async function getPageData(url: string, name: string) {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// 执行转换
|
// 执行转换
|
||||||
const markdown = turndown.turndown(panelBodyHtml);
|
const content = turndown.turndown(panelBodyHtml);
|
||||||
|
const cover = await downloadImage($('.img-cover').attr('data-src'))
|
||||||
const title = $('.site-name').text().trim()
|
const title = $('.site-name').text().trim()
|
||||||
return {
|
return {
|
||||||
href,
|
link,
|
||||||
markdown,
|
|
||||||
title,
|
title,
|
||||||
|
cover,
|
||||||
|
content,
|
||||||
|
_id: uuid()
|
||||||
|
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -61,6 +70,7 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
||||||
|
|
||||||
// 提取工具卡片数据
|
// 提取工具卡片数据
|
||||||
const toolsData = [] as any[];
|
const toolsData = [] as any[];
|
||||||
|
const articleDataList = [] as any[];
|
||||||
const length = $('.url-card').length
|
const length = $('.url-card').length
|
||||||
let i = 0
|
let i = 0
|
||||||
for (const element of $('.url-card')) {
|
for (const element of $('.url-card')) {
|
||||||
|
@ -70,28 +80,24 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
||||||
const description = $(element).find('.url-info p').text().trim();
|
const description = $(element).find('.url-info p').text().trim();
|
||||||
const _id = $(element).attr('data-id');
|
const _id = $(element).attr('data-id');
|
||||||
const _originLink = $(element).find('img').attr('data-src');
|
const _originLink = $(element).find('img').attr('data-src');
|
||||||
let link = tempLink
|
let link = removeQueryParams(tempLink)
|
||||||
|
|
||||||
const articleData = {} as any
|
let articleData;
|
||||||
console.log(subTitle);
|
console.log(subTitle);
|
||||||
|
|
||||||
if (tempLink.startsWith('https://ai-bot')) {
|
if (tempLink.startsWith('https://ai-bot')) {
|
||||||
const pageData = await getPageData(tempLink, name)
|
const pageData = await getPageData(tempLink, name)
|
||||||
if (pageData) {
|
if (pageData) {
|
||||||
link = pageData.href
|
link = pageData.link
|
||||||
articleData.markdown = pageData.markdown
|
articleData = pageData
|
||||||
articleData.title = pageData.title
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
console.log(pageData);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
// 假设工具的类别是固定的,比如 "AI写作工具"
|
// 假设工具的类别是固定的,比如 "AI写作工具"
|
||||||
const type = typeId;
|
const type = typeId;
|
||||||
const priority = 1; // 根据索引来定义优先级
|
const priority = 1; // 根据索引来定义优先级
|
||||||
const addTime = Date.now() / 1000;
|
const addTime = Math.floor(Date.now() / 1000);
|
||||||
const logoLink = await downloadImage(_originLink)
|
const logoLink = await downloadImage(_originLink)
|
||||||
// console.log(logoLink);
|
// console.log(logoLink);
|
||||||
// const logoLink = ''
|
// const logoLink = ''
|
||||||
|
@ -105,8 +111,17 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
||||||
priority,
|
priority,
|
||||||
logoLink,
|
logoLink,
|
||||||
addTime,
|
addTime,
|
||||||
|
articleId: articleData?._id ? articleData._id : undefined,
|
||||||
subLinkType: hasSubType ? [subTitle] : undefined
|
subLinkType: hasSubType ? [subTitle] : undefined
|
||||||
};
|
};
|
||||||
|
if (articleData) {
|
||||||
|
articleDataList.push({
|
||||||
|
...articleData,
|
||||||
|
addTime: new Date().getTime() / 1000,
|
||||||
|
priority: 0
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
if (hasSubType) {
|
if (hasSubType) {
|
||||||
if (toolsData.findIndex(val => val.name === name) !== -1) {
|
if (toolsData.findIndex(val => val.name === name) !== -1) {
|
||||||
console.log('发现相同的name:' + name);
|
console.log('发现相同的name:' + name);
|
||||||
|
@ -128,9 +143,12 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
||||||
|
|
||||||
}
|
}
|
||||||
console.log(toolsData);
|
console.log(toolsData);
|
||||||
|
console.log(articleDataList);
|
||||||
|
|
||||||
const col = await getCollection('link');
|
const col = await getCollection('link');
|
||||||
col.insertMany(toolsData);
|
await col.insertMany(toolsData);
|
||||||
|
const articleCol = await getCollection('article');
|
||||||
|
await articleCol.insertMany(articleDataList)
|
||||||
console.log('数据插入成功');
|
console.log('数据插入成功');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching data:', error);
|
console.error('Error fetching data:', error);
|
||||||
|
|
|
@ -3,10 +3,32 @@ import { getCollection, getDb } from "../lib/mongodb";
|
||||||
/**
 * Mirrors the scraped data into the `ai-bot` database.
 *
 * Copies every document from the `link` collection returned by
 * `getCollection` into `ai-bot.link`, and every document from the `article`
 * collection into `ai-bot.link-article`. Each target collection is emptied
 * before it is refilled, so after a successful run the targets contain
 * exactly the source documents.
 *
 * @returns A promise that resolves once both target collections are rewritten.
 */
async function migrateLink() {
  const botDb = await getDb('ai-bot');
  const botCol = botDb.collection('link');
  const botArticleCol = botDb.collection('link-article');

  const col = await getCollection('link');
  const articleCol = await getCollection('article');

  // Read both sources up front so a failed read cannot leave a target
  // collection already emptied.
  const links = await col.find().toArray();
  const articles = await articleCol.find().toArray();

  await botCol.deleteMany({});
  // The MongoDB driver rejects an empty batch, so only insert when there is data.
  if (links.length > 0) {
    await botCol.insertMany(links);
  }

  await botArticleCol.deleteMany({});
  if (articles.length > 0) {
    await botArticleCol.insertMany(articles);
  }

  console.log('Migrate link success');
}
|
||||||
migrateLink();
|
/**
 * Copies each link's `description` onto the article it references.
 *
 * For every document in `ai-bot.link-article`, looks up the document in
 * `ai-bot.link` whose `articleId` equals the article's `_id` rendered as a
 * string. When one is found, its `description` is written onto the article.
 * Articles without a matching link are left untouched.
 *
 * @returns A promise that resolves only after every update has been applied.
 */
async function move() {
  const botDb = await getDb('ai-bot');
  const linkCol = botDb.collection('link');
  const articleCol = botDb.collection('link-article');
  const articles = await articleCol.find().toArray();

  // A plain loop (not forEach with an async callback) so each update is
  // awaited and any failure rejects the promise returned by move().
  for (const article of articles) {
    const link = await linkCol.findOne({ articleId: article._id + '' });
    if (!link) {
      continue;
    }
    console.log(link);

    await articleCol.updateOne(
      { _id: article._id },
      { $set: { description: link.description } },
    );
  }
}
|
||||||
|
// migrateLink();
|
||||||
|
move()
|
|
@ -21,6 +21,9 @@ export function askQuestion(query: string) {
|
||||||
export async function downloadImage(url: string) {
|
export async function downloadImage(url: string) {
|
||||||
try {
|
try {
|
||||||
// 获取图片响应
|
// 获取图片响应
|
||||||
|
if (!url.startsWith('https:')) {
|
||||||
|
url = 'https:' + url;
|
||||||
|
}
|
||||||
const response = await axios.get(url, {
|
const response = await axios.get(url, {
|
||||||
responseType: 'arraybuffer',
|
responseType: 'arraybuffer',
|
||||||
headers: {
|
headers: {
|
||||||
|
@ -29,7 +32,7 @@ export async function downloadImage(url: string) {
|
||||||
});
|
});
|
||||||
|
|
||||||
// 获取文件扩展名
|
// 获取文件扩展名
|
||||||
|
|
||||||
const ext = response.headers['content-type']?.split('/')[1] || 'jpg';
|
const ext = response.headers['content-type']?.split('/')[1] || 'jpg';
|
||||||
const id = uuid()
|
const id = uuid()
|
||||||
// 生成最终路径
|
// 生成最终路径
|
||||||
|
|
Loading…
Reference in New Issue