This commit is contained in:
expdsn 2025-02-24 19:06:27 +08:00
parent 1d50519a96
commit e8e249de35
2 changed files with 85 additions and 10 deletions

View File

@@ -3,8 +3,10 @@ export type FetchType = {
typeId: string; typeId: string;
url: string; url: string;
name?: string; name?: string;
hasSubType?: boolean;
} }
const fetchList = [ const fetchList = [
{ {
name: 'AI写作工具', name: 'AI写作工具',
typeId: '6790aae23de33b392c0330b2', typeId: '6790aae23de33b392c0330b2',
@@ -25,6 +27,63 @@ const fetchList = [
typeId: '6791a98fc058e55ed0a094ca', typeId: '6791a98fc058e55ed0a094ca',
url: 'https://ai-bot.cn/favorites/ai-audio-tools/' url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
}, },
{
name: 'AI办公网站',
typeId: '6790ab4f3de33b392c0330b3',
url: 'https://ai-bot.cn/favorites/ai-office-tools/',
hasSubType: true
},
{
name: 'AI搜索工具',
typeId: '6790dc6b3de33b392c0330bb',
url: 'https://ai-bot.cn/favorites/ai-search-engines/'
},
{
name: 'AI对话网站',
typeId: '6790c2f93de33b392c0330b6',
url: 'https://ai-bot.cn/favorites/ai-chatbots/'
},
{
name: 'AI内容检测',
typeId: '67b707c9b139d1d6aa14cd07',
url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
},
{
name: 'AI学习工具',
typeId: '67b7080fb139d1d6aa14cd08',
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
},
{
name: 'AI开发平台',
typeId: '67b7eb3de0cf2993700b1186',
url: 'https://ai-bot.cn/favorites/ai-frameworks/'
},
{
name: 'AI提示工具',
typeId: '67b7e9bce0cf2993700b1184',
url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
},
{
name: 'AI法律助手',
typeId: '67b7eae0e0cf2993700b1185',
url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
},
{
name: 'AI训练模型',
typeId: '67b7eb84e0cf2993700b1187',
url: 'https://ai-bot.cn/favorites/ai-models/'
},
{
name: 'AI设计工具',
typeId: '6790ab9d3de33b392c0330b4',
url: 'https://ai-bot.cn/favorites/ai-design-tools/'
},
{
name: 'AI编程工具',
typeId: '6790dc2c3de33b392c0330ba',
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
}
] ]
function main() { function main() {

View File

@@ -22,10 +22,10 @@ export async function queryListData(list: FetchType[]) {
const col = await getCollection('link'); const col = await getCollection('link');
await col.deleteMany({}) await col.deleteMany({})
for (const item of list) { for (const item of list) {
await fetchData(item.typeId, item.url) await fetchData(item)
} }
} }
export async function fetchData(typeName: string, url: string) { export async function fetchData({ typeId, name: typeName, url, hasSubType = false }: FetchType) {
try { try {
// 请求目标页面 // 请求目标页面
const { data } = await axios.get(url); const { data } = await axios.get(url);
@@ -35,25 +35,29 @@ export async function fetchData(typeName: string, url: string) {
const $ = cheerio.load(data); const $ = cheerio.load(data);
// 提取工具卡片数据 // 提取工具卡片数据
const toolsData = [] as any; const toolsData = [] as any[];
const length = $('.url-card').length const length = $('.url-card').length
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
let i = 0 let i = 0
for (const element of $('.url-card')) { for (const element of $('.url-card')) {
const subTitle = $(element).parent().prev().find('h4').text().trim();
const name = $(element).find('.url-info strong').text().trim(); const name = $(element).find('.url-info strong').text().trim();
const tempLink = $(element).find('a').attr('href') as string; const tempLink = $(element).find('a').attr('href') as string;
const description = $(element).find('.url-info p').text().trim(); const description = $(element).find('.url-info p').text().trim();
const _id = $(element).attr('data-id'); const _id = $(element).attr('data-id');
const _originLink = $(element).find('img').attr('data-src'); const _originLink = $(element).find('img').attr('data-src');
let link = tempLink let link = tempLink
if (tempLink.startsWith('https://ai-bot.cn')) {
console.log(subTitle);
if (tempLink.startsWith('https://ai-bot')) {
link = await getPageData(tempLink, name) || '' link = await getPageData(tempLink, name) || ''
} }
// 假设工具的类别是固定的,比如 "AI写作工具" // 假设工具的类别是固定的,比如 "AI写作工具"
const type = typeName; const type = typeId;
const priority = 1; // 根据索引来定义优先级 const priority = 1; // 根据索引来定义优先级
const addTime = Date.now(); const addTime = Date.now() / 1000;
const logoLink = await downloadImage(_originLink) const logoLink = await downloadImage(_originLink)
// console.log(logoLink); // console.log(logoLink);
// const logoLink = '' // const logoLink = ''
@@ -67,13 +71,25 @@ export async function fetchData(typeName: string, url: string) {
priority, priority,
logoLink, logoLink,
addTime, addTime,
subLinkType: hasSubType ? [subTitle] : undefined
}; };
if (hasSubType) {
if (toolsData.findIndex(val => val.name === name) !== -1) {
console.log('发现相同的name:' + name);
toolsData[toolsData.findIndex(val => val.name === name)].subLinkType.push(subTitle)
} else {
toolsData.push(toolData);
}
} else {
toolsData.push(toolData);
}
toolsData.push(toolData);
i++ i++
console.clear() console.clear()
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`); console.log(`正在爬取${typeName + ',' + subTitle || ''}类别的数据,共${length}条数据`);
console.log(`${typeName}:进度:${i}/${length}`); console.log(`${typeName}:进度:${i}/${length}`);
} }