This commit is contained in:
expdsn 2025-02-24 19:06:27 +08:00
parent 1d50519a96
commit e8e249de35
2 changed files with 85 additions and 10 deletions

View File

@ -3,8 +3,10 @@ export type FetchType = {
typeId: string;
url: string;
name?: string;
hasSubType?: boolean;
}
const fetchList = [
{
name: 'AI写作工具',
typeId: '6790aae23de33b392c0330b2',
@ -25,6 +27,63 @@ const fetchList = [
typeId: '6791a98fc058e55ed0a094ca',
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
},
{
name: 'AI办公网站',
typeId: '6790ab4f3de33b392c0330b3',
url: 'https://ai-bot.cn/favorites/ai-office-tools/',
hasSubType: true
},
{
name: 'AI搜索工具',
typeId: '6790dc6b3de33b392c0330bb',
url: 'https://ai-bot.cn/favorites/ai-search-engines/'
},
{
name: 'AI对话网站',
typeId: '6790c2f93de33b392c0330b6',
url: 'https://ai-bot.cn/favorites/ai-chatbots/'
},
{
name: 'AI内容检测',
typeId: '67b707c9b139d1d6aa14cd07',
url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
},
{
name: 'AI学习工具',
typeId: '67b7080fb139d1d6aa14cd08',
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
},
{
name: 'AI开发平台',
typeId: '67b7eb3de0cf2993700b1186',
url: 'https://ai-bot.cn/favorites/ai-frameworks/'
},
{
name: 'AI提示工具',
typeId: '67b7e9bce0cf2993700b1184',
url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
},
{
name: 'AI法律助手',
typeId: '67b7eae0e0cf2993700b1185',
url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
},
{
name: 'AI训练模型',
typeId: '67b7eb84e0cf2993700b1187',
url: 'https://ai-bot.cn/favorites/ai-models/'
},
{
name: 'AI设计工具',
typeId: '6790ab9d3de33b392c0330b4',
url: 'https://ai-bot.cn/favorites/ai-design-tools/'
},
{
name: 'AI编程工具',
typeId: '6790dc2c3de33b392c0330ba',
// Programming-tools category page. The previous value repeated the
// 'websites-to-learn-ai' URL of the 'AI学习工具' entry, so this typeId was being
// populated with the learning-sites listing instead of its own category.
// NOTE(review): confirm the slug against the live ai-bot.cn category page.
url: 'https://ai-bot.cn/favorites/ai-programming-tools/'
}
]
function main() {

View File

@ -22,10 +22,10 @@ export async function queryListData(list: FetchType[]) {
const col = await getCollection('link');
await col.deleteMany({})
for (const item of list) {
await fetchData(item.typeId, item.url)
await fetchData(item)
}
}
export async function fetchData(typeName: string, url: string) {
export async function fetchData({ typeId, name: typeName, url, hasSubType = false }: FetchType) {
try {
// 请求目标页面
const { data } = await axios.get(url);
@ -35,25 +35,29 @@ export async function fetchData(typeName: string, url: string) {
const $ = cheerio.load(data);
// 提取工具卡片数据
const toolsData = [] as any;
const toolsData = [] as any[];
const length = $('.url-card').length
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
let i = 0
for (const element of $('.url-card')) {
const subTitle = $(element).parent().prev().find('h4').text().trim();
const name = $(element).find('.url-info strong').text().trim();
const tempLink = $(element).find('a').attr('href') as string;
const description = $(element).find('.url-info p').text().trim();
const _id = $(element).attr('data-id');
const _originLink = $(element).find('img').attr('data-src');
let link = tempLink
if (tempLink.startsWith('https://ai-bot.cn')) {
console.log(subTitle);
if (tempLink.startsWith('https://ai-bot')) {
link = await getPageData(tempLink, name) || ''
}
// 假设工具的类别是固定的,比如 "AI写作工具"
const type = typeName;
const type = typeId;
const priority = 1; // 根据索引来定义优先级
const addTime = Date.now();
const addTime = Date.now() / 1000;
const logoLink = await downloadImage(_originLink)
// console.log(logoLink);
// const logoLink = ''
@ -67,13 +71,25 @@ export async function fetchData(typeName: string, url: string) {
priority,
logoLink,
addTime,
subLinkType: hasSubType ? [subTitle] : undefined
};
// When the category has sub-types the same tool can appear under several sub-headings:
// record the extra sub-heading on the entry already collected instead of adding a duplicate.
// A single lookup replaces the two identical findIndex scans of the earlier version.
const existing = hasSubType ? toolsData.find(val => val.name === name) : undefined;
if (existing) {
console.log('发现相同的name:' + name);
existing.subLinkType.push(subTitle)
} else {
toolsData.push(toolData);
}
toolsData.push(toolData);
i++
console.clear()
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
// `+` binds tighter than `||`, so the earlier `typeName + ',' + subTitle || ''` never reached
// its fallback and printed a dangling comma when subTitle was empty. Append the
// ",<subTitle>" suffix only when a sub-heading is actually present.
console.log(`正在爬取${typeName}${subTitle ? ',' + subTitle : ''}类别的数据,共${length}条数据`);
console.log(`${typeName}:进度:${i}/${length}`);
}