save
This commit is contained in:
parent
e8e249de35
commit
141bf45123
|
@ -17,6 +17,7 @@
|
|||
"g": "^2.0.1",
|
||||
"mongodb": "^6.13.0",
|
||||
"puppeteer": "^24.2.1",
|
||||
"turndown": "^7.2.0",
|
||||
"uuid": "^11.0.5"
|
||||
},
|
||||
"pnpm": {
|
||||
|
@ -26,6 +27,7 @@
|
|||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.13.4",
|
||||
"@types/turndown": "^5.0.5",
|
||||
"@types/uuid": "^10.0.0",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.7.3"
|
||||
|
|
|
@ -23,6 +23,9 @@ importers:
|
|||
puppeteer:
|
||||
specifier: ^24.2.1
|
||||
version: 24.2.1(typescript@5.7.3)
|
||||
turndown:
|
||||
specifier: ^7.2.0
|
||||
version: 7.2.0
|
||||
uuid:
|
||||
specifier: ^11.0.5
|
||||
version: 11.0.5
|
||||
|
@ -30,6 +33,9 @@ importers:
|
|||
'@types/node':
|
||||
specifier: ^22.13.4
|
||||
version: 22.13.4
|
||||
'@types/turndown':
|
||||
specifier: ^5.0.5
|
||||
version: 5.0.5
|
||||
'@types/uuid':
|
||||
specifier: ^10.0.0
|
||||
version: 10.0.0
|
||||
|
@ -64,6 +70,9 @@ packages:
|
|||
'@jridgewell/trace-mapping@0.3.9':
|
||||
resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==}
|
||||
|
||||
'@mixmark-io/domino@2.2.0':
|
||||
resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==}
|
||||
|
||||
'@mongodb-js/saslprep@1.2.0':
|
||||
resolution: {integrity: sha512-+ywrb0AqkfaYuhHs6LxKWgqbh3I72EpEgESCw37o+9qPx9WTCkgDm2B+eMrwehGtHBWHFU4GXvnSCNiFhhausg==}
|
||||
|
||||
|
@ -90,6 +99,9 @@ packages:
|
|||
'@types/node@22.13.4':
|
||||
resolution: {integrity: sha512-ywP2X0DYtX3y08eFVx5fNIw7/uIv8hYUKgXoK8oayJlLnKcRfEYCxWMVE1XagUdVtCJlZT1AU4LXEABW+L1Peg==}
|
||||
|
||||
'@types/turndown@5.0.5':
|
||||
resolution: {integrity: sha512-TL2IgGgc7B5j78rIccBtlYAnkuv8nUQqhQc+DSYV5j9Be9XOcm/SKOVRuA47xAVI3680Tk9B1d8flK2GWT2+4w==}
|
||||
|
||||
'@types/uuid@10.0.0':
|
||||
resolution: {integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==}
|
||||
|
||||
|
@ -655,6 +667,9 @@ packages:
|
|||
tslib@2.8.1:
|
||||
resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==}
|
||||
|
||||
turndown@7.2.0:
|
||||
resolution: {integrity: sha512-eCZGBN4nNNqM9Owkv9HAtWRYfLA4h909E/WGAWWBpmB275ehNhZyk87/Tpvjbp0jjNl9XwCsbe6bm6CqFsgD+A==}
|
||||
|
||||
typed-query-selector@2.12.0:
|
||||
resolution: {integrity: sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==}
|
||||
|
||||
|
@ -757,6 +772,8 @@ snapshots:
|
|||
'@jridgewell/resolve-uri': 3.1.2
|
||||
'@jridgewell/sourcemap-codec': 1.5.0
|
||||
|
||||
'@mixmark-io/domino@2.2.0': {}
|
||||
|
||||
'@mongodb-js/saslprep@1.2.0':
|
||||
dependencies:
|
||||
sparse-bitfield: 3.0.3
|
||||
|
@ -788,6 +805,8 @@ snapshots:
|
|||
dependencies:
|
||||
undici-types: 6.20.0
|
||||
|
||||
'@types/turndown@5.0.5': {}
|
||||
|
||||
'@types/uuid@10.0.0': {}
|
||||
|
||||
'@types/webidl-conversions@7.0.3': {}
|
||||
|
@ -1392,6 +1411,10 @@ snapshots:
|
|||
|
||||
tslib@2.8.1: {}
|
||||
|
||||
turndown@7.2.0:
|
||||
dependencies:
|
||||
'@mixmark-io/domino': 2.2.0
|
||||
|
||||
typed-query-selector@2.12.0: {}
|
||||
|
||||
typescript@5.7.3: {}
|
||||
|
|
142
src/index.ts
142
src/index.ts
|
@ -12,78 +12,78 @@ const fetchList = [
|
|||
typeId: '6790aae23de33b392c0330b2',
|
||||
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI图像网站',
|
||||
typeId: '67908fc33de33b392c0330af',
|
||||
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI视频网站',
|
||||
typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI音频网站',
|
||||
typeId: '6791a98fc058e55ed0a094ca',
|
||||
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI办公网站',
|
||||
typeId: '6790ab4f3de33b392c0330b3',
|
||||
url: 'https://ai-bot.cn/favorites/ai-office-tools/',
|
||||
hasSubType: true
|
||||
},
|
||||
{
|
||||
name: 'AI搜索工具',
|
||||
typeId: '6790dc6b3de33b392c0330bb',
|
||||
url: 'https://ai-bot.cn/favorites/ai-search-engines/'
|
||||
},
|
||||
{
|
||||
name: 'AI对话网站',
|
||||
typeId: '6790c2f93de33b392c0330b6',
|
||||
url: 'https://ai-bot.cn/favorites/ai-chatbots/'
|
||||
},
|
||||
{
|
||||
name: 'AI内容检测',
|
||||
typeId: '67b707c9b139d1d6aa14cd07',
|
||||
url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI学习工具',
|
||||
typeId: '67b7080fb139d1d6aa14cd08',
|
||||
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
},
|
||||
{
|
||||
name: 'AI开发平台',
|
||||
typeId: '67b7eb3de0cf2993700b1186',
|
||||
url: 'https://ai-bot.cn/favorites/ai-frameworks/'
|
||||
},
|
||||
{
|
||||
name: 'AI提示工具',
|
||||
typeId: '67b7e9bce0cf2993700b1184',
|
||||
url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI法律助手',
|
||||
typeId: '67b7eae0e0cf2993700b1185',
|
||||
url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
|
||||
},
|
||||
{
|
||||
name: 'AI训练模型',
|
||||
typeId: '67b7eb84e0cf2993700b1187',
|
||||
url: 'https://ai-bot.cn/favorites/ai-models/'
|
||||
},
|
||||
{
|
||||
name: 'AI设计工具',
|
||||
typeId: '6790ab9d3de33b392c0330b4',
|
||||
url: 'https://ai-bot.cn/favorites/ai-design-tools/'
|
||||
},
|
||||
// {
|
||||
// name: 'AI图像网站',
|
||||
// typeId: '67908fc33de33b392c0330af',
|
||||
// url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI视频网站',
|
||||
// typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI音频网站',
|
||||
// typeId: '6791a98fc058e55ed0a094ca',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI办公网站',
|
||||
// typeId: '6790ab4f3de33b392c0330b3',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-office-tools/',
|
||||
// hasSubType: true
|
||||
// },
|
||||
// {
|
||||
// name: 'AI搜索工具',
|
||||
// typeId: '6790dc6b3de33b392c0330bb',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-search-engines/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI对话网站',
|
||||
// typeId: '6790c2f93de33b392c0330b6',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-chatbots/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI内容检测',
|
||||
// typeId: '67b707c9b139d1d6aa14cd07',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-content-detection-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI学习工具',
|
||||
// typeId: '67b7080fb139d1d6aa14cd08',
|
||||
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI开发平台',
|
||||
// typeId: '67b7eb3de0cf2993700b1186',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-frameworks/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI提示工具',
|
||||
// typeId: '67b7e9bce0cf2993700b1184',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-prompt-tools/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI法律助手',
|
||||
// typeId: '67b7eae0e0cf2993700b1185',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-legal-assistants/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI训练模型',
|
||||
// typeId: '67b7eb84e0cf2993700b1187',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-models/'
|
||||
// },
|
||||
// {
|
||||
// name: 'AI设计工具',
|
||||
// typeId: '6790ab9d3de33b392c0330b4',
|
||||
// url: 'https://ai-bot.cn/favorites/ai-design-tools/'
|
||||
// },
|
||||
|
||||
{
|
||||
name: 'AI编程工具',
|
||||
typeId: '6790dc2c3de33b392c0330ba',
|
||||
url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
}
|
||||
// {
|
||||
// name: 'AI编程工具',
|
||||
// typeId: '6790dc2c3de33b392c0330ba',
|
||||
// url: 'https://ai-bot.cn/favorites/websites-to-learn-ai/'
|
||||
// }
|
||||
|
||||
]
|
||||
function main() {
|
||||
|
|
|
@ -2,6 +2,7 @@ import axios from 'axios';
|
|||
const cheerio = require('cheerio')
|
||||
import { downloadImage } from "../share/tools"
|
||||
import { getCollection } from '../lib/mongodb';
|
||||
import Turndown from 'turndown';
|
||||
import { FetchType } from '..';
|
||||
// 要抓取的网页 URL
|
||||
|
||||
|
@ -11,8 +12,32 @@ async function getPageData(url: string, name: string) {
|
|||
const { data } = await axios.get(url);
|
||||
const $ = cheerio.load(data);
|
||||
const element = $(`a[title="${name}"]`)
|
||||
return element.attr('href')
|
||||
const href = element.attr('href') as string
|
||||
const panelBodyHtml = $('.panel-body').html();
|
||||
|
||||
// 2. 使用Turndown将HTML转换为Markdown
|
||||
const turndown = new Turndown({
|
||||
codeBlockStyle: 'fenced', // 代码块用```包裹
|
||||
headingStyle: 'atx' // 标题用#符号
|
||||
});
|
||||
|
||||
// 添加自定义规则(可选)
|
||||
turndown.addRule('preCodeBlock', {
|
||||
filter: ['pre'],
|
||||
replacement: (content) => {
|
||||
// 保留pre标签内的原始格式(如代码块)
|
||||
return '\n```\n' + content + '\n```\n';
|
||||
}
|
||||
});
|
||||
// 执行转换
|
||||
const markdown = turndown.turndown(panelBodyHtml);
|
||||
const title = $('.site-name').text().trim()
|
||||
return {
|
||||
href,
|
||||
markdown,
|
||||
title,
|
||||
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error fetching data:', error);
|
||||
|
||||
|
@ -47,11 +72,20 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
|||
const _originLink = $(element).find('img').attr('data-src');
|
||||
let link = tempLink
|
||||
|
||||
|
||||
const articleData = {} as any
|
||||
console.log(subTitle);
|
||||
|
||||
if (tempLink.startsWith('https://ai-bot')) {
|
||||
link = await getPageData(tempLink, name) || ''
|
||||
const pageData = await getPageData(tempLink, name)
|
||||
if (pageData) {
|
||||
link = pageData.href
|
||||
articleData.markdown = pageData.markdown
|
||||
articleData.title = pageData.title
|
||||
|
||||
|
||||
}
|
||||
console.log(pageData);
|
||||
|
||||
|
||||
}
|
||||
// 假设工具的类别是固定的,比如 "AI写作工具"
|
||||
|
@ -87,7 +121,7 @@ export async function fetchData({ typeId, name: typeName, url, hasSubType = fals
|
|||
}
|
||||
|
||||
i++
|
||||
console.clear()
|
||||
// console.clear()
|
||||
console.log(`正在爬取${typeName + ',' + subTitle || ''}类别的数据,共${length}条数据`);
|
||||
|
||||
console.log(`${typeName}:进度:${i}/${length}`);
|
||||
|
|
Loading…
Reference in New Issue