This commit is contained in:
expdsn 2025-02-24 10:24:56 +08:00
parent dd55ad1490
commit 1d50519a96
6 changed files with 42 additions and 12 deletions

View File

@ -4,6 +4,7 @@
"main": "index.js",
"scripts": {
"dev": "npx ts-node src/index.ts",
"migrate": "npx ts-node src/mannual/migrate.ts",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],

View File

@ -7,13 +7,25 @@ export type FetchType = {
const fetchList = [
{
name: 'AI写作工具',
typeId: '67908fc33de33b392c0330af',
typeId: '6790aae23de33b392c0330b2',
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
},
{
name: '',
typeId: ''
}
name: 'AI图像网站',
typeId: '67908fc33de33b392c0330af',
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
},
{
name: 'AI视频网站',
typeId: '67b6f0b7b139d1d6aa14cd06',
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
},
{
name: 'AI音频网站',
typeId: '6791a98fc058e55ed0a094ca',
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
},
]
function main() {
console.log("Hello, this is the main function!");

View File

@ -23,8 +23,8 @@ if (process.env.NODE_ENV === 'development') {
clientPromise = client.connect();
}
export const getDb = async () => {
return (await clientPromise).db('crawler');
/**
 * Resolves the shared client connection and returns a database handle.
 *
 * @param dbName - Name of the database to open; when omitted (or empty)
 *   the `crawler` database is used.
 * @returns The database handle obtained from the connected client.
 */
export const getDb = async (dbName?: string) => {
  const connectedClient = await clientPromise;
  // `||` (not `??`) on purpose: an empty string also falls back to the default.
  const targetName = dbName || 'crawler';
  return connectedClient.db(targetName);
};
export const getCollection = async (collection: string) => {
const ins = await getDb();

View File

@ -4,7 +4,6 @@ import { downloadImage } from "../share/tools"
import { getCollection } from '../lib/mongodb';
import { FetchType } from '..';
// 要抓取的网页 URL
const url = 'https://ai-bot.cn/favorites/ai-writing-tools/';
async function getPageData(url: string, name: string) {
try {
@ -20,10 +19,13 @@ async function getPageData(url: string, name: string) {
}
}
export async function queryListData(list: FetchType[]) {
const promiseList = list.map(item => fetchData(item.typeId))
await Promise.all(promiseList)
const col = await getCollection('link');
await col.deleteMany({})
for (const item of list) {
await fetchData(item.typeId, item.url)
}
}
export async function fetchData(typeName: string) {
export async function fetchData(typeName: string, url: string) {
try {
// 请求目标页面
const { data } = await axios.get(url);
@ -69,7 +71,10 @@ export async function fetchData(typeName: string) {
toolsData.push(toolData);
i++
console.log(`进度:${i}/${length}`);
console.clear()
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
console.log(`${typeName}:进度:${i}/${length}`);
}
console.log(toolsData);

12
src/mannual/migrate.ts Normal file
View File

@ -0,0 +1,12 @@
import { getCollection, getDb } from "../lib/mongodb";
/**
 * Replaces the contents of the `link` collection in the `ai-bot` database
 * with a copy of every document from the `link` collection returned by
 * `getCollection`.
 *
 * The source documents are read before the target is cleared, so a failed
 * read leaves the target untouched.
 *
 * @returns A promise that resolves once the target collection has been
 *   rewritten; it rejects if any database operation fails.
 */
async function migrateLink(): Promise<void> {
  const botDb = await getDb('ai-bot');
  const botCol = botDb.collection('link');
  const col = await getCollection('link');
  const links = await col.find().toArray();

  await botCol.deleteMany({});
  // The driver rejects an empty batch, so only insert when there is data;
  // an empty source simply results in an empty target.
  if (links.length > 0) {
    await botCol.insertMany(links);
  }
  console.log('Migrate link success');
}

// Report failures explicitly and signal them through the exit code instead of
// leaving the rejection unhandled.
migrateLink().catch((error: unknown) => {
  console.error('Migrate link failed:', error);
  process.exitCode = 1;
});

View File

@ -38,7 +38,7 @@ export async function downloadImage(url: string) {
// 写入文件
await fs.writeFile(filePath, response.data);
return id;
return filename;
} catch (error) {
console.error('下载失败:', error);
throw error;