save
This commit is contained in:
parent
dd55ad1490
commit
1d50519a96
|
@ -4,6 +4,7 @@
|
|||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "npx ts-node src/index.ts",
|
||||
"migrate": "npx ts-node src/mannual/migrate.ts",
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
|
|
20
src/index.ts
20
src/index.ts
|
@ -7,13 +7,25 @@ export type FetchType = {
|
|||
const fetchList = [
|
||||
{
|
||||
name: 'AI写作工具',
|
||||
typeId: '67908fc33de33b392c0330af',
|
||||
typeId: '6790aae23de33b392c0330b2',
|
||||
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
||||
},
|
||||
{
|
||||
name: '',
|
||||
typeId: ''
|
||||
}
|
||||
name: 'AI图像网站',
|
||||
typeId: '67908fc33de33b392c0330af',
|
||||
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI视频网站',
|
||||
typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||
},
|
||||
{
|
||||
name: 'AI音频网站',
|
||||
typeId: '6791a98fc058e55ed0a094ca',
|
||||
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||
},
|
||||
|
||||
]
|
||||
function main() {
|
||||
console.log("Hello, this is the main function!");
|
||||
|
|
|
@ -23,8 +23,8 @@ if (process.env.NODE_ENV === 'development') {
|
|||
clientPromise = client.connect();
|
||||
}
|
||||
|
||||
/**
 * Resolves the shared MongoDB connection and returns a database handle.
 *
 * @param dbName Name of the database to open. When omitted (or an empty
 *               string), the `crawler` database is used.
 * @returns The database handle obtained from the connected client.
 */
export const getDb = async (dbName?: string) => {
    const connected = await clientPromise;
    const targetName = dbName || 'crawler';
    return connected.db(targetName);
};
|
||||
export const getCollection = async (collection: string) => {
|
||||
const ins = await getDb();
|
||||
|
|
|
@ -4,7 +4,6 @@ import { downloadImage } from "../share/tools"
|
|||
import { getCollection } from '../lib/mongodb';
|
||||
import { FetchType } from '..';
|
||||
// 要抓取的网页 URL
|
||||
const url = 'https://ai-bot.cn/favorites/ai-writing-tools/';
|
||||
|
||||
async function getPageData(url: string, name: string) {
|
||||
try {
|
||||
|
@ -20,10 +19,13 @@ async function getPageData(url: string, name: string) {
|
|||
}
|
||||
}
|
||||
/**
 * Empties the `link` collection and then crawls every entry of `list`.
 *
 * Entries are processed strictly one after another: each `fetchData` call is
 * awaited before the next one starts.
 *
 * @param list Crawl targets; each entry's `typeId` and `url` are forwarded to
 *             `fetchData`.
 */
export async function queryListData(list: FetchType[]) {
    // Clear previously stored links before the new crawl begins.
    const linkCollection = await getCollection('link');
    await linkCollection.deleteMany({});

    for (const { typeId, url } of list) {
        await fetchData(typeId, url);
    }
}
|
||||
export async function fetchData(typeName: string) {
|
||||
export async function fetchData(typeName: string, url: string) {
|
||||
try {
|
||||
// 请求目标页面
|
||||
const { data } = await axios.get(url);
|
||||
|
@ -69,7 +71,10 @@ export async function fetchData(typeName: string) {
|
|||
|
||||
toolsData.push(toolData);
|
||||
i++
|
||||
console.log(`进度:${i}/${length}`);
|
||||
console.clear()
|
||||
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
|
||||
|
||||
console.log(`${typeName}:进度:${i}/${length}`);
|
||||
|
||||
}
|
||||
console.log(toolsData);
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
import { getCollection, getDb } from "../lib/mongodb";
|
||||
|
||||
/**
 * Replaces the contents of the `link` collection in the `ai-bot` database
 * with the documents of the `link` collection returned by `getCollection`.
 *
 * If the source collection is empty, nothing is migrated and the target is
 * left untouched: `insertMany` rejects an empty batch, and clearing the target
 * first would leave it wiped with nothing to put back.
 *
 * NOTE(review): the MongoDB connection is not closed here, so the script may
 * keep the process alive after finishing — confirm against lib/mongodb.
 */
async function migrateLink() {
    const botDb = await getDb('ai-bot');
    const botCol = botDb.collection('link');
    const col = await getCollection('link');
    const links = await col.find().toArray();

    // Guard before the destructive step: never delete when there is nothing to insert.
    if (links.length === 0) {
        console.warn('Migrate link skipped: source collection is empty');
        return;
    }

    await botCol.deleteMany({});
    await botCol.insertMany(links);
    console.log('Migrate link success');
}

// Report failures explicitly and signal them through the exit code instead of
// leaving the promise rejection unhandled.
migrateLink().catch((error: unknown) => {
    console.error('Migrate link failed:', error);
    process.exitCode = 1;
});
|
|
@ -38,7 +38,7 @@ export async function downloadImage(url: string) {
|
|||
|
||||
// 写入文件
|
||||
await fs.writeFile(filePath, response.data);
|
||||
return id;
|
||||
return filename;
|
||||
} catch (error) {
|
||||
console.error('下载失败:', error);
|
||||
throw error;
|
||||
|
|
Loading…
Reference in New Issue