This commit is contained in:
expdsn 2025-02-20 18:41:18 +08:00
parent f8dc38e673
commit dd55ad1490
6 changed files with 187 additions and 11 deletions

5
.env.local Normal file
View File

@ -0,0 +1,5 @@
MONGODB_URI=mongodb://expdsn:58662@expdsn.cloud:27017
# MONGODB_URI=mongodb://expdsn:58662@localhost:27017
SESSION_SECRET=lREDRcaFwZIzM7Rjw63XGj8trTyMqhVUsVwwhuTQnFs=
ALIYUN_RAM_ACCESS_KEY_ID=LTAI5tNzopZHJFa2Q9vqr1u5
ALIYUN_RAM_ACCESS_KEY_SECRET=qPu7fyft0KJ1l6SGqbS71IW0vDbRlr

View File

@ -14,6 +14,7 @@
"axios": "^1.7.9",
"cheerio": "^1.0.0",
"g": "^2.0.1",
"mongodb": "^6.13.0",
"puppeteer": "^24.2.1",
"uuid": "^11.0.5"
},

View File

@ -17,6 +17,9 @@ importers:
g:
specifier: ^2.0.1
version: 2.0.1
mongodb:
specifier: ^6.13.0
version: 6.13.0(socks@2.8.4)
puppeteer:
specifier: ^24.2.1
version: 24.2.1(typescript@5.7.3)
@ -61,6 +64,9 @@ packages:
'@jridgewell/trace-mapping@0.3.9':
resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==}
'@mongodb-js/saslprep@1.2.0':
resolution: {integrity: sha512-+ywrb0AqkfaYuhHs6LxKWgqbh3I72EpEgESCw37o+9qPx9WTCkgDm2B+eMrwehGtHBWHFU4GXvnSCNiFhhausg==}
'@puppeteer/browsers@2.7.1':
resolution: {integrity: sha512-MK7rtm8JjaxPN7Mf1JdZIZKPD2Z+W7osvrC1vjpvfOX1K0awDIHYbNi89f7eotp7eMUn2shWnt03HwVbriXtKQ==}
engines: {node: '>=18'}
@ -87,6 +93,12 @@ packages:
'@types/uuid@10.0.0':
resolution: {integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==}
'@types/webidl-conversions@7.0.3':
resolution: {integrity: sha512-CiJJvcRtIgzadHCYXw7dqEnMNRjhGZlYK05Mj9OyktqV8uVT8fD2BFOB7S1uwBE3Kj2Z+4UyPmFw/Ixgw/LAlA==}
'@types/whatwg-url@11.0.5':
resolution: {integrity: sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==}
'@types/yauzl@2.10.3':
resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==}
@ -162,6 +174,10 @@ packages:
boolbase@1.0.0:
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
bson@6.10.3:
resolution: {integrity: sha512-MTxGsqgYTwfshYWTRdmZRC+M7FnG1b4y7RO7p2k3X24Wq0yv1m77Wsj0BzlPzd/IowgESfsruQCUToa7vbOpPQ==}
engines: {node: '>=16.20.1'}
buffer-crc32@0.2.13:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
@ -445,6 +461,9 @@ packages:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
memory-pager@1.5.0:
resolution: {integrity: sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==}
mime-db@1.52.0:
resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==}
engines: {node: '>= 0.6'}
@ -456,6 +475,36 @@ packages:
mitt@3.0.1:
resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==}
mongodb-connection-string-url@3.0.2:
resolution: {integrity: sha512-rMO7CGo/9BFwyZABcKAWL8UJwH/Kc2x0g72uhDWzG48URRax5TCIcJ7Rc3RZqffZzO/Gwff/jyKwCU9TN8gehA==}
mongodb@6.13.0:
resolution: {integrity: sha512-KeESYR5TEaFxOuwRqkOm3XOsMqCSkdeDMjaW5u2nuKfX7rqaofp7JQGoi7sVqQcNJTKuveNbzZtWMstb8ABP6Q==}
engines: {node: '>=16.20.1'}
peerDependencies:
'@aws-sdk/credential-providers': ^3.188.0
'@mongodb-js/zstd': ^1.1.0 || ^2.0.0
gcp-metadata: ^5.2.0
kerberos: ^2.0.1
mongodb-client-encryption: '>=6.0.0 <7'
snappy: ^7.2.2
socks: ^2.7.1
peerDependenciesMeta:
'@aws-sdk/credential-providers':
optional: true
'@mongodb-js/zstd':
optional: true
gcp-metadata:
optional: true
kerberos:
optional: true
mongodb-client-encryption:
optional: true
snappy:
optional: true
socks:
optional: true
ms@2.1.3:
resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
@ -514,6 +563,10 @@ packages:
pump@3.0.2:
resolution: {integrity: sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw==}
punycode@2.3.1:
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
engines: {node: '>=6'}
puppeteer-core@24.2.1:
resolution: {integrity: sha512-bCypUh3WXzETafv1TCFAjIUnI8BiQ/d+XvEfEXDLcIMm9CAvROqnBmbt79yBjwasoDZsgfXnUmIJU7Y27AalVQ==}
engines: {node: '>=18'}
@ -555,6 +608,9 @@ packages:
resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==}
engines: {node: '>=0.10.0'}
sparse-bitfield@3.0.3:
resolution: {integrity: sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==}
sprintf-js@1.1.3:
resolution: {integrity: sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==}
@ -578,6 +634,10 @@ packages:
text-decoder@1.2.3:
resolution: {integrity: sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==}
tr46@5.0.0:
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
engines: {node: '>=18'}
ts-node@10.9.2:
resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==}
hasBin: true
@ -617,6 +677,10 @@ packages:
v8-compile-cache-lib@3.0.1:
resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==}
webidl-conversions@7.0.0:
resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==}
engines: {node: '>=12'}
whatwg-encoding@3.1.1:
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
engines: {node: '>=18'}
@ -625,6 +689,10 @@ packages:
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
engines: {node: '>=18'}
whatwg-url@14.1.1:
resolution: {integrity: sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==}
engines: {node: '>=18'}
wrap-ansi@7.0.0:
resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
engines: {node: '>=10'}
@ -689,6 +757,10 @@ snapshots:
'@jridgewell/resolve-uri': 3.1.2
'@jridgewell/sourcemap-codec': 1.5.0
'@mongodb-js/saslprep@1.2.0':
dependencies:
sparse-bitfield: 3.0.3
'@puppeteer/browsers@2.7.1':
dependencies:
debug: 4.4.0
@ -718,6 +790,12 @@ snapshots:
'@types/uuid@10.0.0': {}
'@types/webidl-conversions@7.0.3': {}
'@types/whatwg-url@11.0.5':
dependencies:
'@types/webidl-conversions': 7.0.3
'@types/yauzl@2.10.3':
dependencies:
'@types/node': 22.13.4
@ -788,6 +866,8 @@ snapshots:
boolbase@1.0.0: {}
bson@6.10.3: {}
buffer-crc32@0.2.13: {}
call-bind-apply-helpers@1.0.2:
@ -1085,6 +1165,8 @@ snapshots:
math-intrinsics@1.1.0: {}
memory-pager@1.5.0: {}
mime-db@1.52.0: {}
mime-types@2.1.35:
@ -1093,6 +1175,19 @@ snapshots:
mitt@3.0.1: {}
mongodb-connection-string-url@3.0.2:
dependencies:
'@types/whatwg-url': 11.0.5
whatwg-url: 14.1.1
mongodb@6.13.0(socks@2.8.4):
dependencies:
'@mongodb-js/saslprep': 1.2.0
bson: 6.10.3
mongodb-connection-string-url: 3.0.2
optionalDependencies:
socks: 2.8.4
ms@2.1.3: {}
netmask@2.0.2: {}
@ -1173,6 +1268,8 @@ snapshots:
end-of-stream: 1.4.4
once: 1.4.0
punycode@2.3.1: {}
puppeteer-core@24.2.1:
dependencies:
'@puppeteer/browsers': 2.7.1
@ -1228,6 +1325,10 @@ snapshots:
source-map@0.6.1:
optional: true
sparse-bitfield@3.0.3:
dependencies:
memory-pager: 1.5.0
sprintf-js@1.1.3: {}
streamx@2.22.0:
@ -1267,6 +1368,10 @@ snapshots:
dependencies:
b4a: 1.6.7
tr46@5.0.0:
dependencies:
punycode: 2.3.1
ts-node@10.9.2(@types/node@22.13.4)(typescript@5.7.3):
dependencies:
'@cspotcode/source-map-support': 0.8.1
@ -1299,12 +1404,19 @@ snapshots:
v8-compile-cache-lib@3.0.1: {}
webidl-conversions@7.0.0: {}
whatwg-encoding@3.1.1:
dependencies:
iconv-lite: 0.6.3
whatwg-mimetype@4.0.0: {}
whatwg-url@14.1.1:
dependencies:
tr46: 5.0.0
webidl-conversions: 7.0.0
wrap-ansi@7.0.0:
dependencies:
ansi-styles: 4.3.0

View File

@ -1,10 +1,24 @@
import { fetchData } from "./link";
function main() {
console.log("Hello, this is the main function!");
fetchData("AI写作工具")
// test()
import { fetchData, queryListData } from "./link";
/**
 * Describes one crawl target handed to `queryListData`.
 */
export type FetchType = {
  /** Optional human-readable label for the category. */
  name?: string;
  /**
   * Identifier of the category; `queryListData` forwards this value to
   * `fetchData`. Presumably the id of the category record in the database —
   * TODO confirm.
   */
  typeId: string;
  /** Page address associated with the category (presumably the listing page to scrape). */
  url: string;
};
// Categories to crawl. Every entry must carry a non-empty typeId and url:
// an empty placeholder entry would not satisfy FetchType (url is required)
// and would make the crawler run for a blank category id.
const fetchList = [
  {
    name: 'AI写作工具',
    typeId: '67908fc33de33b392c0330af',
    url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
  }
]
/**
 * Script entry point: logs a greeting and starts crawling every category
 * listed in `fetchList`.
 *
 * The crawl is started without being awaited, so this function returns
 * before the crawl finishes.
 */
function main() {
  console.log("Hello, this is the main function!");
  // queryListData returns a promise; attach a handler so a rejection is
  // reported instead of surfacing as an unhandled promise rejection.
  queryListData(fetchList).catch((error: unknown) => {
    console.error('Error running crawler:', error);
  });
  // test()
}
main();

32
src/lib/mongodb.ts Normal file
View File

@ -0,0 +1,32 @@
// lib/mongodb.ts
import { MongoClient, Db } from 'mongodb';
// The connection string is read from the environment so that credentials are
// never stored in source control. MONGODB_URI must be defined before this
// module is loaded (e.g. populated from .env.local by whatever launches the
// process).
const uri = process.env.MONGODB_URI;
const options = {};

let client: MongoClient;
let clientPromise: Promise<MongoClient>;

// Fail fast at import time when the connection string is missing.
if (!uri) {
  throw new Error('Please add your Mongo URI to .env.local');
}

if (process.env.NODE_ENV === 'development') {
  // In development the connection promise is cached on the global object and
  // reused if it is already there, so evaluating this module again does not
  // open another connection.
  const globalWithMongo = globalThis as typeof globalThis & {
    _mongoClientPromise?: Promise<MongoClient>;
  };
  if (!globalWithMongo._mongoClientPromise) {
    client = new MongoClient(uri, options);
    globalWithMongo._mongoClientPromise = client.connect();
  }
  clientPromise = globalWithMongo._mongoClientPromise;
} else {
  // Outside development a fresh client is created for this module instance.
  client = new MongoClient(uri, options);
  clientPromise = client.connect();
}
/**
 * Resolves to the `crawler` database handle once the shared client
 * connection promise has settled.
 *
 * @returns The `crawler` database of the connected client.
 */
export const getDb = async () => {
  const connectedClient = await clientPromise;
  return connectedClient.db('crawler');
};
/**
 * Looks up a collection by name in the database returned by `getDb`.
 *
 * @param collection - Name of the collection to open.
 * @returns The collection handle from the `crawler` database.
 */
export const getCollection = async (collection: string) =>
  (await getDb()).collection(collection);

View File

@ -1,6 +1,8 @@
import axios from 'axios';
const cheerio = require('cheerio')
import { downloadImage } from "../share/tools"
import { getCollection } from '../lib/mongodb';
import { FetchType } from '..';
// 要抓取的网页 URL
const url = 'https://ai-bot.cn/favorites/ai-writing-tools/';
@ -17,6 +19,10 @@ async function getPageData(url: string, name: string) {
}
}
/**
 * Starts `fetchData` for every entry in `list` and waits until all of the
 * resulting promises have settled successfully.
 *
 * All calls are started before any of them is awaited, so they run
 * concurrently. Each entry's `typeId` is what gets passed to `fetchData`.
 *
 * @param list - Crawl targets to process.
 */
export async function queryListData(list: FetchType[]) {
  const pending = [];
  for (const entry of list) {
    pending.push(fetchData(entry.typeId));
  }
  await Promise.all(pending);
}
export async function fetchData(typeName: string) {
try {
// 请求目标页面
@ -28,6 +34,9 @@ export async function fetchData(typeName: string) {
// 提取工具卡片数据
const toolsData = [] as any;
const length = $('.url-card').length
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
let i = 0
for (const element of $('.url-card')) {
const name = $(element).find('.url-info strong').text().trim();
const tempLink = $(element).find('a').attr('href') as string;
@ -59,12 +68,15 @@ export async function fetchData(typeName: string) {
};
toolsData.push(toolData);
console.log('完成' );
i++
console.log(`进度:${i}/${length}`);
}
console.log(toolsData);
const col = await getCollection('link');
col.insertMany(toolsData);
console.log('数据插入成功');
} catch (error) {
console.error('Error fetching data:', error);
}