save
This commit is contained in:
commit
35944a46d7
|
@ -0,0 +1,3 @@
|
|||
node_modules
|
||||
|
||||
downloads
|
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"name": "my-crawler",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "npx ts-node src/index.ts",
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"dependencies": {
|
||||
"axios": "^1.7.9",
|
||||
"cheerio": "^1.0.0",
|
||||
"puppeteer": "^24.2.1",
|
||||
"uuid": "^11.0.5"
|
||||
},
|
||||
"pnpm": {
|
||||
"onlyBuiltDependencies": [
|
||||
"puppeteer"
|
||||
]
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.13.4",
|
||||
"@types/uuid": "^10.0.0",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.7.3"
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,9 @@
|
|||
import { fetchData } from "./link";
|
||||
|
||||
/**
 * Entry point: prints a greeting and starts a crawl for the "AI写作工具" category.
 */
function main(): void {
  console.log("Hello, this is the main function!");
  // fetchData is asynchronous. `void` marks the promise as deliberately not
  // awaited, and the catch handler guarantees a rejection is never unobserved.
  void fetchData("AI写作工具").catch((error: unknown) => {
    console.error('Error fetching data:', error);
  });
}
|
||||
|
||||
main();
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
import axios from 'axios';
|
||||
const cheerio = require('cheerio')
|
||||
import { downloadImage } from "../share/tools"
|
||||
// URL of the page to scrape
|
||||
const url = 'https://ai-bot.cn/favorites/ai-writing-tools/';
|
||||
|
||||
/**
 * Downloads a page and returns the `href` of the first anchor whose `title`
 * attribute is exactly `name`.
 *
 * @param url - Address of the page to download.
 * @param name - Title to look for among the page's `<a>` elements.
 * @returns The matching anchor's `href`; `undefined` when nothing matches, the
 *   anchor has no `href`, or the request/parsing fails (the error is logged).
 */
async function getPageData(url: string, name: string): Promise<string | undefined> {
  try {
    const { data } = await axios.get(url);
    const $ = cheerio.load(data);

    // Compare the attribute value directly instead of interpolating `name` into
    // a selector string: a title containing `"` or `\` would otherwise produce
    // a malformed (or differently-parsed) selector.
    const element = $('a[title]').filter(
      (_index: number, anchor: unknown) => $(anchor).attr('title') === name,
    );

    // `.attr` reads from the first element of the matched set.
    return element.attr('href');
  } catch (error) {
    console.error('Error fetching data:', error);
    return undefined;
  }
}
|
||||
/** One scraped tool card, as collected by `fetchData`. */
export interface ToolData {
  name: string;
  link: string;
  description: string;
  _id: string | undefined;
  type: string;
  priority: number;
  logoLink: string;
  addTime: number;
}

/**
 * Scrapes every `.url-card` on the listing page, resolves each card's link,
 * downloads its logo, and logs the collected records.
 *
 * @param typeName - Category label stored in the `type` field of every record.
 * @returns The collected records in page order; an empty array when the listing
 *   page cannot be fetched or parsed (the error is logged).
 */
export async function fetchData(typeName: string): Promise<ToolData[]> {
  try {
    // Request the target page.
    const { data } = await axios.get(url);

    // Load the HTML into cheerio.
    const $ = cheerio.load(data);

    // `.each` does not wait for async callbacks, so the result would be logged
    // before any card had been processed. Map the cards to promises and await
    // them all instead; this also keeps the output in page order.
    const cards: unknown[] = $('.url-card').toArray();

    const toolsData = await Promise.all(
      cards.map(async (element, index): Promise<ToolData> => {
        const card = $(element);
        const name: string = card.find('.url-info strong').text().trim();
        // A card without an anchor yields no href; fall back to '' rather than
        // calling `startsWith` on undefined.
        const tempLink: string = card.find('a').attr('href') ?? '';
        const description: string = card.find('.url-info p').text().trim();
        const _id: string | undefined = card.attr('data-id');
        const originLink: string | undefined = card.find('img').attr('data-src');

        // Links pointing back at the site lead to a detail page; look up the
        // tool's link on that page by its title.
        let link = tempLink;
        if (tempLink.startsWith('https://ai-bot.cn')) {
          link = (await getPageData(tempLink, name)) ?? '';
        }

        // A missing or failing logo must not discard the whole crawl.
        let logoLink = '';
        if (originLink) {
          try {
            logoLink = await downloadImage(originLink);
          } catch (error) {
            console.error(`Failed to download logo for "${name}":`, error);
          }
        }

        return {
          name,
          link,
          description,
          _id,
          // The category is assumed to be the same for every card on the page.
          type: typeName,
          // Priority follows the card's position on the page (1-based).
          priority: index + 1,
          logoLink,
          addTime: Date.now(),
        };
      }),
    );

    console.log(toolsData);
    return toolsData;
  } catch (error) {
    console.error('Error fetching data:', error);
    return [];
  }
}
|
||||
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
import { promises as fs } from 'fs';
|
||||
import axios from "axios";
|
||||
import readline from "readline"
|
||||
import path from 'path';
|
||||
import { v4 as uuid } from 'uuid'; // 或使用 crypto 模块
|
||||
// Directory that downloaded images are written to, resolved relative to this module's directory.
const downloadDir = path.join(__dirname, '../../downloads');
|
||||
|
||||
/**
 * Prompts on stdout and resolves with the line the user types on stdin.
 *
 * @param query - Text shown to the user as the prompt.
 * @returns The answer entered by the user.
 */
export function askQuestion(query: string): Promise<string> {
  // Typed as Promise<string> so callers do not receive `unknown`.
  return new Promise<string>((resolve) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });

    rl.question(query, (answer: string) => {
      // Close the interface first so stdin is released before the caller continues.
      rl.close();
      resolve(answer);
    });
  });
}
|
||||
/**
 * Downloads an image and stores it in the download directory under a freshly
 * generated UUID file name.
 *
 * @param url - Address of the image to download.
 * @returns The generated UUID (the file name without its extension).
 * @throws Rethrows any failure (empty URL, HTTP error, file-system error) after logging it.
 */
export async function downloadImage(url: string): Promise<string> {
  try {
    if (!url) {
      throw new Error('downloadImage: image URL is empty');
    }

    // Fetch the image as raw bytes.
    const response = await axios.get(url, {
      responseType: 'arraybuffer',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
      }
    });

    // Derive the extension from the MIME subtype only, dropping parameters and
    // structured suffixes: "image/png; charset=x" -> "png", "image/svg+xml" -> "svg".
    const contentType: unknown = response.headers['content-type'];
    const subtype =
      typeof contentType === 'string'
        ? /^[^/]+\/([a-z0-9.-]+)/i.exec(contentType)?.[1]
        : undefined;
    const ext = subtype ?? 'jpg';

    // Build the final path.
    const id = uuid();
    const filename = `${id}.${ext}`;
    const filePath = path.join(downloadDir, filename);

    // The download directory may not exist yet (e.g. on a fresh checkout).
    await fs.mkdir(downloadDir, { recursive: true });

    // Write the file.
    await fs.writeFile(filePath, response.data);
    return id;
  } catch (error) {
    console.error('下载失败:', error);
    throw error;
  }
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES6", // 编译成 ES6 代码
|
||||
"module": "commonjs", // 使用 CommonJS 模块系统(Node.js 默认使用)
|
||||
"outDir": "./dist", // 编译后的文件输出目录
|
||||
"rootDir": "./src", // TypeScript 源文件目录
|
||||
"strict": true, // 启用严格模式,建议开启
|
||||
"esModuleInterop": true, // 允许默认导入非 ES6 模块
|
||||
"skipLibCheck": true, // 跳过库文件的类型检查
|
||||
"forceConsistentCasingInFileNames": true // 强制一致的文件命名大小写
|
||||
},
|
||||
"include": ["src/**/*.ts"], // 包含 src 目录下的所有 TypeScript 文件
|
||||
"exclude": ["node_modules"] // 排除 node_modules 目录
|
||||
}
|
Loading…
Reference in New Issue