// 001project_wildgrowth/backend/scripts/test-structure-chunking.ts
// 108 lines · 3.5 KiB · TypeScript
// 2026-02-11 15:26:03 +08:00
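/**
 * Test script for the chapter-based structure chunking service.
 *
 * Usage:
 *   npx ts-node scripts/test-structure-chunking.ts [file-path]
 *
 * With no argument the built-in sample texts are chunked; with a file path,
 * the contents of that file are chunked instead.
 */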
import fs from 'fs';
import path from 'path';
import { structureChunkingService } from '../src/services/structureChunkingService';
// Test text: multiple chapters. main() uses this as one of the two built-in
// samples when no file path is passed on the command line.
// NOTE(review): the literal as it appears here shows no obvious chapter
// headings — part of the sample may have been lost; confirm against the
// intended fixture before relying on results for this case.
const TEST_TEXT_CHAPTERS = `
Artificial IntelligenceAI
20501956
"人工智能"
3
使
CNNRNN
`;
// Test text: no chapter structure. main() uses this as the second built-in
// sample when no file path is passed on the command line.
// NOTE(review): the literal as it appears here contains only a newline —
// presumably the original plain-text sample was lost; confirm.
const TEST_TEXT_NO_STRUCTURE = `
`;
/**
 * Runs the chunking service over one or more sample texts and prints a
 * report for each to stdout.
 *
 * Input selection:
 * - If `process.argv[2]` is set, it is treated as a file path; the file is
 *   read as UTF-8 and becomes the only sample. A missing file prints an
 *   error and exits the process with code 1.
 * - Otherwise the two built-in samples are used.
 *
 * For every sample the report shows the success flag, detected pattern,
 * chunk count, total characters, elapsed wall-clock time, and a one-line
 * preview (first 50 characters, newlines replaced by spaces) of each chunk.
 */
async function main() {
  const banner = '='.repeat(60);
  const divider = '-'.repeat(60);

  console.log(banner);
  console.log('按章分块服务测试');
  console.log(banner);
  console.log();

  const inputPath = process.argv[2];
  const cases: { name: string; text: string }[] = [];

  if (!inputPath) {
    // No file given: fall back to the built-in samples.
    cases.push(
      { name: '多章节', text: TEST_TEXT_CHAPTERS },
      { name: '无结构', text: TEST_TEXT_NO_STRUCTURE },
    );
  } else {
    const resolved = path.resolve(inputPath);
    if (!fs.existsSync(resolved)) {
      console.error(`错误:文件不存在 - ${resolved}`);
      process.exit(1);
    }
    const contents = fs.readFileSync(resolved, 'utf-8');
    cases.push({ name: `文件: ${path.basename(inputPath)}`, text: contents });
  }

  for (const sample of cases) {
    console.log(divider);
    console.log(`测试: ${sample.name}`);
    console.log(`输入长度: ${sample.text.length} 字符`);
    console.log(divider);

    // Time only the parse call itself.
    const startedAt = Date.now();
    const outcome = await structureChunkingService.parseAsync(sample.text);
    const elapsed = Date.now() - startedAt;

    const chunkCount = outcome.chunks.length;

    console.log(`成功: ${outcome.success}`);
    console.log(`模式: ${outcome.pattern || '无'}`);
    console.log(`分块数: ${chunkCount}`);
    console.log(`字符数: ${outcome.totalCharacters}`);
    console.log(`耗时: ${elapsed}ms`);
    console.log();

    if (chunkCount > 0) {
      console.log('分块详情:');
      for (const { order, title, content } of outcome.chunks) {
        const snippet = content.substring(0, 50).replace(/\n/g, ' ');
        // Mark previews that were cut short.
        let ellipsis = '';
        if (content.length > 50) {
          ellipsis = '...';
        }
        console.log(` [${order + 1}] ${title}`);
        console.log(` ${snippet}${ellipsis} (${content.length} 字符)`);
      }
    } else {
      console.log('未检测到章级结构');
    }
    console.log();
  }

  console.log(banner);
  console.log('测试完成');
  console.log(banner);
}
// Entry point: run the script and, on any rejection from main(), log the
// failure and exit with a non-zero status.
main().catch((failure: unknown) => {
  console.error('测试失败:', failure);
  process.exit(1);
});