001project_wildgrowth/backend/public/chunking-test.html

481 lines
19 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>按章分块测试</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container { max-width: 1600px; margin: 0 auto; }
h1 { color: white; text-align: center; margin-bottom: 20px; font-size: 28px; }
.subtitle { color: rgba(255,255,255,0.8); text-align: center; margin-bottom: 30px; font-size: 14px; }
.main-content { display: grid; grid-template-columns: 400px 1fr; gap: 20px; }
@media (max-width: 1000px) { .main-content { grid-template-columns: 1fr; } }
.panel { background: white; border-radius: 16px; padding: 24px; box-shadow: 0 10px 40px rgba(0,0,0,0.2); }
.panel-title {
font-size: 18px; font-weight: 600; color: #333; margin-bottom: 16px;
display: flex; align-items: center; gap: 8px;
}
.panel-title::before {
content: ''; width: 4px; height: 20px;
background: linear-gradient(135deg, #667eea, #764ba2); border-radius: 2px;
}
.btn-row { display: flex; gap: 12px; margin-top: 16px; }
button {
padding: 12px 24px; border: none; border-radius: 8px;
font-size: 14px; font-weight: 600; cursor: pointer; transition: all 0.3s;
}
.btn-primary { background: linear-gradient(135deg, #667eea, #764ba2); color: white; flex: 1; }
.btn-primary:hover { transform: translateY(-2px); box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4); }
.btn-primary:disabled { opacity: 0.6; cursor: not-allowed; transform: none; }
.btn-secondary { background: #f0f0f0; color: #666; }
.btn-secondary:hover { background: #e0e0e0; }
/* 文件上传区域 */
.upload-area {
border: 2px dashed #d0d0d0; border-radius: 12px; padding: 32px;
text-align: center; cursor: pointer; transition: all 0.3s;
background: #fafafa;
}
.upload-area:hover { border-color: #667eea; background: #f5f5ff; }
.upload-area.dragover { border-color: #667eea; background: #eef2ff; }
.upload-icon { font-size: 48px; margin-bottom: 12px; }
.upload-text { color: #666; font-size: 14px; }
.upload-hint { color: #999; font-size: 12px; margin-top: 8px; }
/* 文件列表 */
.file-list { margin-top: 16px; max-height: 300px; overflow-y: auto; }
.file-item {
display: flex; align-items: center; gap: 12px;
padding: 10px 12px; background: #f8f9fa; border-radius: 8px;
margin-bottom: 8px; font-size: 13px;
}
.file-item .icon { font-size: 20px; }
.file-item .name { flex: 1; font-weight: 500; color: #333; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.file-item .size { color: #888; font-size: 12px; }
.file-item .status { font-size: 12px; padding: 2px 8px; border-radius: 4px; }
.file-item .status.pending { background: #fef3c7; color: #92400e; }
.file-item .status.processing { background: #dbeafe; color: #1e40af; }
.file-item .status.success { background: #d1fae5; color: #065f46; }
.file-item .status.error { background: #fee2e2; color: #991b1b; }
.file-item .remove { cursor: pointer; color: #999; padding: 4px; }
.file-item .remove:hover { color: #e53e3e; }
/* 结果区域 */
.results-container { max-height: calc(100vh - 200px); overflow-y: auto; }
.result-card {
border: 1px solid #e0e0e0; border-radius: 12px; margin-bottom: 16px; overflow: hidden;
}
.result-header {
display: flex; align-items: center; gap: 12px; padding: 16px;
background: #f8f9fa; border-bottom: 1px solid #e0e0e0;
}
.result-header .icon { font-size: 24px; }
.result-header .info { flex: 1; }
.result-header .filename { font-weight: 600; color: #333; }
.result-header .meta { font-size: 12px; color: #888; margin-top: 2px; }
.result-header .badge {
padding: 4px 12px; border-radius: 20px; font-size: 12px; font-weight: 600;
}
.result-header .badge.success { background: #d1fae5; color: #065f46; }
.result-header .badge.error { background: #fee2e2; color: #991b1b; }
.result-stats {
display: grid; grid-template-columns: repeat(4, 1fr); gap: 8px;
padding: 12px 16px; background: white; border-bottom: 1px solid #e0e0e0;
}
.result-stats .stat { text-align: center; }
.result-stats .stat-value { font-size: 18px; font-weight: 700; color: #667eea; }
.result-stats .stat-label { font-size: 11px; color: #888; }
.result-chunks { padding: 16px; background: white; }
.chunk-item {
border: 1px solid #e8e8e8; border-radius: 8px; margin-bottom: 8px;
overflow: hidden; font-size: 13px;
}
.chunk-header {
display: flex; align-items: center; gap: 8px; padding: 10px 12px;
background: #fafafa; cursor: pointer;
}
.chunk-header:hover { background: #f0f0f0; }
.chunk-order {
width: 24px; height: 24px; border-radius: 50%;
display: flex; align-items: center; justify-content: center;
font-size: 11px; font-weight: 700; color: white; background: #667eea;
}
.chunk-title { flex: 1; font-weight: 500; color: #333; }
.chunk-meta { font-size: 11px; color: #888; }
.chunk-content {
padding: 12px; font-size: 12px; line-height: 1.5; color: #555;
border-top: 1px solid #e8e8e8; background: white;
white-space: pre-wrap; max-height: 150px; overflow-y: auto;
}
.empty-state { text-align: center; padding: 60px 20px; color: #888; }
.empty-state .icon { font-size: 48px; margin-bottom: 16px; }
.progress-bar {
height: 4px; background: #e0e0e0; border-radius: 2px;
margin-top: 16px; overflow: hidden;
}
.progress-bar .fill {
height: 100%; background: linear-gradient(135deg, #667eea, #764ba2);
transition: width 0.3s;
}
/* 小按钮 */
.btn-small {
padding: 6px 12px; font-size: 12px; border: 1px solid #d0d0d0;
background: white; border-radius: 6px; cursor: pointer;
transition: all 0.2s;
}
.btn-small:hover { background: #f5f5f5; border-color: #667eea; }
</style>
</head>
<body>
<div class="container">
<h1>按章分块测试(批量)</h1>
<p class="subtitle">支持批量上传文档,识别章级结构并分块</p>
<div class="main-content">
<!-- 左侧:上传区 -->
<div class="panel">
<div class="panel-title">上传文档</div>
<div class="upload-area" id="uploadArea" onclick="document.getElementById('fileInput').click()">
<div class="upload-icon">📚</div>
<div class="upload-text">点击上传或拖拽文件</div>
<div class="upload-hint">支持 Word、PDF、EPUB,可多选</div>
<div class="upload-hint">单文件最大 100MB</div>
</div>
<input type="file" id="fileInput" accept=".pdf,.docx,.epub" multiple style="display: none;" onchange="handleFileSelect(event)">
<div class="file-list" id="fileList"></div>
<div class="progress-bar" id="progressBar" style="display: none;">
<div class="fill" id="progressFill" style="width: 0%;"></div>
</div>
<div class="btn-row">
<button class="btn-primary" id="testBtn" onclick="startBatchProcess()">开始分块</button>
<button class="btn-secondary" onclick="clearAll()">清空</button>
</div>
</div>
<!-- 右侧:结果区 -->
<div class="panel" style="flex: 1;">
<div class="panel-title">分块结果</div>
<div class="results-container" id="resultsContainer">
<div class="empty-state" id="emptyState">
<div class="icon">📄</div>
<div>上传文档后点击"开始分块"</div>
</div>
</div>
</div>
</div>
</div>
<script>
// Page-level state shared by the handlers below:
//   files   - File objects queued for upload
//   results - one entry per file processed during the most recent batch run
let files = [];
let results = [];

// Drag-and-drop wiring for the upload area.
const uploadArea = document.getElementById('uploadArea');

uploadArea.addEventListener('dragover', function (event) {
  // Cancelling dragover marks the element as a valid drop target.
  event.preventDefault();
  uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', function () {
  uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', function (event) {
  // Stop the browser from navigating to / opening the dropped file.
  event.preventDefault();
  uploadArea.classList.remove('dragover');
  const droppedFiles = Array.from(event.dataTransfer.files);
  addFiles(droppedFiles);
});
/**
 * Change handler for the hidden file input: queues the chosen files.
 *
 * @param {Event} event - change event whose target exposes `files` and `value`.
 */
function handleFileSelect(event) {
  const input = event.target;
  const chosen = Array.from(input.files);
  addFiles(chosen);
  // Clear the input so picking the same file again still fires a change event.
  input.value = '';
}
/**
 * Validate candidate files and append the acceptable ones to the global queue.
 *
 * A file is rejected (with an alert) when its extension is not .pdf/.docx/.epub
 * or when it is larger than 100 MiB. A file whose name and size both match an
 * already queued entry is skipped silently. The list UI is re-rendered once at
 * the end.
 *
 * @param {Array<{name: string, size: number}>} newFiles - files to consider.
 */
function addFiles(newFiles) {
  const maxBytes = 100 * 1024 * 1024;
  const allowedExtensions = ['.pdf', '.docx', '.epub'];

  for (const candidate of newFiles) {
    const dotAt = candidate.name.lastIndexOf('.');
    const extension = candidate.name.toLowerCase().substring(dotAt);

    if (!allowedExtensions.includes(extension)) {
      alert(`不支持的格式: ${candidate.name}`);
      continue;
    }
    if (candidate.size > maxBytes) {
      alert(`文件过大: ${candidate.name}`);
      continue;
    }

    // De-duplicate on (name, size).
    const alreadyQueued = files.some(
      (queued) => queued.name === candidate.name && queued.size === candidate.size
    );
    if (!alreadyQueued) {
      files.push(candidate);
    }
  }

  renderFileList();
}
/**
 * Drop one entry from the upload queue and refresh the list UI.
 *
 * @param {number} index - position of the file within the global `files` array.
 */
function removeFile(index) {
  const removeCount = 1;
  files.splice(index, removeCount);
  renderFileList();
}
/**
 * Rebuild the queued-file list inside #fileList from the global `files` array.
 *
 * Each row gets the ids `file-<i>` / `status-<i>` and a remove control bound to
 * `removeFile(i)`. Every row starts in the "pending" state.
 *
 * File names come from the user's file system and are inserted via innerHTML,
 * so they are HTML-escaped first; otherwise a crafted file name could inject
 * markup into the page.
 */
function renderFileList() {
  const container = document.getElementById('fileList');

  // An empty queue maps to an empty string, which clears the container.
  container.innerHTML = files.map((file, i) => {
    // escapeHtml may leave double quotes untouched; the name is also placed in
    // a double-quoted title attribute, so quotes are encoded here as well.
    const safeName = escapeHtml(file.name).replace(/"/g, '&quot;');
    return `
      <div class="file-item" id="file-${i}">
        <span class="icon">${getFileIcon(file.name)}</span>
        <span class="name" title="${safeName}">${safeName}</span>
        <span class="size">${formatFileSize(file.size)}</span>
        <span class="status pending" id="status-${i}">待处理</span>
        <span class="remove" onclick="removeFile(${i})">✕</span>
      </div>
    `;
  }).join('');
}
/**
 * Pick the emoji shown next to a file, based on its extension.
 *
 * @param {string} filename - file name; matching is case-insensitive.
 * @returns {string} a book emoji for .pdf/.docx/.epub, otherwise a generic page.
 */
function getFileIcon(filename) {
  const dotAt = filename.lastIndexOf('.');
  const extension = filename.toLowerCase().substring(dotAt);
  switch (extension) {
    case '.pdf':
      return '📕';
    case '.docx':
      return '📘';
    case '.epub':
      return '📗';
    default:
      return '📄';
  }
}
/**
 * Format a byte count for display using 1024-based units.
 *
 * @param {number} bytes - size in bytes.
 * @returns {string} e.g. "512 B", "1.5 KB" or "3.2 MB" (one decimal for KB/MB).
 */
function formatFileSize(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;

  if (bytes < KIB) {
    return `${bytes} B`;
  }
  const [divisor, unit] = bytes < MIB ? [KIB, 'KB'] : [MIB, 'MB'];
  return `${(bytes / divisor).toFixed(1)} ${unit}`;
}
/**
 * Reset the page: empty the upload queue and stored results, restore the
 * "empty state" placeholder in the results panel and hide the progress bar.
 */
function clearAll() {
  // Joined with newlines (including a leading and trailing one) to form the
  // placeholder markup written into the results container.
  const emptyStateMarkup = [
    '',
    '<div class="empty-state" id="emptyState">',
    '<div class="icon">📄</div>',
    '<div>上传文档后点击"开始分块"</div>',
    '</div>',
    '',
  ].join('\n');

  files = [];
  results = [];
  renderFileList();

  const resultsPanel = document.getElementById('resultsContainer');
  resultsPanel.innerHTML = emptyStateMarkup;

  const progressBar = document.getElementById('progressBar');
  progressBar.style.display = 'none';
}
/**
 * Upload every queued file, one at a time, and render the chunking results.
 *
 * Each file is POSTed as multipart form field `file` to
 * /api/playground/chunking/upload. The response is read as JSON and treated as
 * successful only when the HTTP status is 2xx and the payload has a truthy
 * `success`; `data` is then stored as the file's result. Any failure (network
 * error, non-2xx status, non-JSON body, `success` false) is recorded as a
 * failed entry in `results` and does not stop the remaining uploads.
 *
 * While running, the start button is disabled; it is always restored when the
 * function exits, even if rendering throws.
 *
 * @returns {Promise<void>}
 */
async function startBatchProcess() {
  if (files.length === 0) {
    alert('请先上传文件');
    return;
  }

  const btn = document.getElementById('testBtn');
  btn.disabled = true;
  btn.textContent = '处理中...';
  document.getElementById('emptyState')?.remove();
  document.getElementById('progressBar').style.display = 'block';
  results = [];
  const total = files.length;

  try {
    for (let i = 0; i < files.length; i++) {
      const file = files[i];
      updateFileStatus(i, 'processing', '处理中...');
      updateProgress((i / total) * 100);

      try {
        const formData = new FormData();
        formData.append('file', file);
        const response = await fetch('/api/playground/chunking/upload', {
          method: 'POST',
          body: formData,
        });

        let result = null;
        try {
          result = await response.json();
        } catch (parseError) {
          // Body was not JSON (for example an HTML error page): report the
          // HTTP status rather than an opaque JSON parse error.
          throw new Error(`HTTP ${response.status}`);
        }

        if (!response.ok || !result || !result.success) {
          // Prefer the server's message; fall back to the status code so the
          // failure card never shows an empty reason.
          throw new Error((result && result.error) || `HTTP ${response.status}`);
        }

        updateFileStatus(i, 'success', `${result.data.totalChunks}`);
        results.push({ file, success: true, data: result.data });
      } catch (error) {
        updateFileStatus(i, 'error', '失败');
        results.push({ file, success: false, error: error.message });
      }

      // Re-render after every file so results appear incrementally.
      renderResults();
    }
    updateProgress(100);
  } finally {
    // Always re-enable the button, otherwise an unexpected exception would
    // leave the page stuck in the "processing" state.
    btn.disabled = false;
    btn.textContent = '开始分块';
  }
}
/**
 * Update the status badge of one row in the file list.
 *
 * @param {number} index  - row index; the badge element has id `status-<index>`.
 * @param {string} status - CSS modifier appended after the `status` class.
 * @param {string} text   - label shown inside the badge.
 */
function updateFileStatus(index, status, text) {
  const badge = document.getElementById('status-' + index);
  if (!badge) {
    // Nothing to update when no element with that id exists.
    return;
  }
  badge.className = 'status ' + status;
  badge.textContent = text;
}
/**
 * Set the width of the progress bar fill element.
 *
 * @param {number} percent - value written as the CSS width percentage.
 */
function updateProgress(percent) {
  const fill = document.getElementById('progressFill');
  fill.style.width = percent + '%';
}
/**
 * Render one card per entry of the global `results` array into
 * #resultsContainer.
 *
 * Failed entries show the error message. Successful entries show a header
 * (file name, size, detected pattern, optional failure reason), four summary
 * stats and, when chunks exist, a collapsible list of chunks with
 * expand-all / collapse-all / open-in-new-window controls bound to the
 * entry's index in `results`.
 *
 * Everything that originates from the file or from the server response is
 * passed through escapeHtml before being inserted via innerHTML. A response
 * without a `chunks` array is treated as having zero chunks instead of
 * throwing.
 */
function renderResults() {
  const container = document.getElementById('resultsContainer');

  container.innerHTML = results.map((r, i) => {
    const icon = getFileIcon(r.file.name);
    const fileName = escapeHtml(r.file.name);
    const fileSize = formatFileSize(r.file.size);

    if (!r.success) {
      return `
        <div class="result-card">
          <div class="result-header">
            <span class="icon">${icon}</span>
            <div class="info">
              <div class="filename">${fileName}</div>
              <div class="meta">${fileSize}</div>
            </div>
            <span class="badge error">失败</span>
          </div>
          <div style="padding: 16px; color: #991b1b; font-size: 13px;">
            错误: ${escapeHtml(r.error)}
          </div>
        </div>
      `;
    }

    const d = r.data;
    // Tolerate a payload without chunks / totals rather than throwing.
    const chunks = Array.isArray(d.chunks) ? d.chunks : [];
    const totalCharacters = Number(d.totalCharacters) || 0;
    const averagePerChunk = chunks.length > 0 ? Math.round(totalCharacters / chunks.length) : 0;

    // Server-provided text is escaped before interpolation.
    const pattern = escapeHtml(d.pattern || '无结构');
    const warning = d.failureReason ? ' · ⚠️ ' + escapeHtml(d.failureReason) : '';
    const totalChunks = escapeHtml(d.totalChunks);
    const duration = escapeHtml(d.duration);
    const badgeClass = d.success ? 'success' : 'error';
    const badgeText = d.success ? totalChunks + ' 块' : '失败';

    const chunkItems = chunks.map((chunk, j) => `
      <div class="chunk-item">
        <div class="chunk-header" onclick="toggleContent(this)">
          <div class="chunk-order">${j + 1}</div>
          <div class="chunk-title">${escapeHtml(chunk.title)}</div>
          <div class="chunk-meta">${escapeHtml(chunk.contentLength)} 字</div>
        </div>
        ${chunk.content ? `<div class="chunk-content" style="display: none;">${escapeHtml(chunk.content)}</div>` : ''}
      </div>
    `).join('');

    const chunksSection = chunks.length > 0 ? `
      <div class="result-chunks">
        <div style="display: flex; gap: 8px; margin-bottom: 12px;">
          <button class="btn-small" onclick="toggleAllChunks(${i}, true)">📖 展开全部</button>
          <button class="btn-small" onclick="toggleAllChunks(${i}, false)">📕 收起全部</button>
          <button class="btn-small" onclick="openChunksInNewWindow(${i})">🔗 新窗口查看</button>
        </div>
        <div id="chunks-container-${i}">
          ${chunkItems}
        </div>
      </div>
    ` : `
      <div style="padding: 16px; text-align: center; color: #888; font-size: 13px;">
        未识别到章节结构
      </div>
    `;

    return `
      <div class="result-card" data-index="${i}">
        <div class="result-header">
          <span class="icon">${icon}</span>
          <div class="info">
            <div class="filename">${fileName}</div>
            <div class="meta">${fileSize} · ${pattern}${warning}</div>
          </div>
          <span class="badge ${badgeClass}">${badgeText}</span>
        </div>
        <div class="result-stats">
          <div class="stat">
            <div class="stat-value">${totalChunks}</div>
            <div class="stat-label">分块数</div>
          </div>
          <div class="stat">
            <div class="stat-value">${(totalCharacters / 1000).toFixed(1)}k</div>
            <div class="stat-label">字符数</div>
          </div>
          <div class="stat">
            <div class="stat-value">${duration}</div>
            <div class="stat-label">耗时</div>
          </div>
          <div class="stat">
            <div class="stat-value">${averagePerChunk}</div>
            <div class="stat-label">平均/块</div>
          </div>
        </div>
        ${chunksSection}
      </div>
    `;
  }).join('');
}
/**
 * Show or hide the content of the chunk whose header was clicked.
 *
 * @param {Element} header - the clicked .chunk-header element; the content is
 *   looked up as `.chunk-content` under the header's parent.
 */
function toggleContent(header) {
  const body = header.parentElement.querySelector('.chunk-content');
  if (!body) {
    // Chunks rendered without content have nothing to toggle.
    return;
  }
  const isHidden = body.style.display === 'none';
  body.style.display = isHidden ? 'block' : 'none';
}
/**
 * Expand or collapse every chunk body belonging to one result card.
 *
 * @param {number} resultIndex - index used in the `chunks-container-<index>` id.
 * @param {boolean} expand     - truthy to show all chunk bodies, falsy to hide.
 */
function toggleAllChunks(resultIndex, expand) {
  const holder = document.getElementById('chunks-container-' + resultIndex);
  if (!holder) return;

  const display = expand ? 'block' : 'none';
  for (const body of holder.querySelectorAll('.chunk-content')) {
    body.style.display = display;
  }
}
/**
 * Open a standalone, fully expanded view of one result's chunks in a new tab.
 *
 * A complete HTML document is built as a string, wrapped in a Blob and opened
 * through an object URL. Does nothing when the result at `resultIndex` is
 * missing or carries no chunk list.
 *
 * The blob document is same-origin with this page, so every value taken from
 * the file or the server response (file name, pattern, chunk title, length and
 * content) is HTML-escaped before being concatenated into the markup.
 *
 * @param {number} resultIndex - index into the global `results` array.
 */
function openChunksInNewWindow(resultIndex) {
  const r = results[resultIndex];
  if (!r || !r.data || !r.data.chunks) return;
  const d = r.data;

  const chunksHtml = d.chunks.map((chunk, j) => [
    '<div class="chunk">',
    '<div class="chunk-header">',
    '<div class="chunk-order">' + (j + 1) + '</div>',
    '<div class="chunk-title">' + escapeHtml(chunk.title) + '</div>',
    '<div class="chunk-length">' + escapeHtml(chunk.contentLength) + ' 字</div>',
    '</div>',
    '<div class="chunk-content">' + escapeHtml(chunk.content) + '</div>',
    '</div>',
  ].join('')).join('');

  // A missing total is shown as 0 instead of throwing on toLocaleString().
  const totalCharacters = (Number(d.totalCharacters) || 0).toLocaleString();
  const pattern = escapeHtml(d.pattern || '未知');
  const safeName = escapeHtml(r.file.name);

  const styles = [
    'body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; padding: 20px; max-width: 1000px; margin: 0 auto; }',
    'h1 { font-size: 20px; margin-bottom: 20px; }',
    '.meta { color: #666; font-size: 14px; margin-bottom: 20px; }',
    '.chunk { border: 1px solid #e0e0e0; border-radius: 8px; margin-bottom: 16px; overflow: hidden; }',
    '.chunk-header { background: #f5f5f5; padding: 12px 16px; font-weight: 600; display: flex; align-items: center; gap: 12px; }',
    '.chunk-order { width: 28px; height: 28px; background: #667eea; color: white; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px; }',
    '.chunk-title { flex: 1; }',
    '.chunk-length { color: #888; font-size: 12px; }',
    '.chunk-content { padding: 16px; line-height: 1.8; white-space: pre-wrap; font-size: 14px; }',
  ].join('');

  const html =
    '<!DOCTYPE html><html lang="zh-CN"><head><meta charset="UTF-8">' +
    '<title>' + safeName + ' - 分块结果</title>' +
    '<style>' + styles + '</style></head><body>' +
    '<h1>' + safeName + '</h1>' +
    '<div class="meta">共 ' + d.chunks.length + ' 个分块 · ' + totalCharacters + ' 字符 · 模式: ' + pattern + '</div>' +
    chunksHtml +
    '</body></html>';

  const blob = new Blob([html], { type: 'text/html' });
  // The object URL is deliberately not revoked here so the new tab can still
  // be reloaded.
  const url = URL.createObjectURL(blob);
  window.open(url, '_blank');
}
/**
 * Encode a value so it can be embedded in HTML markup as plain text.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with character references, which makes
 * the result safe both as element content and inside a quoted attribute
 * value. Implemented with string replacement, so it needs no DOM.
 *
 * @param {*} text - value to encode; null/undefined yield an empty string,
 *   anything else is converted with String().
 * @returns {string} the encoded text.
 */
function escapeHtml(text) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(text ?? '').replace(/[&<>"']/g, (ch) => entities[ch]);
}
</script>
</body>
</html>