feat: 添加 AI 入库预处理功能，支持数据结构化和异常行导出

2026-03-12 14:59:02 +08:00
parent d2dc25eb09
commit ef0af75193
4 changed files with 472 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -351,11 +351,11 @@ cp /www/wwwroot/inventory/data/inventory.db /www/backup/inventory_$(date +%F).db
 ### 第一阶段：先做能直接省时间的功能
- [ ] AI 入库预处理
+- [x] AI 入库预处理
- [ ] 支持粘贴采购清单、聊天记录、Excel 文本后，由 AI 自动拆分为 `料号 / 名称 / 数量 / 规格 / 备注`
+- [x] 支持粘贴采购清单、聊天记录、Excel 文本后，由 AI 自动拆分为 `料号 / 名称 / 数量 / 规格 / 备注`
- [ ] 自动识别脏数据，例如缺字段、数量格式异常、字段顺序混乱
+- [x] 自动识别脏数据，例如缺字段、数量格式异常、字段顺序混乱
- [ ] 输出结构化预览结果，用户确认后再正式写入库存
+- [x] 输出结构化预览结果，用户确认后再正式写入库存
- [ ] 对接现有快速入库 / 袋装批量新增流程，不直接绕开人工确认
+- [x] 对接现有快速入库 / 袋装批量新增流程，不直接绕开人工确认
 ### 第二阶段：提升库存数据质量
--- a/app.py
+++ b/app.py
@@ -989,6 +989,201 @@ def _parse_bulk_line(line: str):
    }
 def _split_inbound_line_fields(line: str) -> tuple[list[str], list[str]]:
    """把原始入库行尽量拆分成 5 列。
    中文说明：用户粘贴的数据分隔符不一定统一，这里优先识别逗号/Tab，
    其次尝试竖线或连续空格，尽量把一行拆成系统可识别的字段。
    """
    warnings = []
    raw = (line or "").strip()
    if not raw:
        return ["", "", "", "", ""], warnings
    normalized = raw.replace("，", ",")
    if "," in normalized or "\t" in normalized:
        parts = [p.strip() for p in re.split(r"[,\t]", normalized)]
    elif "|" in normalized:
        parts = [p.strip() for p in normalized.split("|")]
        warnings.append("检测到竖线分隔，已自动转换为标准字段")
    else:
        parts = [p.strip() for p in re.split(r"\s{2,}", normalized) if p.strip()]
        warnings.append("未检测到逗号或Tab，已按连续空格尝试拆分")
    if len(parts) > 5:
        parts = parts[:4] + [" ".join(parts[4:])]
        warnings.append("字段超过 5 列，已将多余内容合并到备注")
    while len(parts) < 5:
        parts.append("")
    return parts, warnings
 def _format_inbound_line(part_no: str, name: str, quantity: int, specification: str, note: str) -> str:
    safe_quantity = int(quantity or 0)
    return f"{part_no}, {name}, {safe_quantity}, {specification}, {note}".strip()
 def _parse_inbound_preview_rows(raw_lines: list[str]) -> list[dict]:
    """Build rule-based preview rows for the given raw lines.

    Each line is split into five fields and validated: part number and name
    are required, and the quantity must be accepted by
    ``_parse_non_negative_int``. Rows are numbered from 1 in input order.

    Returns:
        One dict per input line holding the parsed fields, the original text
        (``raw``), ``errors``, ``warnings``, an ``is_valid`` flag and the
        re-formatted ``normalized_line``.
    """
    preview = []
    for index, source_line in enumerate(raw_lines, start=1):
        fields, notes = _split_inbound_line_fields(source_line)
        part_no, name, quantity_raw, specification, note = (
            (fields[pos] or "").strip() for pos in range(5)
        )

        problems = []
        if not part_no:
            problems.append("缺少料号")
        if not name:
            problems.append("缺少名称")
        try:
            quantity = _parse_non_negative_int(quantity_raw, 0)
        except ValueError:
            # Keep a usable number for display; the row is flagged invalid.
            quantity = 0
            problems.append("数量格式错误，必须是大于等于 0 的整数")

        record = {
            "line_no": index,
            "raw": source_line,
            "part_no": part_no,
            "name": name,
            "quantity": int(quantity),
            "quantity_raw": quantity_raw,
            "specification": specification,
            "note": note,
            "errors": problems,
            "warnings": notes,
            "is_valid": len(problems) == 0,
            "normalized_line": _format_inbound_line(part_no, name, quantity, specification, note),
        }
        preview.append(record)
    return preview
 def _extract_json_object_block(raw_text: str) -> str:
    text = (raw_text or "").strip()
    if not text:
        return ""
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\\s*", "", text)
        text = re.sub(r"\\s*```$", "", text)
    first = text.find("{")
    last = text.rfind("}")
    if first >= 0 and last > first:
        return text[first : last + 1]
    return text
 def _normalize_ai_inbound_rows(ai_rows: list, fallback_rows: list[dict]) -> list[dict]:
    by_line = {row["line_no"]: dict(row) for row in fallback_rows}
    for raw_row in ai_rows or []:
        if not isinstance(raw_row, dict):
            continue
        try:
            line_no = int(raw_row.get("line_no"))
        except (TypeError, ValueError):
            continue
        if line_no not in by_line:
            continue
        current = by_line[line_no]
        part_no = str(raw_row.get("part_no", current["part_no"]) or "").strip()
        name = str(raw_row.get("name", current["name"]) or "").strip()
        specification = str(raw_row.get("specification", current["specification"]) or "").strip()
        note = str(raw_row.get("note", current["note"]) or "").strip()
        quantity = current["quantity"]
        quantity_candidate = raw_row.get("quantity", current["quantity"])
        try:
            quantity = _parse_non_negative_int(str(quantity_candidate), 0)
        except (TypeError, ValueError):
            # AI 数量不可信时保留规则解析值，不覆盖。
            pass
        errors = []
        if not part_no:
            errors.append("缺少料号")
        if not name:
            errors.append("缺少名称")
        warnings = list(current.get("warnings", []))
        for w in raw_row.get("warnings", []) if isinstance(raw_row.get("warnings", []), list) else []:
            text = str(w or "").strip()
            if text:
                warnings.append(text)
        current.update(
            {
                "part_no": part_no,
                "name": name,
                "quantity": int(quantity),
                "specification": specification,
                "note": note,
                "errors": errors,
                "warnings": warnings,
                "is_valid": len(errors) == 0,
                "normalized_line": _format_inbound_line(part_no, name, quantity, specification, note),
            }
        )
    return [by_line[idx] for idx in sorted(by_line.keys())]
 def _ai_enhance_inbound_preview(raw_lines: list[str], mode: str, fallback_rows: list[dict], settings: dict) -> tuple[list[dict], str]:
    """使用 AI 对规则解析结果做二次修正。
    中文说明：AI 负责“更聪明地拆分和纠错”，但最终仍会做字段约束；
    如果 AI 不可用或返回异常，自动退回规则解析，不影响使用。
    """
    api_key = (settings.get("api_key") or "").strip()
    api_url = (settings.get("api_url") or "").strip()
    model = (settings.get("model") or "").strip()
    if not api_key or not api_url or not model:
        return fallback_rows, "AI 参数未完整配置，已使用规则解析结果"
    numbered_lines = [{"line_no": idx, "raw": line} for idx, line in enumerate(raw_lines, start=1)]
    system_prompt = (
        "你是电子元件入库清洗助手。"
        "必须只输出 JSON，不要 Markdown，不要解释文字。"
        "请输出对象: {\"rows\":[{\"line_no\":number,\"part_no\":string,\"name\":string,\"quantity\":number,\"specification\":string,\"note\":string,\"warnings\":string[]}]}。"
        "不要新增或删除行号；每个 line_no 仅返回一条。"
        "quantity 必须是 >=0 的整数；无法确定时返回 0 并在 warnings 中说明。"
    )
    user_prompt = (
        f"导入模式: {mode}\n"
        "原始行(JSON):\n"
        + json.dumps(numbered_lines, ensure_ascii=False)
        + "\n规则解析参考(JSON):\n"
        + json.dumps(fallback_rows, ensure_ascii=False)
    )
    try:
        suggestion = _call_siliconflow_chat(
            system_prompt,
            user_prompt,
            api_url=api_url,
            model=model,
            api_key=api_key,
            timeout=int(settings.get("timeout", 30)),
        )
        parsed = json.loads(_extract_json_object_block(suggestion))
        ai_rows = parsed.get("rows", []) if isinstance(parsed, dict) else []
        return _normalize_ai_inbound_rows(ai_rows, fallback_rows), ""
    except Exception:
        return fallback_rows, "AI 解析失败，已自动回退到规则解析结果"
 def log_inventory_event(
    *,
    event_type: str,
@@ -2468,6 +2663,44 @@ def search_page():
    )
@app.route("/ai/inbound-parse", methods=["POST"])
def ai_inbound_parse():
    """Preview endpoint for AI-assisted inbound preprocessing.

    Reads the multi-line ``lines`` form field and the ``mode`` field (any
    value other than ``box`` / ``bag`` is treated as ``box``), parses the
    lines with the rule-based parser and lets the AI refine the result.
    Nothing is written by this function: it returns the structured rows, the
    valid/invalid counts and the normalized text of the valid rows so the
    user can confirm the import in a separate step. Responds with HTTP 400
    when no non-empty line was submitted.
    """
    mode = (request.form.get("mode", "box") or "box").strip().lower()
    if mode not in {"box", "bag"}:
        mode = "box"

    raw_text = request.form.get("lines", "") or ""
    lines = [stripped for stripped in map(str.strip, raw_text.splitlines()) if stripped]
    if not lines:
        return {"ok": False, "message": "请至少输入一行待处理文本"}, 400

    fallback_rows = _parse_inbound_preview_rows(lines)
    rows, parse_notice = _ai_enhance_inbound_preview(lines, mode, fallback_rows, _get_ai_settings())

    valid_rows = []
    invalid_rows = []
    for row in rows:
        (valid_rows if row.get("is_valid") else invalid_rows).append(row)

    # Only valid rows are offered back as importable text.
    normalized_lines = "\n".join(
        filter(None, (row.get("normalized_line", "") for row in valid_rows))
    )
    return {
        "ok": True,
        "mode": mode,
        "total_lines": len(rows),
        "valid_count": len(valid_rows),
        "invalid_count": len(invalid_rows),
        "parse_notice": parse_notice,
        "rows": rows,
        "normalized_lines": normalized_lines,
    }
@app.route("/ai/restock-plan", methods=["POST"])
 def ai_restock_plan():
    """生成 AI 补货建议。
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -575,6 +575,8 @@ body {
 /* Secondary text: muted color, 14px; long unbroken strings may wrap
    instead of overflowing their container. */
 .muted {
    color: var(--muted);
    font-size: 14px;
    overflow-wrap: anywhere;
    word-break: break-word;
 }
 .new-box-form {
@@ -856,6 +858,7 @@ body.modal-open {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: var(--space-2);
    min-width: 0;
 }
 .form-grid label {
@@ -863,6 +866,13 @@ body.modal-open {
    flex-direction: column;
    gap: 6px;
    font-size: 14px;
    min-width: 0;
    overflow-wrap: anywhere;
    word-break: break-word;
 }
 /* Let every direct child of .form-grid shrink below its content width. */
 .form-grid > * {
    min-width: 0;
 }
 .form-grid .full {
@@ -880,6 +890,8 @@ input[type="search"] {
    padding: 8px 2px;
    font: inherit;
    transition: border-color 140ms ease;
    width: 100%;
    min-width: 0;
 }
 input[type="text"]:focus,
@@ -898,6 +910,8 @@ textarea {
    background: transparent;
    color: var(--text);
    font: inherit;
    width: 100%;
    min-width: 0;
 }
 input[type="checkbox"] {
@@ -968,6 +982,22 @@ th {
    margin-bottom: var(--space-1);
 }
 /* Bordered, rounded container for the AI preprocessing preview table;
    its background is the card color blended with 10% of the alternate card color. */
 .ai-preview {
    margin-top: var(--space-1);
    border: 1px solid var(--line);
    border-radius: var(--radius);
    background: color-mix(in srgb, var(--card) 90%, var(--card-alt));
    padding: 8px;
 }
 /* Preview rows marked invalid get a light tint of the danger color (12%). */
 .ai-preview .row-invalid {
    background: color-mix(in srgb, var(--danger) 12%, var(--card));
 }
 /* Status line color while it carries the "ok" class:
    75% pressed-accent color blended with the text color. */
 #ai-inbound-status.ok {
    color: color-mix(in srgb, var(--accent-press) 75%, var(--text));
 }
 .entry-shell {
    display: grid;
    grid-template-columns: minmax(0, 1fr) 320px;
--- a/templates/box.html
+++ b/templates/box.html
@@ -76,12 +76,34 @@
 					<h2 id="quick-inbound-title">快速入库</h2>
 					<button class="btn btn-light" type="button" id="close-quick-inbound">关闭</button>
 				</div>
-				<p class="hint">每行一条: 料号, 名称, 数量, 规格, 备注。支持英文逗号或Tab分隔；检测到同料号或同参数时不会自动合并，需要人工确认。</p>
+				<p class="hint">每行一条: 料号, 名称, 数量, 规格, 备注。支持英文逗号或Tab分隔；先点“AI预处理”查看结构化结果，再确认导入。</p>
-				<form method="post" action="{{ url_for('quick_inbound', box_id=box.id) }}">
+				<form method="post" id="quick-inbound-form" action="{% if box.box_type == 'bag' %}{{ url_for('add_bag_items_batch', box_id=box.id) }}{% else %}{{ url_for('quick_inbound', box_id=box.id) }}{% endif %}">
-					<textarea class="batch-input" name="lines" rows="8" placeholder="10K-0603, 电阻10K 0603, 500, 1%, 常用\n100nF-0603, 电容100nF 0603, 300, 50V X7R, 去耦"></textarea>
+					<input type="hidden" id="ai-inbound-mode" value="{% if box.box_type == 'bag' %}bag{% else %}box{% endif %}">
 					<textarea class="batch-input" id="quick-inbound-lines" name="lines" rows="8" placeholder="10K-0603, 电阻10K 0603, 500, 1%, 常用\n100nF-0603, 电容100nF 0603, 300, 50V X7R, 去耦"></textarea>
 					<p class="hint">建议: part_no 用厂家型号，name 用品类+型号，specification 只写关键参数。</p>
 					<p class="hint" id="ai-inbound-status" aria-live="polite"></p>
 					<section class="ai-preview" id="ai-inbound-preview" hidden>
 						<div class="table-wrap">
 							<table>
 								<thead>
 									<tr>
 										<th>行</th>
 										<th>料号</th>
 										<th>名称</th>
 										<th>数量</th>
 										<th>规格</th>
 										<th>备注</th>
 										<th>识别提示</th>
 									</tr>
 								</thead>
 								<tbody id="ai-inbound-preview-body"></tbody>
 							</table>
 						</div>
 					</section>
 					<div class="actions">
-						<button class="btn" type="submit">批量快速入库</button>
+						<button class="btn btn-light" type="button" id="ai-inbound-parse-btn">AI预处理并预览</button>
 						<button class="btn btn-light" type="button" id="ai-inbound-export-invalid-btn" disabled>导出异常行</button>
 						<button class="btn" type="submit">确认导入</button>
 					</div>
 				</form>
 			</div>
@@ -217,6 +239,184 @@
 				});
 			});
 		})();
 		(function () {
 			var parseBtn = document.getElementById('ai-inbound-parse-btn');
 			var exportInvalidBtn = document.getElementById('ai-inbound-export-invalid-btn');
 			var textarea = document.getElementById('quick-inbound-lines');
 			var modeInput = document.getElementById('ai-inbound-mode');
 			var status = document.getElementById('ai-inbound-status');
 			var preview = document.getElementById('ai-inbound-preview');
 			var previewBody = document.getElementById('ai-inbound-preview-body');
 			var latestRows = [];
 			if (!parseBtn || !textarea || !status || !preview || !previewBody || !exportInvalidBtn) {
 				return;
 			}
 			// Escape a value for safe interpolation into an HTML string.
 			// null/undefined become ''; every other value (including the
 			// number 0) is stringified first, so a quantity of 0 stays visible.
 			function escapeHtml(text) {
 				return String(text == null ? '' : text)
 					.replace(/&/g, '&amp;')
 					.replace(/</g, '&lt;')
 					.replace(/>/g, '&gt;')
 					.replace(/"/g, '&quot;')
 					.replace(/'/g, '&#39;');
 			}
 			// Render the parsed rows into the preview table and remember them
 			// in latestRows for the "export invalid rows" action. The export
 			// button is enabled only when at least one row is invalid; an
 			// empty or non-array argument hides the preview.
 			function renderRows(rows) {
 				latestRows = Array.isArray(rows) ? rows : [];
 				var invalidCount = latestRows.filter(function (row) {
 					return !row.is_valid;
 				}).length;
 				exportInvalidBtn.disabled = invalidCount <= 0;
 				if (!latestRows.length) {
 					preview.hidden = true;
 					previewBody.innerHTML = '';
 					return;
 				}
 				var html = latestRows.map(function (row) {
 					var messages = [];
 					// Errors and warnings can contain text taken from the pasted
 					// input or from the AI reply, so each message is escaped
 					// before it becomes part of innerHTML.
 					(row.errors || []).forEach(function (msg) {
 						messages.push('错误: ' + escapeHtml(msg));
 					});
 					(row.warnings || []).forEach(function (msg) {
 						messages.push('提示: ' + escapeHtml(msg));
 					});
 					// Only the <br> separators are raw markup.
 					var tips = messages.length ? messages.join('<br>') : '正常';
 					var rowClass = row.is_valid ? '' : ' class="row-invalid"';
 					return '<tr' + rowClass + '>'
 						+ '<td>' + escapeHtml(row.line_no) + '</td>'
 						+ '<td>' + escapeHtml(row.part_no) + '</td>'
 						+ '<td>' + escapeHtml(row.name) + '</td>'
 						+ '<td>' + escapeHtml(row.quantity) + '</td>'
 						+ '<td>' + escapeHtml(row.specification) + '</td>'
 						+ '<td>' + escapeHtml(row.note) + '</td>'
 						+ '<td>' + tips + '</td>'
 						+ '</tr>';
 				}).join('');
 				previewBody.innerHTML = html;
 				preview.hidden = false;
 			}
 			parseBtn.addEventListener('click', function () {
 				var lines = (textarea.value || '').trim();
 				if (!lines) {
 					status.textContent = '请先粘贴至少一行内容';
 					status.classList.remove('ok');
 					return;
 				}
 				parseBtn.disabled = true;
 				exportInvalidBtn.disabled = true;
 				status.textContent = '正在进行 AI 预处理...';
 				status.classList.remove('ok');
 				var payload = new URLSearchParams();
 				payload.set('lines', lines);
 				payload.set('mode', modeInput ? modeInput.value : 'box');
 				fetch('{{ url_for('ai_inbound_parse') }}', {
 					method: 'POST',
 					headers: {
 						'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
 					},
 					body: payload.toString()
 				}).then(function (resp) {
 					return resp.json().then(function (data) {
 						if (!resp.ok || !data.ok) {
 							throw new Error(data.message || '预处理失败');
 						}
 						return data;
 					});
 				}).then(function (data) {
 					renderRows(data.rows || []);
 					if (data.normalized_lines) {
 						textarea.value = data.normalized_lines;
 					}
 					var baseText = '预处理完成: 有效 ' + (data.valid_count || 0) + ' 行';
 					if (data.invalid_count) {
 						baseText += '，异常 ' + data.invalid_count + ' 行';
 					}
 					if (data.parse_notice) {
 						baseText += '。' + data.parse_notice;
 					}
 					status.textContent = baseText;
 					status.classList.add('ok');
 				}).catch(function (error) {
 					status.textContent = '预处理失败: ' + error.message;
 					status.classList.remove('ok');
 				}).finally(function () {
 					parseBtn.disabled = false;
 				});
 			});
 			// "Export invalid rows" button: downloads the invalid rows of the
 			// latest preview as a UTF-8 CSV file (with BOM) named
 			// inbound_invalid_rows_<YYYYMMDD_HHMMSS>.csv.
 			exportInvalidBtn.addEventListener('click', function () {
 				var invalidRows = latestRows.filter(function (row) {
 					return !row.is_valid;
 				});
 				if (!invalidRows.length) {
 					status.textContent = '当前没有异常行可导出';
 					status.classList.remove('ok');
 					return;
 				}
 				var headers = ['line_no', 'raw', 'part_no', 'name', 'quantity', 'specification', 'note', 'errors', 'warnings'];
 				var rows = [headers];
 				invalidRows.forEach(function (row) {
 					rows.push([
 						row.line_no || '',
 						row.raw || '',
 						row.part_no || '',
 						row.name || '',
 						row.quantity || 0,
 						row.specification || '',
 						row.note || '',
 						(row.errors || []).join(' | '),
 						(row.warnings || []).join(' | ')
 					]);
 				});
 				// Quote a cell when it contains a quote, comma or any line-break
 				// character (CR included); embedded quotes are doubled.
 				function csvCell(value) {
 					var text = String(value == null ? '' : value);
 					if (/[",\r\n]/.test(text)) {
 						return '"' + text.replace(/"/g, '""') + '"';
 					}
 					return text;
 				}
 				var csv = rows.map(function (cols) {
 					return cols.map(csvCell).join(',');
 				}).join('\n');
 				// The leading BOM marks the file as UTF-8.
 				var blob = new Blob(['\ufeff' + csv], { type: 'text/csv;charset=utf-8;' });
 				var url = URL.createObjectURL(blob);
 				var now = new Date();
 				var stamp = now.getFullYear()
 					+ String(now.getMonth() + 1).padStart(2, '0')
 					+ String(now.getDate()).padStart(2, '0')
 					+ '_'
 					+ String(now.getHours()).padStart(2, '0')
 					+ String(now.getMinutes()).padStart(2, '0')
 					+ String(now.getSeconds()).padStart(2, '0');
 				// Trigger the download through a temporary link element.
 				var link = document.createElement('a');
 				link.href = url;
 				link.download = 'inbound_invalid_rows_' + stamp + '.csv';
 				document.body.appendChild(link);
 				link.click();
 				document.body.removeChild(link);
 				URL.revokeObjectURL(url);
 				status.textContent = '已导出异常行 ' + invalidRows.length + ' 条';
 				status.classList.add('ok');
 			});
 		})();
 	</script>
 </body>
 </html>