feat: 添加 AI 入库预处理功能,支持数据结构化和异常行导出

This commit is contained in:
2026-03-12 14:59:02 +08:00
parent d2dc25eb09
commit ef0af75193
4 changed files with 472 additions and 9 deletions

View File

@@ -351,11 +351,11 @@ cp /www/wwwroot/inventory/data/inventory.db /www/backup/inventory_$(date +%F).db
### 第一阶段:先做能直接省时间的功能 ### 第一阶段:先做能直接省时间的功能
- [ ] AI 入库预处理 - [x] AI 入库预处理
- [ ] 支持粘贴采购清单、聊天记录、Excel 文本后,由 AI 自动拆分为 `料号 / 名称 / 数量 / 规格 / 备注` - [x] 支持粘贴采购清单、聊天记录、Excel 文本后,由 AI 自动拆分为 `料号 / 名称 / 数量 / 规格 / 备注`
- [ ] 自动识别脏数据,例如缺字段、数量格式异常、字段顺序混乱 - [x] 自动识别脏数据,例如缺字段、数量格式异常、字段顺序混乱
- [ ] 输出结构化预览结果,用户确认后再正式写入库存 - [x] 输出结构化预览结果,用户确认后再正式写入库存
- [ ] 对接现有快速入库 / 袋装批量新增流程,不直接绕开人工确认 - [x] 对接现有快速入库 / 袋装批量新增流程,不直接绕开人工确认
### 第二阶段:提升库存数据质量 ### 第二阶段:提升库存数据质量

233
app.py
View File

@@ -989,6 +989,201 @@ def _parse_bulk_line(line: str):
} }
def _split_inbound_line_fields(line: str) -> tuple[list[str], list[str]]:
"""把原始入库行尽量拆分成 5 列。
中文说明:用户粘贴的数据分隔符不一定统一,这里优先识别逗号/Tab
其次尝试竖线或连续空格,尽量把一行拆成系统可识别的字段。
"""
warnings = []
raw = (line or "").strip()
if not raw:
return ["", "", "", "", ""], warnings
normalized = raw.replace("", ",")
if "," in normalized or "\t" in normalized:
parts = [p.strip() for p in re.split(r"[,\t]", normalized)]
elif "|" in normalized:
parts = [p.strip() for p in normalized.split("|")]
warnings.append("检测到竖线分隔,已自动转换为标准字段")
else:
parts = [p.strip() for p in re.split(r"\s{2,}", normalized) if p.strip()]
warnings.append("未检测到逗号或Tab已按连续空格尝试拆分")
if len(parts) > 5:
parts = parts[:4] + [" ".join(parts[4:])]
warnings.append("字段超过 5 列,已将多余内容合并到备注")
while len(parts) < 5:
parts.append("")
return parts, warnings
def _format_inbound_line(part_no: str, name: str, quantity: int, specification: str, note: str) -> str:
safe_quantity = int(quantity or 0)
return f"{part_no}, {name}, {safe_quantity}, {specification}, {note}".strip()
def _parse_inbound_preview_rows(raw_lines: list[str]) -> list[dict]:
rows = []
for line_no, line in enumerate(raw_lines, start=1):
parts, warnings = _split_inbound_line_fields(line)
part_no = (parts[0] or "").strip()
name = (parts[1] or "").strip()
quantity_raw = (parts[2] or "").strip()
specification = (parts[3] or "").strip()
note = (parts[4] or "").strip()
errors = []
if not part_no:
errors.append("缺少料号")
if not name:
errors.append("缺少名称")
quantity = 0
try:
quantity = _parse_non_negative_int(quantity_raw, 0)
except ValueError:
errors.append("数量格式错误,必须是大于等于 0 的整数")
rows.append(
{
"line_no": line_no,
"raw": line,
"part_no": part_no,
"name": name,
"quantity": int(quantity),
"quantity_raw": quantity_raw,
"specification": specification,
"note": note,
"errors": errors,
"warnings": warnings,
"is_valid": len(errors) == 0,
"normalized_line": _format_inbound_line(part_no, name, quantity, specification, note),
}
)
return rows
def _extract_json_object_block(raw_text: str) -> str:
text = (raw_text or "").strip()
if not text:
return ""
if text.startswith("```"):
text = re.sub(r"^```(?:json)?\\s*", "", text)
text = re.sub(r"\\s*```$", "", text)
first = text.find("{")
last = text.rfind("}")
if first >= 0 and last > first:
return text[first : last + 1]
return text
def _normalize_ai_inbound_rows(ai_rows: list, fallback_rows: list[dict]) -> list[dict]:
by_line = {row["line_no"]: dict(row) for row in fallback_rows}
for raw_row in ai_rows or []:
if not isinstance(raw_row, dict):
continue
try:
line_no = int(raw_row.get("line_no"))
except (TypeError, ValueError):
continue
if line_no not in by_line:
continue
current = by_line[line_no]
part_no = str(raw_row.get("part_no", current["part_no"]) or "").strip()
name = str(raw_row.get("name", current["name"]) or "").strip()
specification = str(raw_row.get("specification", current["specification"]) or "").strip()
note = str(raw_row.get("note", current["note"]) or "").strip()
quantity = current["quantity"]
quantity_candidate = raw_row.get("quantity", current["quantity"])
try:
quantity = _parse_non_negative_int(str(quantity_candidate), 0)
except (TypeError, ValueError):
# AI 数量不可信时保留规则解析值,不覆盖。
pass
errors = []
if not part_no:
errors.append("缺少料号")
if not name:
errors.append("缺少名称")
warnings = list(current.get("warnings", []))
for w in raw_row.get("warnings", []) if isinstance(raw_row.get("warnings", []), list) else []:
text = str(w or "").strip()
if text:
warnings.append(text)
current.update(
{
"part_no": part_no,
"name": name,
"quantity": int(quantity),
"specification": specification,
"note": note,
"errors": errors,
"warnings": warnings,
"is_valid": len(errors) == 0,
"normalized_line": _format_inbound_line(part_no, name, quantity, specification, note),
}
)
return [by_line[idx] for idx in sorted(by_line.keys())]
def _ai_enhance_inbound_preview(raw_lines: list[str], mode: str, fallback_rows: list[dict], settings: dict) -> tuple[list[dict], str]:
"""使用 AI 对规则解析结果做二次修正。
中文说明AI 负责“更聪明地拆分和纠错”,但最终仍会做字段约束;
如果 AI 不可用或返回异常,自动退回规则解析,不影响使用。
"""
api_key = (settings.get("api_key") or "").strip()
api_url = (settings.get("api_url") or "").strip()
model = (settings.get("model") or "").strip()
if not api_key or not api_url or not model:
return fallback_rows, "AI 参数未完整配置,已使用规则解析结果"
numbered_lines = [{"line_no": idx, "raw": line} for idx, line in enumerate(raw_lines, start=1)]
system_prompt = (
"你是电子元件入库清洗助手。"
"必须只输出 JSON不要 Markdown不要解释文字。"
"请输出对象: {\"rows\":[{\"line_no\":number,\"part_no\":string,\"name\":string,\"quantity\":number,\"specification\":string,\"note\":string,\"warnings\":string[]}]}。"
"不要新增或删除行号;每个 line_no 仅返回一条。"
"quantity 必须是 >=0 的整数;无法确定时返回 0 并在 warnings 中说明。"
)
user_prompt = (
f"导入模式: {mode}\n"
"原始行(JSON):\n"
+ json.dumps(numbered_lines, ensure_ascii=False)
+ "\n规则解析参考(JSON):\n"
+ json.dumps(fallback_rows, ensure_ascii=False)
)
try:
suggestion = _call_siliconflow_chat(
system_prompt,
user_prompt,
api_url=api_url,
model=model,
api_key=api_key,
timeout=int(settings.get("timeout", 30)),
)
parsed = json.loads(_extract_json_object_block(suggestion))
ai_rows = parsed.get("rows", []) if isinstance(parsed, dict) else []
return _normalize_ai_inbound_rows(ai_rows, fallback_rows), ""
except Exception:
return fallback_rows, "AI 解析失败,已自动回退到规则解析结果"
def log_inventory_event( def log_inventory_event(
*, *,
event_type: str, event_type: str,
@@ -2468,6 +2663,44 @@ def search_page():
) )
@app.route("/ai/inbound-parse", methods=["POST"])
def ai_inbound_parse():
    """Preprocess pasted inbound text and return a structured preview.

    Form fields:
        lines: Raw multi-line text, one inbound item per line.
        mode: ``"box"`` or ``"bag"``; any other value is treated as ``"box"``.

    Returns:
        A JSON-serialisable dict with the structured rows, valid and invalid
        counts, an optional ``parse_notice`` and ``normalized_lines`` (the
        valid rows re-rendered one per line). When no non-blank line is
        submitted, returns an error dict with HTTP status 400.

    This endpoint only previews. Nothing is written to inventory here; the
    user still confirms and submits the import separately.
    """
    raw_text = request.form.get("lines", "")
    mode = (request.form.get("mode", "box") or "box").strip().lower()
    if mode not in {"box", "bag"}:
        mode = "box"

    lines = [line.strip() for line in (raw_text or "").splitlines() if line.strip()]
    if not lines:
        return {"ok": False, "message": "请至少输入一行待处理文本"}, 400

    # Rule-based parse first; the AI pass may refine it or fall back to it.
    fallback_rows = _parse_inbound_preview_rows(lines)
    settings = _get_ai_settings()
    rows, parse_notice = _ai_enhance_inbound_preview(lines, mode, fallback_rows, settings)

    valid_rows = [row for row in rows if row.get("is_valid")]
    invalid_rows = [row for row in rows if not row.get("is_valid")]
    normalized_lines = "\n".join(
        row.get("normalized_line", "") for row in valid_rows if row.get("normalized_line")
    )

    return {
        "ok": True,
        "mode": mode,
        "total_lines": len(rows),
        "valid_count": len(valid_rows),
        "invalid_count": len(invalid_rows),
        "parse_notice": parse_notice,
        "rows": rows,
        "normalized_lines": normalized_lines,
    }
@app.route("/ai/restock-plan", methods=["POST"]) @app.route("/ai/restock-plan", methods=["POST"])
def ai_restock_plan(): def ai_restock_plan():
"""生成 AI 补货建议。 """生成 AI 补货建议。

View File

@@ -575,6 +575,8 @@ body {
.muted { .muted {
color: var(--muted); color: var(--muted);
font-size: 14px; font-size: 14px;
overflow-wrap: anywhere;
word-break: break-word;
} }
.new-box-form { .new-box-form {
@@ -856,6 +858,7 @@ body.modal-open {
display: grid; display: grid;
grid-template-columns: 1fr 1fr; grid-template-columns: 1fr 1fr;
gap: var(--space-2); gap: var(--space-2);
min-width: 0;
} }
.form-grid label { .form-grid label {
@@ -863,6 +866,13 @@ body.modal-open {
flex-direction: column; flex-direction: column;
gap: 6px; gap: 6px;
font-size: 14px; font-size: 14px;
min-width: 0;
overflow-wrap: anywhere;
word-break: break-word;
}
.form-grid > * {
min-width: 0;
} }
.form-grid .full { .form-grid .full {
@@ -880,6 +890,8 @@ input[type="search"] {
padding: 8px 2px; padding: 8px 2px;
font: inherit; font: inherit;
transition: border-color 140ms ease; transition: border-color 140ms ease;
width: 100%;
min-width: 0;
} }
input[type="text"]:focus, input[type="text"]:focus,
@@ -898,6 +910,8 @@ textarea {
background: transparent; background: transparent;
color: var(--text); color: var(--text);
font: inherit; font: inherit;
width: 100%;
min-width: 0;
} }
input[type="checkbox"] { input[type="checkbox"] {
@@ -968,6 +982,22 @@ th {
margin-bottom: var(--space-1); margin-bottom: var(--space-1);
} }
/* Bordered container for the AI inbound preview table. The background is
   90% var(--card) blended with var(--card-alt). */
.ai-preview {
margin-top: var(--space-1);
border: 1px solid var(--line);
border-radius: var(--radius);
background: color-mix(in srgb, var(--card) 90%, var(--card-alt));
padding: 8px;
}
/* Preview rows that failed validation get a light var(--danger) tint. */
.ai-preview .row-invalid {
background: color-mix(in srgb, var(--danger) 12%, var(--card));
}
/* Status line colour while it carries the "ok" class. */
#ai-inbound-status.ok {
color: color-mix(in srgb, var(--accent-press) 75%, var(--text));
}
.entry-shell { .entry-shell {
display: grid; display: grid;
grid-template-columns: minmax(0, 1fr) 320px; grid-template-columns: minmax(0, 1fr) 320px;

View File

@@ -76,12 +76,34 @@
<h2 id="quick-inbound-title">快速入库</h2> <h2 id="quick-inbound-title">快速入库</h2>
<button class="btn btn-light" type="button" id="close-quick-inbound">关闭</button> <button class="btn btn-light" type="button" id="close-quick-inbound">关闭</button>
</div> </div>
<p class="hint">每行一条: 料号, 名称, 数量, 规格, 备注。支持英文逗号或Tab分隔检测到同料号或同参数时不会自动合并,需要人工确认</p> <p class="hint">每行一条: 料号, 名称, 数量, 规格, 备注。支持英文逗号或Tab分隔先点“AI预处理”查看结构化结果再确认导入</p>
<form method="post" action="{{ url_for('quick_inbound', box_id=box.id) }}"> <form method="post" id="quick-inbound-form" action="{% if box.box_type == 'bag' %}{{ url_for('add_bag_items_batch', box_id=box.id) }}{% else %}{{ url_for('quick_inbound', box_id=box.id) }}{% endif %}">
<textarea class="batch-input" name="lines" rows="8" placeholder="10K-0603, 电阻10K 0603, 500, 1%, 常用\n100nF-0603, 电容100nF 0603, 300, 50V X7R, 去耦"></textarea> <input type="hidden" id="ai-inbound-mode" value="{% if box.box_type == 'bag' %}bag{% else %}box{% endif %}">
<textarea class="batch-input" id="quick-inbound-lines" name="lines" rows="8" placeholder="10K-0603, 电阻10K 0603, 500, 1%, 常用\n100nF-0603, 电容100nF 0603, 300, 50V X7R, 去耦"></textarea>
<p class="hint">建议: part_no 用厂家型号name 用品类+型号specification 只写关键参数。</p> <p class="hint">建议: part_no 用厂家型号name 用品类+型号specification 只写关键参数。</p>
<p class="hint" id="ai-inbound-status" aria-live="polite"></p>
<section class="ai-preview" id="ai-inbound-preview" hidden>
<div class="table-wrap">
<table>
<thead>
<tr>
<th></th>
<th>料号</th>
<th>名称</th>
<th>数量</th>
<th>规格</th>
<th>备注</th>
<th>识别提示</th>
</tr>
</thead>
<tbody id="ai-inbound-preview-body"></tbody>
</table>
</div>
</section>
<div class="actions"> <div class="actions">
<button class="btn" type="submit">批量快速入库</button> <button class="btn btn-light" type="button" id="ai-inbound-parse-btn">AI预处理并预览</button>
<button class="btn btn-light" type="button" id="ai-inbound-export-invalid-btn" disabled>导出异常行</button>
<button class="btn" type="submit">确认导入</button>
</div> </div>
</form> </form>
</div> </div>
@@ -217,6 +239,184 @@
}); });
}); });
})(); })();
(function () {
var parseBtn = document.getElementById('ai-inbound-parse-btn');
var exportInvalidBtn = document.getElementById('ai-inbound-export-invalid-btn');
var textarea = document.getElementById('quick-inbound-lines');
var modeInput = document.getElementById('ai-inbound-mode');
var status = document.getElementById('ai-inbound-status');
var preview = document.getElementById('ai-inbound-preview');
var previewBody = document.getElementById('ai-inbound-preview-body');
var latestRows = [];
if (!parseBtn || !textarea || !status || !preview || !previewBody || !exportInvalidBtn) {
return;
}
function escapeHtml(text) {
return String(text || '')
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}
function renderRows(rows) {
latestRows = Array.isArray(rows) ? rows : [];
var invalidCount = latestRows.filter(function (row) {
return !row.is_valid;
}).length;
exportInvalidBtn.disabled = invalidCount <= 0;
if (!Array.isArray(rows) || !rows.length) {
preview.hidden = true;
previewBody.innerHTML = '';
return;
}
var html = rows.map(function (row) {
var messages = [];
(row.errors || []).forEach(function (msg) {
messages.push('错误: ' + msg);
});
(row.warnings || []).forEach(function (msg) {
messages.push('提示: ' + msg);
});
var tips = messages.length ? messages.join('<br>') : '正常';
var rowClass = row.is_valid ? '' : ' class="row-invalid"';
return '<tr' + rowClass + '>'
+ '<td>' + escapeHtml(row.line_no) + '</td>'
+ '<td>' + escapeHtml(row.part_no) + '</td>'
+ '<td>' + escapeHtml(row.name) + '</td>'
+ '<td>' + escapeHtml(row.quantity) + '</td>'
+ '<td>' + escapeHtml(row.specification) + '</td>'
+ '<td>' + escapeHtml(row.note) + '</td>'
+ '<td>' + tips + '</td>'
+ '</tr>';
}).join('');
previewBody.innerHTML = html;
preview.hidden = false;
}
parseBtn.addEventListener('click', function () {
var lines = (textarea.value || '').trim();
if (!lines) {
status.textContent = '请先粘贴至少一行内容';
status.classList.remove('ok');
return;
}
parseBtn.disabled = true;
exportInvalidBtn.disabled = true;
status.textContent = '正在进行 AI 预处理...';
status.classList.remove('ok');
var payload = new URLSearchParams();
payload.set('lines', lines);
payload.set('mode', modeInput ? modeInput.value : 'box');
fetch('{{ url_for('ai_inbound_parse') }}', {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
},
body: payload.toString()
}).then(function (resp) {
return resp.json().then(function (data) {
if (!resp.ok || !data.ok) {
throw new Error(data.message || '预处理失败');
}
return data;
});
}).then(function (data) {
renderRows(data.rows || []);
if (data.normalized_lines) {
textarea.value = data.normalized_lines;
}
var baseText = '预处理完成: 有效 ' + (data.valid_count || 0) + ' 行';
if (data.invalid_count) {
baseText += ',异常 ' + data.invalid_count + ' 行';
}
if (data.parse_notice) {
baseText += '。' + data.parse_notice;
}
status.textContent = baseText;
status.classList.add('ok');
}).catch(function (error) {
status.textContent = '预处理失败: ' + error.message;
status.classList.remove('ok');
}).finally(function () {
parseBtn.disabled = false;
});
});
exportInvalidBtn.addEventListener('click', function () {
var invalidRows = latestRows.filter(function (row) {
return !row.is_valid;
});
if (!invalidRows.length) {
status.textContent = '当前没有异常行可导出';
status.classList.remove('ok');
return;
}
var headers = ['line_no', 'raw', 'part_no', 'name', 'quantity', 'specification', 'note', 'errors', 'warnings'];
var rows = [headers];
invalidRows.forEach(function (row) {
rows.push([
row.line_no || '',
row.raw || '',
row.part_no || '',
row.name || '',
row.quantity || 0,
row.specification || '',
row.note || '',
(row.errors || []).join(' | '),
(row.warnings || []).join(' | ')
]);
});
function csvCell(value) {
var text = String(value == null ? '' : value);
if (/[",\n]/.test(text)) {
return '"' + text.replace(/"/g, '""') + '"';
}
return text;
}
var csv = rows.map(function (cols) {
return cols.map(csvCell).join(',');
}).join('\n');
var blob = new Blob(['\ufeff' + csv], { type: 'text/csv;charset=utf-8;' });
var url = URL.createObjectURL(blob);
var now = new Date();
var stamp = now.getFullYear()
+ String(now.getMonth() + 1).padStart(2, '0')
+ String(now.getDate()).padStart(2, '0')
+ '_'
+ String(now.getHours()).padStart(2, '0')
+ String(now.getMinutes()).padStart(2, '0')
+ String(now.getSeconds()).padStart(2, '0');
var link = document.createElement('a');
link.href = url;
link.download = 'inbound_invalid_rows_' + stamp + '.csv';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
URL.revokeObjectURL(url);
status.textContent = '已导出异常行 ' + invalidRows.length + ' 条';
status.classList.add('ok');
});
})();
</script> </script>
</body> </body>
</html> </html>