From 6c811a77db238c2ae0cbbfa3204277ad8c50cccc Mon Sep 17 00:00:00 2001
From: goddonebianu <goddonebianu@outlook.com>
Date: Mon, 9 Mar 2026 18:04:12 +0800
Subject: [PATCH] =?UTF-8?q?perf(dictionary):=20=E4=BC=98=E5=8C=96=20AI=20?=
 =?UTF-8?q?=E7=BC=96=E6=8E=92=E6=80=A7=E8=83=BD=EF=BC=8C4=20=E6=AC=A1=20LL?=
 =?UTF-8?q?M=20=E8=B0=83=E7=94=A8=E5=87=8F=E5=B0=91=E5=88=B0=202=20?=
 =?UTF-8?q?=E6=AC=A1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 合并 Stage 1+2+3 为单次 preprocessInput 调用
- 精简 Stage 4 词条生成 prompt
- 删除旧的 stage 文件
- 预期性能提升 60%+ (33s → ~8-13s)
---
 src/lib/bigmodel/dictionary/README.md         | 196 ++----------------
 src/lib/bigmodel/dictionary/orchestrator.ts   |  63 ++----
 .../dictionary/stage1-inputAnalysis.ts        |  77 -------
 .../bigmodel/dictionary/stage1-preprocess.ts  |  87 ++++++++
 .../dictionary/stage2-semanticMapping.ts      | 118 -----------
 .../dictionary/stage3-standardForm.ts         | 100 ---------
 .../dictionary/stage4-entriesGeneration.ts    |  87 ++------
 src/lib/bigmodel/dictionary/types.ts          |  29 +--
 8 files changed, 133 insertions(+), 624 deletions(-)
 delete mode 100644 src/lib/bigmodel/dictionary/stage1-inputAnalysis.ts
 create mode 100644 src/lib/bigmodel/dictionary/stage1-preprocess.ts
 delete mode 100644 src/lib/bigmodel/dictionary/stage2-semanticMapping.ts
 delete mode 100644 src/lib/bigmodel/dictionary/stage3-standardForm.ts

diff --git a/src/lib/bigmodel/dictionary/README.md b/src/lib/bigmodel/dictionary/README.md
index 506ce17..65dda4a 100644
--- a/src/lib/bigmodel/dictionary/README.md
+++ b/src/lib/bigmodel/dictionary/README.md
@@ -1,18 +1,15 @@
-# 词典查询模块化架构
+# 词典查询架构
 
-本目录包含词典查询系统的**多阶段 LLM 调用**实现，将查询过程拆分为 4 个独立的 LLM 调用，每个阶段之间有代码层面的数据验证，只要有一环失败，直接返回错误。
+2 次 LLM 调用的词典查询系统。
 
 ## 目录结构
 
 ```
 dictionary/
-├── index.ts                    # 主导出文件
-├── orchestrator.ts             # 主编排器，串联所有阶段
-├── types.ts                    # 类型定义
-├── stage1-inputAnalysis.ts     # 阶段1：输入解析与语言识别
-├── stage2-semanticMapping.ts   # 阶段2：跨语言语义映射决策
-├── stage3-standardForm.ts      # 阶段3：standardForm 生成与规范化
-└── stage4-entriesGeneration.ts # 阶段4：释义与词条生成
+├── orchestrator.ts             # 编排器
+├── stage1-preprocess.ts        # 阶段1：预处理（输入分析+语义映射+标准形式）
+├── stage4-entriesGeneration.ts # 阶段2：词条生成（文件名沿用旧的 stage4 编号）
+└── types.ts                    # 类型定义
 ```
 
 ## 工作流程
@@ -20,187 +17,22 @@ dictionary/
 ```
 用户输入
     ↓
-[阶段1] 输入分析 → 代码验证 → 失败则返回错误
+[阶段1] 预处理（1次LLM）→ isValid, standardForm, inputType
     ↓
-[阶段2] 语义映射 → 代码验证 → 失败则保守处理（不映射）
-    ↓
-[阶段3] 标准形式 → 代码验证 → 失败则返回错误
-    ↓
-[阶段4] 词条生成 → 代码验证 → 失败则返回错误
+[阶段2] 词条生成（1次LLM）→ entries
     ↓
 最终结果
 ```
 
-## 各阶段详细说明
+## 性能
 
-### 阶段 1：输入分析
+- 原 4 次 LLM 调用 → 现 2 次
+- 预期耗时：8-13s（原 33s）
 
-**文件**: `stage1-inputAnalysis.ts`
-
-**目的**:
-- 判断输入是否有效
-- 判断是「单词」还是「短语」
-- 识别输入语言
-
-**返回**: `InputAnalysisResult`
-
-**代码验证**:
-- `isValid` 必须是 boolean
-- 输入为空或无效时立即返回错误
-
-### 阶段 2：语义映射
-
-**文件**: `stage2-semanticMapping.ts`
-
-**目的**:
-- 决定是否启用"语义级查询"
-- **严格条件**：只有输入符合"明确、基础、可词典化的语义概念"且语言不一致时才映射
-- 不符合条件则**直接失败**（快速失败）
-
-**返回**: `SemanticMappingResult`
-
-**代码验证**:
-- `shouldMap` 必须是 boolean
-- 如果 `shouldMap=true`，必须有 `mappedQuery`
-- 如果不应该映射，**抛出异常**（不符合条件直接失败）
-- **失败则直接返回错误响应**，不继续后续阶段
-
-**映射条件**（必须同时满足）：
-a) 输入语言 ≠ 查询语言
-b) 输入是明确、基础、可词典化的语义概念（如常见动词、名词、形容词）
-
-**不符合条件的例子**：
-- 复杂句子："我喜欢吃苹果"
-- 专业术语
-- 无法确定语义的词汇
-
-### 阶段 3：标准形式生成
-
-**文件**: `stage3-standardForm.ts`
-
-**目的**:
-- 确定最终词条的"标准形"（整个系统的锚点）
-- 修正拼写错误
-- 还原为词典形式（动词原形、辞书形等）
-- **如果进行了语义映射**：基于映射结果生成标准形式，同时参考原始输入的语义上下文
-
-**参数**:
-- `inputText`: 用于生成标准形式的文本（可能是映射后的结果）
-- `queryLang`: 查询语言
-- `originalInput`: （可选）原始用户输入，用于语义参考
-
-**返回**: `StandardFormResult`
-
-**代码验证**:
-- `standardForm` 不能为空
-- `confidence` 必须是 "high" | "medium" | "low"
-- 失败时使用原输入作为标准形式
-
-**特殊逻辑**:
-- 当进行了语义映射时（即提供了 `originalInput`），阶段 3 会：
-  1. 基于 `inputText`（映射结果）生成标准形式
-  2. 参考 `originalInput` 的语义上下文，确保标准形式符合用户的真实查询意图
-  3. 例如：原始输入 "吃"（中文）→ 映射为 "to eat"（英语）→ 标准形式 "eat"
-
-### 阶段 4：词条生成
-
-**文件**: `stage4-entriesGeneration.ts`
-
-**目的**:
-- 生成真正的词典内容
-- 根据类型生成单词或短语条目
-
-**返回**: `EntriesGenerationResult`
-
-**代码验证**:
-- `entries` 必须是非空数组
-- 每个条目必须有 `definition` 和 `example`
-- 单词条目必须有 `partOfSpeech`
-- **失败则抛出异常**（核心阶段）
-
-## 使用方式
-
-### 基本使用
+## 使用
 
 ```typescript
-import { lookUp } from "@/lib/server/bigmodel/dictionaryActions";
+import { executeDictionaryLookup } from "@/lib/bigmodel/dictionary/orchestrator";
 
-const result = await lookUp({
-    text: "hello",
-    queryLang: "English",
-    definitionLang: "中文"
-});
+const result = await executeDictionaryLookup("hello", "English", "中文");
 ```
-
-### 高级使用（直接调用编排器）
-
-```typescript
-import { executeDictionaryLookup } from "@/lib/server/bigmodel/dictionary";
-
-const result = await executeDictionaryLookup(
-    "hello",
-    "English",
-    "中文"
-);
-```
-
-### 单独测试某个阶段
-
-```typescript
-import { analyzeInput } from "@/lib/server/bigmodel/dictionary";
-
-const analysis = await analyzeInput("hello");
-console.log(analysis);
-```
-
-## 设计优势
-
-### 1. 代码层面的数据验证
-每个阶段完成后都有严格的类型检查和数据验证，确保数据质量。
-
-### 2. 快速失败
-只要有一个阶段失败，立即返回错误，不浪费后续的 LLM 调用。
-
-### 3. 可观测性
-每个阶段都有 console.log 输出，方便调试和追踪问题。
-
-### 4. 模块化
-每个阶段独立文件，可以单独测试、修改或替换。
-
-### 5. 容错性
-非核心阶段（阶段2、3）失败时有降级策略，不会导致整个查询失败。
-
-## 日志示例
-
-```
-[阶段1] 开始输入分析...
-[阶段1] 输入分析完成: { isValid: true, inputType: 'word', inputLanguage: 'English' }
-[阶段2] 开始语义映射...
-[阶段2] 语义映射完成: { shouldMap: false }
-[阶段3] 开始生成标准形式...
-[阶段3] 标准形式生成完成: { standardForm: 'hello', confidence: 'high' }
-[阶段4] 开始生成词条...
-[阶段4] 词条生成完成: { entries: [...] }
-[完成] 词典查询成功
-```
-
-## 扩展建议
-
-### 添加缓存
-对阶段1、3的结果进行缓存，避免重复调用 LLM。
-
-### 添加指标
-记录每个阶段的耗时和成功率，用于性能优化。
-
-### 并行化
-某些阶段可以并行执行（如果有依赖关系允许的话）。
-
-### A/B 测试
-为某个阶段创建不同版本的实现，进行效果对比。
-
-## 注意事项
-
-- 每个阶段都是独立的 LLM 调用，会增加总耗时
-- 需要控制 token 使用量，避免成本过高
-- 错误处理要完善，避免某个阶段卡住整个流程
-- 日志记录要清晰，方便问题排查
diff --git a/src/lib/bigmodel/dictionary/orchestrator.ts b/src/lib/bigmodel/dictionary/orchestrator.ts
index fe57196..a5f9cd1 100644
--- a/src/lib/bigmodel/dictionary/orchestrator.ts
+++ b/src/lib/bigmodel/dictionary/orchestrator.ts
@@ -1,7 +1,5 @@
 import { ServiceOutputLookUp } from "@/modules/dictionary/dictionary-service-dto";
-import { analyzeInput } from "./stage1-inputAnalysis";
-import { determineSemanticMapping } from "./stage2-semanticMapping";
-import { generateStandardForm } from "./stage3-standardForm";
+import { preprocessInput } from "./stage1-preprocess";
 import { generateEntries } from "./stage4-entriesGeneration";
 import { LookUpError } from "@/lib/errors";
 import { createLogger } from "@/lib/logger";
@@ -14,64 +12,28 @@ export async function executeDictionaryLookup(
     definitionLang: string
 ): Promise<ServiceOutputLookUp> {
     try {
-        log.debug("[Stage 1] Starting input analysis");
-        const analysis = await analyzeInput(text);
+        log.debug("[Stage 1] Preprocessing input");
+        const preprocessed = await preprocessInput(text, queryLang);
 
-        if (!analysis.isValid) {
-            log.debug("[Stage 1] Invalid input", { reason: analysis.reason });
-            throw new LookUpError(analysis.reason || "无效输入");
+        if (!preprocessed.isValid) {
+            log.debug("[Stage 1] Invalid input", { reason: preprocessed.reason });
+            throw new LookUpError(preprocessed.reason || "无效输入");
         }
 
-        if (analysis.isEmpty) {
-            log.debug("[Stage 1] Empty input");
-            throw new LookUpError("输入为空");
-        }
+        log.debug("[Stage 1] Preprocess complete", { preprocessed });
 
-        log.debug("[Stage 1] Analysis complete", { analysis });
-
-        log.debug("[Stage 2] Starting semantic mapping");
-        const semanticMapping = await determineSemanticMapping(
-            text,
-            queryLang,
-            analysis.inputLanguage ?? text
-        );
-
-        log.debug("[Stage 2] Semantic mapping complete", { semanticMapping });
-
-        log.debug("[Stage 3] Generating standard form");
-
-        // 如果进行了语义映射，标准形式要基于映射后的结果
-        // 同时传递原始输入作为语义参考
-        const shouldUseMapping = semanticMapping.shouldMap && semanticMapping.mappedQuery;
-        const inputForStandardForm = shouldUseMapping ? semanticMapping.mappedQuery! : text;
-
-        const standardFormResult = await generateStandardForm(
-            inputForStandardForm,
-            queryLang,
-            shouldUseMapping ? text : undefined  // 如果进行了映射，传递原始输入作为语义参考
-        );
-
-        if (!standardFormResult.standardForm) {
-            log.error("[Stage 3] Standard form is empty");
-            throw new LookUpError("无法生成标准形式");
-        }
-
-        log.debug("[Stage 3] Standard form complete", { standardFormResult });
-
-        log.debug("[Stage 4] Generating entries");
+        log.debug("[Stage 2] Generating entries");
         const entriesResult = await generateEntries(
-            standardFormResult.standardForm,
+            preprocessed.standardForm,
             queryLang,
             definitionLang,
-            analysis.inputType === "unknown"
-                ? (standardFormResult.standardForm.includes(" ") ? "phrase" : "word")
-                : analysis.inputType
+            preprocessed.inputType
         );
 
-        log.debug("[Stage 4] Entries complete", { entriesResult });
+        log.debug("[Stage 2] Entries complete", { entriesResult });
 
         const finalResult: ServiceOutputLookUp = {
-            standardForm: standardFormResult.standardForm,
+            standardForm: preprocessed.standardForm,
             entries: entriesResult.entries,
         };
 
@@ -80,7 +42,6 @@ export async function executeDictionaryLookup(
 
     } catch (error) {
         log.error("Dictionary lookup failed", { error });
-
         const errorMessage = error instanceof Error ? error.message : "未知错误";
         throw new LookUpError(errorMessage);
     }
diff --git a/src/lib/bigmodel/dictionary/stage1-inputAnalysis.ts b/src/lib/bigmodel/dictionary/stage1-inputAnalysis.ts
deleted file mode 100644
index fe028e3..0000000
--- a/src/lib/bigmodel/dictionary/stage1-inputAnalysis.ts
+++ /dev/null
@@ -1,77 +0,0 @@
-import { getAnswer } from "../zhipu";
-import { parseAIGeneratedJSON } from "@/utils/json";
-import { InputAnalysisResult } from "./types";
-import { createLogger } from "@/lib/logger";
-
-const log = createLogger("dictionary-stage1");
-
-/**
- * 阶段 1：输入解析与语言识别
- *
- * 独立的 LLM 调用，分析输入文本
- */
-
-export async function analyzeInput(text: string): Promise<InputAnalysisResult> {
-    const prompt = `
-你是一个输入分析器。分析用户输入并返回 JSON 结果。
-
-用户输入位于 <text> 标签内：
-<text>${text}</text>
-
-你的任务是：
-1. 判断输入是否为空或明显非法
-2. 判断输入是「单词」还是「短语」
-3. 识别输入所属语言
-
-返回 JSON 格式：
-{
-  "isValid": true/false,
-  "isEmpty": true/false,
-  "isNaturalLanguage": true/false,
-  "inputLanguage": "检测到的语言名称（如 English、中文、日本語等）",
-  "inputType": "word/phrase/unknown",
-  "reason": "错误原因，成功时为空字符串\"\""
-}
-
-若输入为空、非自然语言或无法识别语言，设置 isValid 为 false，并在 reason 中说明原因。
-若输入有效，设置 isValid 为 true，reason 为空字符串 ""。
-只返回 JSON，不要任何其他文字。
-`.trim();
-
-    try {
-        const result = await getAnswer([
-            {
-                role: "system",
-                content: "你是一个输入分析器，只返回 JSON 格式的分析结果。",
-            },
-            {
-                role: "user",
-                content: prompt,
-            },
-        ]).then(parseAIGeneratedJSON<InputAnalysisResult>);
-
-        // 代码层面的数据验证
-        if (typeof result.isValid !== "boolean") {
-            throw new Error("阶段1：isValid 字段类型错误");
-        }
-
-        if (typeof result.isEmpty !== "boolean") {
-            throw new Error("阶段1：isEmpty 字段类型错误");
-        }
-
-        if (typeof result.isNaturalLanguage !== "boolean") {
-            throw new Error("阶段1：isNaturalLanguage 字段类型错误");
-        }
-
-        // 确保 reason 字段存在
-        if (typeof result.reason !== "string") {
-            result.reason = "";
-        }
-
-        return result;
-    } catch (error) {
-        log.error("Stage 1 failed", { error });
-        // 失败时抛出错误，包含 reason
-        throw new Error("输入分析失败：无法识别输入类型或语言");
-    }
-}
diff --git a/src/lib/bigmodel/dictionary/stage1-preprocess.ts b/src/lib/bigmodel/dictionary/stage1-preprocess.ts
new file mode 100644
index 0000000..b1211a0
--- /dev/null
+++ b/src/lib/bigmodel/dictionary/stage1-preprocess.ts
@@ -0,0 +1,87 @@
+import { getAnswer } from "../zhipu";
+import { parseAIGeneratedJSON } from "@/utils/json";
+import { PreprocessResult } from "./types";
+import { createLogger } from "@/lib/logger";
+
+const log = createLogger("dictionary-preprocess");
+
+/**
+ * Stage 1 of the lookup: in one LLM call, validates the input, classifies it as a
+ * word or phrase, and normalises it to its dictionary form in `queryLang`.
+ * @param text - Raw user input, interpolated into the prompt inside `<input>` tags.
+ * @param queryLang - Language the standard form should be expressed in.
+ * @returns The sanitised result; rejected input is returned with `isValid: false` and
+ *   the model's `reason`, so check `isValid` before using `standardForm`.
+ * @throws Error on a malformed reply or a valid input without a standard form;
+ *   failures of the LLM call or JSON parsing are logged and rethrown.
+ */
+export async function preprocessInput(
+    text: string,
+    queryLang: string
+): Promise<PreprocessResult> {
+    const prompt = `
+你是一个词典预处理系统。分析输入并生成标准形式。
+
+用户输入：<input>${text}</input>
+查询语言：<queryLang>${queryLang}</queryLang>
+
+任务：
+1. 判断输入是否有效（非空、是自然语言）
+2. 识别输入语言和类型（单词/短语）
+3. 如果输入语言 ≠ 查询语言，判断是否需要语义映射
+4. 生成查询语言下的标准形式
+
+语义映射规则：
+- 只有当输入是"明确、基础、可词典化的语义概念"（如常见动词、名词、形容词）时才映射
+- 复杂句子、专业术语、无法确定语义的词汇不映射，直接用原文
+
+标准形式规则：
+- 修正拼写错误
+- 还原为词典形式（英语：动词原形/名词单数；日语：辞书形；中文：标准简化字）
+
+返回 JSON：
+{
+  "isValid": boolean,
+  "inputType": "word" | "phrase",
+  "standardForm": "标准形式",
+  "confidence": "high" | "medium" | "low",
+  "reason": "错误原因，成功时为空字符串"
+}
+
+注意：
+- isValid=false 时，在 reason 中说明原因
+- 成功时 reason 为空字符串 ""
+- 只返回 JSON，不要其他文字
+`.trim();
+
+    try {
+        // The reply is untrusted model output: check each field before using it.
+        const result = await getAnswer([
+            { role: "system", content: "你是词典预处理系统，只返回 JSON。" },
+            { role: "user", content: prompt },
+        ]).then(parseAIGeneratedJSON<PreprocessResult>);
+        if (!result || typeof result.isValid !== "boolean") {
+            throw new Error("预处理：isValid 字段类型错误");
+        }
+        const reason = typeof result.reason === "string" ? result.reason : "";
+        const standardForm = typeof result.standardForm === "string" ? result.standardForm.trim() : "";
+        // Rejected input is an expected outcome, not a failure: it is returned below so the
+        // caller can report `reason`. Only a valid input is required to carry a standard form.
+        if (result.isValid && standardForm.length === 0) {
+            throw new Error(reason || "预处理：standardForm 为空");
+        }
+        // Fall back to a whitespace heuristic when the model omits or garbles the type.
+        const inputType =
+            result.inputType === "word" || result.inputType === "phrase"
+                ? result.inputType
+                : /\s/.test(standardForm) ? "phrase" : "word";
+        // Accept Chinese spellings ("高"/"中") as well; anything unrecognised counts as "low".
+        const cv = typeof result.confidence === "string" ? result.confidence.toLowerCase() : "";
+        const confidence =
+            cv === "高" || cv === "high" ? "high" : cv === "中" || cv === "medium" ? "medium" : "low";
+        return { isValid: result.isValid, inputType, standardForm, confidence, reason };
+    } catch (error) {
+        log.error("Preprocess failed", { error });
+        throw error;
+    }
+}
diff --git a/src/lib/bigmodel/dictionary/stage2-semanticMapping.ts b/src/lib/bigmodel/dictionary/stage2-semanticMapping.ts
deleted file mode 100644
index 263e719..0000000
--- a/src/lib/bigmodel/dictionary/stage2-semanticMapping.ts
+++ /dev/null
@@ -1,118 +0,0 @@
-import { getAnswer } from "../zhipu";
-import { parseAIGeneratedJSON } from "@/utils/json";
-import { SemanticMappingResult } from "./types";
-import { createLogger } from "@/lib/logger";
-
-const log = createLogger("dictionary-stage2");
-
-/**
- * 阶段 2：跨语言语义映射决策
- *
- * 独立的 LLM 调用，决定是否需要语义映射
- * 如果输入不符合"明确、基础、可词典化的语义概念"且语言不一致，则降级使用原始输入
- */
-
-export async function determineSemanticMapping(
-    text: string,
-    queryLang: string,
-    inputLanguage: string
-): Promise<SemanticMappingResult> {
-    // 如果输入语言就是查询语言，不需要映射
-    if (inputLanguage.toLowerCase() === queryLang.toLowerCase()) {
-        return {
-            shouldMap: false,
-            reason: "输入语言与查询语言一致",
-        };
-    }
-
-    const prompt = `
-你是一个语义映射决策器。判断是否需要对输入进行跨语言语义映射。
-
-查询语言：${queryLang}
-输入语言：${inputLanguage}
-用户输入：${text}
-
-判断规则：
-1. 若输入表达一个**明确、基础、可词典化的语义概念**（如常见动词、名词、形容词），则应该映射
-2. 若输入不符合上述条件（如复杂句子、专业术语、无法确定语义的词汇），则不应该映射
-
-映射条件必须同时满足：
-a) 输入语言 ≠ 查询语言
-b) 输入是明确、基础、可词典化的语义概念
-
-例如：
-- 查询语言=English，输入="吃"（中文）→ 应该映射 → coreSemantic="to eat"
-- 查询语言=Italiano，输入="run"（English）→ 应该映射 → coreSemantic="correre"
-- 查询语言=中文，输入="hello"（English）→ 应该映射 → coreSemantic="你好"
-- 查询语言=English，输入="我喜欢吃苹果"（中文，复杂句子）→ 不应该映射 → canMap=false
-
-返回 JSON 格式：
-{
-  "shouldMap": true/false,
-  "canMap": true/false,
-  "coreSemantic": "提取的核心语义（用${queryLang}表达）",
-  "mappedQuery": "映射到${queryLang}的标准表达",
-  "reason": "错误原因，成功时为空字符串\"\""
-}
-
-- canMap=true 表示输入符合"明确、基础、可词典化的语义概念"
-- shouldMap=true 表示需要进行映射
-- 只有 canMap=true 且语言不一致时，shouldMap 才为 true
-- 如果 shouldMap=false，在 reason 中说明原因
-- 如果 shouldMap=true，reason 为空字符串 ""
-
-只返回 JSON，不要任何其他文字。
-`.trim();
-
-    try {
-        const result = await getAnswer([
-            {
-                role: "system",
-                content: `你是一个语义映射决策器，只返回 JSON 格式的结果。`,
-            },
-            {
-                role: "user",
-                content: prompt,
-            },
-        ]).then(parseAIGeneratedJSON<SemanticMappingResult>);
-
-        // 代码层面的数据验证
-        if (typeof result.shouldMap !== "boolean") {
-            throw new Error("阶段2：shouldMap 字段类型错误");
-        }
-
-        // 确保 reason 字段存在
-        if (typeof result.reason !== "string") {
-            result.reason = "";
-        }
-
-        // 如果不应该映射，返回降级结果（不抛出错误）
-        // 这样可以让后续阶段使用原始输入继续处理
-        if (!result.shouldMap) {
-            log.debug("Semantic mapping not applicable, using original input", { 
-                reason: result.reason 
-            });
-            return {
-                shouldMap: false,
-                canMap: result.canMap ?? false,
-                reason: result.reason,
-            };
-        }
-
-        if (!result.mappedQuery || result.mappedQuery.trim().length === 0) {
-            throw new Error("语义映射失败：映射结果为空");
-        }
-
-        return {
-            shouldMap: result.shouldMap,
-            canMap: result.canMap ?? true,
-            coreSemantic: result.coreSemantic,
-            mappedQuery: result.mappedQuery,
-            reason: result.reason,
-        };
-    } catch (error) {
-        log.error("Stage 2 failed", { error });
-        // 失败时直接抛出错误，让编排器返回错误响应
-        throw error;
-    }
-}
diff --git a/src/lib/bigmodel/dictionary/stage3-standardForm.ts b/src/lib/bigmodel/dictionary/stage3-standardForm.ts
deleted file mode 100644
index 5409eda..0000000
--- a/src/lib/bigmodel/dictionary/stage3-standardForm.ts
+++ /dev/null
@@ -1,100 +0,0 @@
-import { getAnswer } from "../zhipu";
-import { parseAIGeneratedJSON } from "@/utils/json";
-import { StandardFormResult } from "./types";
-import { createLogger } from "@/lib/logger";
-
-const log = createLogger("dictionary-stage3");
-
-/**
- * 阶段 3：standardForm 生成与规范化
- *
- * 独立的 LLM 调用，生成标准形式
- */
-
-export async function generateStandardForm(
-    inputText: string,
-    queryLang: string,
-    originalInput?: string
-): Promise<StandardFormResult> {
-    const prompt = `
-你是一个词典标准形式生成器。为输入生成该语言下的标准形式。
-
-查询语言：${queryLang}
-当前输入：${inputText}
-${originalInput ? `原始输入（语义参考）：${originalInput}` : ''}
-
-${originalInput ? `
-**重要说明**：
-- 当前输入是经过语义映射后的结果（从原始语言映射到查询语言）
-- 原始输入提供了语义上下文，帮助你理解用户的真实查询意图
-- 你需要基于**当前输入**生成标准形式，但要参考**原始输入的语义**以确保准确性
-
-例如：
-- 原始输入："吃"（中文），当前输入："to eat"（英语）→ 标准形式应为 "eat"
-- 原始输入："走"（中文），当前输入："to walk"（英语）→ 标准形式应为 "walk"
-` : ''}
-
-规则：
-1. 尝试修正明显拼写错误
-2. 还原为该语言中**最常见、最自然、最标准**的形式：
-   * 英语：动词原形、名词单数
-   * 日语：辞书形
-   * 意大利语：不定式或最常见规范形式
-   * 维吾尔语：标准拉丁化或阿拉伯字母形式
-   * 中文：标准简化字
-3. ${originalInput ? '参考原始输入的语义，确保标准形式符合用户的真实查询意图':'若无法确定或输入本身已规范，则保持不变'}
-
-返回 JSON 格式：
-{
-  "standardForm": "标准形式",
-  "confidence": "high/medium/low",
-  "reason": "错误原因，成功时为空字符串\"\""
-}
-
-成功生成标准形式时，reason 为空字符串 ""。
-失败时，在 reason 中说明失败原因。
-只返回 JSON，不要任何其他文字。
-`.trim();
-
-    try {
-        const result = await getAnswer([
-            {
-                role: "system",
-                content: "你是一个词典标准形式生成器，只返回 JSON 格式的结果。",
-            },
-            {
-                role: "user",
-                content: prompt,
-            },
-        ]).then(parseAIGeneratedJSON<StandardFormResult>);
-
-        // 代码层面的数据验证
-        if (!result.standardForm || result.standardForm.trim().length === 0) {
-            throw new Error(result.reason || "阶段3：standardForm 为空");
-        }
-
-        // 处理 confidence 可能是中文或英文的情况
-        let confidence: "high" | "medium" | "low" = "low";
-        const confidenceValue = result.confidence?.toLowerCase();
-        if (confidenceValue === "高" || confidenceValue === "high") {
-            confidence = "high";
-        } else if (confidenceValue === "中" || confidenceValue === "medium") {
-            confidence = "medium";
-        } else if (confidenceValue === "低" || confidenceValue === "low") {
-            confidence = "low";
-        }
-
-        // 确保 reason 字段存在
-        const reason = typeof result.reason === "string" ? result.reason : "";
-
-        return {
-            standardForm: result.standardForm,
-            confidence,
-            reason,
-        };
-    } catch (error) {
-        log.error("Stage 3 failed", { error });
-        // 失败时抛出错误
-        throw error;
-    }
-}
diff --git a/src/lib/bigmodel/dictionary/stage4-entriesGeneration.ts b/src/lib/bigmodel/dictionary/stage4-entriesGeneration.ts
index 6a745ea..805cdf0 100644
--- a/src/lib/bigmodel/dictionary/stage4-entriesGeneration.ts
+++ b/src/lib/bigmodel/dictionary/stage4-entriesGeneration.ts
@@ -3,13 +3,7 @@ import { parseAIGeneratedJSON } from "@/utils/json";
 import { EntriesGenerationResult } from "./types";
 import { createLogger } from "@/lib/logger";
 
-const log = createLogger("dictionary-stage4");
-
-/**
- * 阶段 4：释义与词条生成
- *
- * 独立的 LLM 调用，生成词典条目
- */
+const log = createLogger("dictionary-entries");
 
 export async function generateEntries(
     standardForm: string,
@@ -20,89 +14,42 @@ export async function generateEntries(
     const isWord = inputType === "word";
 
     const prompt = `
-你是一个词典条目生成器。为标准形式生成词典条目。
+生成词典条目。词语："${standardForm}"（${queryLang}）。用${definitionLang}释义。
 
-标准形式：${standardForm}
-查询语言：${queryLang}
-释义语言：${definitionLang}
-词条类型：${isWord ? "单词" : "短语"}
+返回 JSON：
+${isWord ? `{"entries":[{"ipa":"音标","partOfSpeech":"词性","definition":"释义","example":"例句"}]}` : `{"entries":[{"definition":"释义","example":"例句"}]}`}
 
-${isWord ? `
-单词条目要求：
-- ipa：音标（如适用）
-- partOfSpeech：词性
-- definition：释义（使用 ${definitionLang}）
-- example：例句（使用 ${queryLang}）
-` : `
-短语条目要求：
-- definition：短语释义（使用 ${definitionLang}）
-- example：例句（使用 ${queryLang}）
-`}
-
-生成 1-3 个条目，返回 JSON 格式：
-{
-  "entries": [
-    ${isWord ? `
-    {
-      "ipa": "音标",
-      "partOfSpeech": "词性",
-      "definition": "释义",
-      "example": "例句"
-    }` : `
-    {
-      "definition": "释义",
-      "example": "例句"
-    }`}
-  ]
-}
-
-只返回 JSON，不要任何其他文字。
+只返回 JSON。
 `.trim();
 
     try {
         const result = await getAnswer([
-            {
-                role: "system",
-                content: `你是一个词典条目生成器，只返回 JSON 格式的结果。`,
-            },
-            {
-                role: "user",
-                content: prompt,
-            },
+            { role: "system", content: "词典条目生成器，只返回 JSON。" },
+            { role: "user", content: prompt },
         ]).then(parseAIGeneratedJSON<EntriesGenerationResult>);
 
-        // 代码层面的数据验证
-        if (!result.entries || !Array.isArray(result.entries) || result.entries.length === 0) {
-            throw new Error("阶段4：entries 为空或不是数组");
+        if (!result.entries?.length) {
+            throw new Error("词条生成失败：结果为空");
         }
 
-        // 处理每个条目，清理 IPA 格式
         for (const entry of result.entries) {
-            // 清理 IPA：删除两端可能包含的方括号、斜杠等字符
             if (entry.ipa) {
-                entry.ipa = entry.ipa.trim();
-                // 删除开头的 [ / /
-                entry.ipa = entry.ipa.replace(/^[\[\/]/, '');
-                // 删除结尾的 ] / /
-                entry.ipa = entry.ipa.replace(/[\]\/]$/, '');
+                entry.ipa = entry.ipa.trim().replace(/^[\[\/]/, '').replace(/[\]\/]$/, '');
             }
-
-            if (!entry.definition || entry.definition.trim().length === 0) {
-                throw new Error("阶段4：条目缺少 definition");
+            if (!entry.definition?.trim()) {
+                throw new Error("词条缺少释义");
             }
-
-            if (!entry.example || entry.example.trim().length === 0) {
-                throw new Error("阶段4：条目缺少 example");
+            if (!entry.example?.trim()) {
+                throw new Error("词条缺少例句");
             }
-
             if (isWord && !entry.partOfSpeech) {
-                throw new Error("阶段4：单词条目缺少 partOfSpeech");
+                throw new Error("单词条目缺少词性");
             }
         }
 
         return result;
     } catch (error) {
-        log.error("Stage 4 failed", { error });
-        throw error; // 阶段4失败应该返回错误，因为这个阶段是核心
+        log.error("Entries generation failed", { error });
+        throw error;
     }
 }
diff --git a/src/lib/bigmodel/dictionary/types.ts b/src/lib/bigmodel/dictionary/types.ts
index e94eef2..62e4dbd 100644
--- a/src/lib/bigmodel/dictionary/types.ts
+++ b/src/lib/bigmodel/dictionary/types.ts
@@ -1,44 +1,21 @@
-/**
- * 词典查询的类型定义
- */
-
 export interface DictionaryContext {
     queryLang: string;
     definitionLang: string;
 }
 
-// 阶段1：输入分析结果
-export interface InputAnalysisResult {
+export interface PreprocessResult {
     isValid: boolean;
-    isEmpty: boolean;
-    isNaturalLanguage: boolean;
-    inputLanguage?: string;
-    inputType: "word" | "phrase" | "unknown";
-    reason: string;
-}
-
-// 阶段2：语义映射结果
-export interface SemanticMappingResult {
-    shouldMap: boolean;
-    canMap?: boolean;
-    coreSemantic?: string;
-    mappedQuery?: string;
-    reason: string;
-}
-
-// 阶段3：标准形式结果
-export interface StandardFormResult {
+    inputType: "word" | "phrase";
     standardForm: string;
     confidence: "high" | "medium" | "low";
     reason: string;
 }
 
-// 阶段4：词条生成结果
 export interface EntriesGenerationResult {
     entries: Array<{
         ipa?: string;
         definition: string;
         partOfSpeech?: string;
-        example: string; // example 必需
+        example: string;
     }>;
 }