diff --git a/messages/de-DE.json b/messages/de-DE.json index 66c3668..4d554fd 100644 --- a/messages/de-DE.json +++ b/messages/de-DE.json @@ -197,6 +197,35 @@ "favorites": "Favoriten", "settings": "Einstellungen" }, + "ocr": { + "title": "OCR Vokabel-Extraktion", + "description": "Laden Sie Screenshots von Vokabeltabellen aus Lehrbüchern hoch, um Wort-Definition-Paare zu extrahieren", + "uploadImage": "Bild hochladen", + "dragDropHint": "Ziehen Sie ein Bild hierher oder klicken Sie zum Auswählen", + "supportedFormats": "Unterstützt: JPG, PNG, WebP", + "selectFolder": "Ordner auswählen", + "chooseFolder": "Wählen Sie einen Ordner zum Speichern der extrahierten Paare", + "noFolders": "Keine Ordner verfügbar. Bitte erstellen Sie zuerst einen Ordner.", + "languageHints": "Sprachhinweise (Optional)", + "sourceLanguageHint": "Quellsprache (z.B. Englisch)", + "targetLanguageHint": "Ziel-/Übersetzungssprache (z.B. Chinesisch)", + "process": "Bild verarbeiten", + "processing": "Verarbeitung...", + "preview": "Vorschau", + "extractedPairs": "Extrahierte Paare", + "word": "Wort", + "definition": "Definition", + "pairsCount": "{count} Paare extrahiert", + "savePairs": "In Ordner speichern", + "saving": "Speichern...", + "saved": "{count} Paare erfolgreich in {folder} gespeichert", + "saveFailed": "Speichern fehlgeschlagen", + "noImage": "Bitte laden Sie zuerst ein Bild hoch", + "noFolder": "Bitte wählen Sie einen Ordner", + "processingFailed": "OCR-Verarbeitung fehlgeschlagen", + "tryAgain": "Bitte versuchen Sie es mit einem klareren Bild", + "detectedLanguages": "Erkannt: {source} → {target}" + }, "profile": { "myProfile": "Mein Profil", "email": "E-Mail: {email}", diff --git a/messages/en-US.json b/messages/en-US.json index 435fab1..cf29a59 100644 --- a/messages/en-US.json +++ b/messages/en-US.json @@ -197,6 +197,35 @@ "favorites": "Favorites", "settings": "Settings" }, + "ocr": { + "title": "OCR Vocabulary Extractor", + "description": "Upload vocabulary table screenshots 
from textbooks to extract word-definition pairs", + "uploadImage": "Upload Image", + "dragDropHint": "Drag and drop an image here, or click to select", + "supportedFormats": "Supports: JPG, PNG, WebP", + "selectFolder": "Select Folder", + "chooseFolder": "Choose a folder to save extracted pairs", + "noFolders": "No folders available. Please create a folder first.", + "languageHints": "Language Hints (Optional)", + "sourceLanguageHint": "Source language (e.g., English)", + "targetLanguageHint": "Target/Translation language (e.g., Chinese)", + "process": "Process Image", + "processing": "Processing...", + "preview": "Preview", + "extractedPairs": "Extracted Pairs", + "word": "Word", + "definition": "Definition", + "pairsCount": "{count} pairs extracted", + "savePairs": "Save to Folder", + "saving": "Saving...", + "saved": "Successfully saved {count} pairs to {folder}", + "saveFailed": "Failed to save pairs", + "noImage": "Please upload an image first", + "noFolder": "Please select a folder", + "processingFailed": "OCR processing failed", + "tryAgain": "Please try again with a clearer image", + "detectedLanguages": "Detected: {source} → {target}" + }, "profile": { "myProfile": "My Profile", "email": "Email: {email}", diff --git a/messages/fr-FR.json b/messages/fr-FR.json index 9bc3c2c..c2cee06 100644 --- a/messages/fr-FR.json +++ b/messages/fr-FR.json @@ -197,6 +197,35 @@ "favorites": "Favoris", "settings": "Paramètres" }, + "ocr": { + "title": "Extraction OCR de vocabulaire", + "description": "Téléchargez des captures d'écran de tableaux de vocabulaire pour extraire les paires mot-définition", + "uploadImage": "Télécharger une image", + "dragDropHint": "Glissez-déposez une image ici, ou cliquez pour sélectionner", + "supportedFormats": "Supportés : JPG, PNG, WebP", + "selectFolder": "Sélectionner un dossier", + "chooseFolder": "Choisissez un dossier pour sauvegarder les paires extraites", + "noFolders": "Aucun dossier disponible. 
Veuillez d'abord créer un dossier.", + "languageHints": "Indices de langue (Optionnel)", + "sourceLanguageHint": "Langue source (ex : Anglais)", + "targetLanguageHint": "Langue cible/traduction (ex : Chinois)", + "process": "Traiter l'image", + "processing": "Traitement...", + "preview": "Aperçu", + "extractedPairs": "Paires extraites", + "word": "Mot", + "definition": "Définition", + "pairsCount": "{count} paires extraites", + "savePairs": "Sauvegarder dans le dossier", + "saving": "Sauvegarde...", + "saved": "{count} paires sauvegardées dans {folder}", + "saveFailed": "Échec de la sauvegarde", + "noImage": "Veuillez d'abord télécharger une image", + "noFolder": "Veuillez sélectionner un dossier", + "processingFailed": "Échec du traitement OCR", + "tryAgain": "Veuillez réessayer avec une image plus claire", + "detectedLanguages": "Détecté : {source} → {target}" + }, "profile": { "myProfile": "Mon profil", "email": "E-mail : {email}", diff --git a/messages/it-IT.json b/messages/it-IT.json index 1cc2a79..e7ee044 100644 --- a/messages/it-IT.json +++ b/messages/it-IT.json @@ -197,6 +197,35 @@ "favorites": "Preferiti", "settings": "Impostazioni" }, + "ocr": { + "title": "Estrazione vocaboli OCR", + "description": "Carica screenshot di tabelle di vocaboli per estrarre coppie parola-definizione", + "uploadImage": "Carica immagine", + "dragDropHint": "Trascina e rilascia un'immagine qui, o clicca per selezionare", + "supportedFormats": "Supportati: JPG, PNG, WebP", + "selectFolder": "Seleziona cartella", + "chooseFolder": "Scegli una cartella per salvare le coppie estratte", + "noFolders": "Nessuna cartella disponibile. 
Crea prima una cartella.", + "languageHints": "Suggerimenti lingua (Opzionale)", + "sourceLanguageHint": "Lingua sorgente (es: Inglese)", + "targetLanguageHint": "Lingua target/traduzione (es: Cinese)", + "process": "Elabora immagine", + "processing": "Elaborazione...", + "preview": "Anteprima", + "extractedPairs": "Coppie estratte", + "word": "Parola", + "definition": "Definizione", + "pairsCount": "{count} coppie estratte", + "savePairs": "Salva nella cartella", + "saving": "Salvataggio...", + "saved": "{count} coppie salvate in {folder}", + "saveFailed": "Salvataggio fallito", + "noImage": "Carica prima un'immagine", + "noFolder": "Seleziona una cartella", + "processingFailed": "Elaborazione OCR fallita", + "tryAgain": "Riprova con un'immagine più chiara", + "detectedLanguages": "Rilevato: {source} → {target}" + }, "profile": { "myProfile": "Il Mio Profilo", "email": "Email: {email}", diff --git a/messages/ja-JP.json b/messages/ja-JP.json index a91a5dc..a5d227c 100644 --- a/messages/ja-JP.json +++ b/messages/ja-JP.json @@ -197,6 +197,35 @@ "favorites": "お気に入り", "settings": "設定" }, + "ocr": { + "title": "OCR語彙抽出", + "description": "教科書の語彙表のスクリーンショットをアップロードして単語と定義のペアを抽出", + "uploadImage": "画像をアップロード", + "dragDropHint": "ここに画像をドラッグ&ドロップ、またはクリックして選択", + "supportedFormats": "対応形式:JPG、PNG、WebP", + "selectFolder": "フォルダを選択", + "chooseFolder": "抽出したペアを保存するフォルダを選択", + "noFolders": "フォルダがありません。まずフォルダを作成してください。", + "languageHints": "言語ヒント(オプション)", + "sourceLanguageHint": "ソース言語(例:英語)", + "targetLanguageHint": "ターゲット/翻訳言語(例:中国語)", + "process": "画像を処理", + "processing": "処理中...", + "preview": "プレビュー", + "extractedPairs": "抽出されたペア", + "word": "単語", + "definition": "定義", + "pairsCount": "{count} ペアを抽出", + "savePairs": "フォルダに保存", + "saving": "保存中...", + "saved": "{count} ペアを {folder} に保存しました", + "saveFailed": "保存に失敗しました", + "noImage": "先に画像をアップロードしてください", + "noFolder": "フォルダを選択してください", + "processingFailed": "OCR処理に失敗しました", + "tryAgain": "より鮮明な画像でお試しください", + "detectedLanguages": 
"検出:{source} → {target}" + }, "profile": { "myProfile": "マイプロフィール", "email": "メール: {email}", diff --git a/messages/ko-KR.json b/messages/ko-KR.json index 75de478..f3d2c9d 100644 --- a/messages/ko-KR.json +++ b/messages/ko-KR.json @@ -197,6 +197,35 @@ "favorites": "즐겨찾기", "settings": "설정" }, + "ocr": { + "title": "OCR 어휘 추출", + "description": "교과서 어휘표 스크린샷을 업로드하여 단어-정의 쌍 추출", + "uploadImage": "이미지 업로드", + "dragDropHint": "이미지를 여기에 끌어다 놓거나 클릭하여 선택", + "supportedFormats": "지원 형식: JPG, PNG, WebP", + "selectFolder": "폴더 선택", + "chooseFolder": "추출된 쌍을 저장할 폴더 선택", + "noFolders": "폴더가 없습니다. 먼저 폴더를 만드세요.", + "languageHints": "언어 힌트 (선택사항)", + "sourceLanguageHint": "소스 언어 (예: 영어)", + "targetLanguageHint": "대상/번역 언어 (예: 중국어)", + "process": "이미지 처리", + "processing": "처리 중...", + "preview": "미리보기", + "extractedPairs": "추출된 쌍", + "word": "단어", + "definition": "정의", + "pairsCount": "{count} 쌍 추출됨", + "savePairs": "폴더에 저장", + "saving": "저장 중...", + "saved": "{folder}에 {count} 쌍 저장 완료", + "saveFailed": "저장 실패", + "noImage": "먼저 이미지를 업로드하세요", + "noFolder": "폴더를 선택하세요", + "processingFailed": "OCR 처리 실패", + "tryAgain": "더 선명한 이미지로 다시 시도하세요", + "detectedLanguages": "감지됨: {source} → {target}" + }, "profile": { "myProfile": "내 프로필", "email": "이메일: {email}", diff --git a/messages/ug-CN.json b/messages/ug-CN.json index 53baeba..2adf755 100644 --- a/messages/ug-CN.json +++ b/messages/ug-CN.json @@ -197,6 +197,35 @@ "favorites": "يىغىپ ساقلاش", "settings": "تەڭشەكلەر" }, + "ocr": { + "title": "OCR سۆز ئاستىرىش", + "description": "دەرىسلىك كىتابىدىكى سۆز جەدۋىلى سۈرەتلىرىنى يۈكلەپ سۆز-مەنا جۈپلىرىنى ئاستىرىڭ", + "uploadImage": "سۈرەت يۈكلەش", + "dragDropHint": "سۈرەتنى بۇ يەرگە سۆرەڭ ياكى چېكىپ تاللاڭ", + "supportedFormats": "قوللايدىغان فورماتلار: JPG، PNG، WebP", + "selectFolder": "قىسقۇچ تاللاش", + "chooseFolder": "ئاستىرىلغان جۈپلەرنى ساقلاش ئۈچۈن قىسقۇچ تاللاڭ", + "noFolders": "قىسقۇچ يوق. 
ئاۋۋال قىسقۇچ قۇرۇڭ.", + "languageHints": "تىل ئۇچۇرلىرى (ئىختىيارىي)", + "sourceLanguageHint": "مەنبە تىلى (مىسال: ئىنگىلىزچە)", + "targetLanguageHint": "نىشان/تەرجىمە تىلى (مىسال: خەنزۇچە)", + "process": "سۈرەتنى بىر تەرەپ قىلىش", + "processing": "بىر تەرەپ قىلىۋاتىدۇ...", + "preview": "ئالدىن كۆرۈش", + "extractedPairs": "ئاستىرىلغان جۈپلەر", + "word": "سۆز", + "definition": "مەنا", + "pairsCount": "{count} جۈپ ئاستىرىلدى", + "savePairs": "قىسقۇچقا ساقلاش", + "saving": "ساقلاۋاتىدۇ...", + "saved": "{folder} غا {count} جۈپ ساقلاندى", + "saveFailed": "ساقلاش مەغلۇپ بولدى", + "noImage": "ئاۋۋال سۈرەت يۈكلەڭ", + "noFolder": "قىسقۇچ تاللاڭ", + "processingFailed": "OCR بىر تەرەپ قىلىش مەغلۇپ بولدى", + "tryAgain": "تېخىمۇ ئېنىق سۈرەت بىلەن قايتا سىناڭ", + "detectedLanguages": "بايقالدى: {source} → {target}" + }, "profile": { "myProfile": "شەخسىي ئۇچۇرۇم", "email": "ئېلخەت: {email}", diff --git a/messages/zh-CN.json b/messages/zh-CN.json index 60e6d12..da5c3c8 100644 --- a/messages/zh-CN.json +++ b/messages/zh-CN.json @@ -197,6 +197,35 @@ "favorites": "收藏", "settings": "设置" }, + "ocr": { + "title": "OCR 词汇提取", + "description": "上传教材词汇表截图,提取单词-释义对", + "uploadImage": "上传图片", + "dragDropHint": "拖放图片到此处,或点击选择", + "supportedFormats": "支持格式:JPG、PNG、WebP", + "selectFolder": "选择文件夹", + "chooseFolder": "选择保存提取词汇的文件夹", + "noFolders": "暂无文件夹,请先创建文件夹", + "languageHints": "语言提示(可选)", + "sourceLanguageHint": "源语言(如:英语)", + "targetLanguageHint": "目标/翻译语言(如:中文)", + "process": "处理图片", + "processing": "处理中...", + "preview": "预览", + "extractedPairs": "提取的词汇对", + "word": "单词", + "definition": "释义", + "pairsCount": "已提取 {count} 个词汇对", + "savePairs": "保存到文件夹", + "saving": "保存中...", + "saved": "成功将 {count} 个词汇对保存到 {folder}", + "saveFailed": "保存失败", + "noImage": "请先上传图片", + "noFolder": "请选择文件夹", + "processingFailed": "OCR 处理失败", + "tryAgain": "请尝试上传更清晰的图片", + "detectedLanguages": "检测到:{source} → {target}" + }, "profile": { "myProfile": "我的个人资料", "email": "邮箱:{email}", diff --git 
a/src/app/(features)/ocr/OCRClient.tsx b/src/app/(features)/ocr/OCRClient.tsx new file mode 100644 index 0000000..393d905 --- /dev/null +++ b/src/app/(features)/ocr/OCRClient.tsx @@ -0,0 +1,253 @@ +"use client"; + +import { useState, useCallback, useRef } from "react"; +import { useTranslations } from "next-intl"; +import { PageLayout } from "@/components/ui/PageLayout"; +import { PrimaryButton, LightButton } from "@/design-system/base/button"; +import { Input } from "@/design-system/base/input"; +import { Select } from "@/design-system/base/select"; +import { Card } from "@/design-system/base/card"; +import { toast } from "sonner"; +import { Upload, FileImage, Loader2 } from "lucide-react"; +import { actionProcessOCR } from "@/modules/ocr/ocr-action"; +import { TSharedFolder } from "@/shared/folder-type"; +import { OCROutput } from "@/lib/bigmodel/ocr/types"; + +interface OCRClientProps { + initialFolders: TSharedFolder[]; +} + +export function OCRClient({ initialFolders }: OCRClientProps) { + const t = useTranslations("ocr"); + const fileInputRef = useRef(null); + + const [selectedFile, setSelectedFile] = useState(null); + const [previewUrl, setPreviewUrl] = useState(null); + const [selectedFolderId, setSelectedFolderId] = useState( + initialFolders.length > 0 ? 
initialFolders[0].id : null + ); + const [sourceLanguage, setSourceLanguage] = useState(""); + const [targetLanguage, setTargetLanguage] = useState(""); + const [isProcessing, setIsProcessing] = useState(false); + const [ocrResult, setOcrResult] = useState(null); + + const handleFileChange = useCallback((file: File | null) => { + if (!file) return; + + if (!file.type.startsWith("image/")) { + toast.error(t("processingFailed")); + return; + } + + const url = URL.createObjectURL(file); + setPreviewUrl(url); + setSelectedFile(file); + setOcrResult(null); + }, [t]); + + const handleDrop = useCallback((e: React.DragEvent) => { + e.preventDefault(); + const file = e.dataTransfer.files[0]; + handleFileChange(file); + }, [handleFileChange]); + + const handleDragOver = useCallback((e: React.DragEvent) => { + e.preventDefault(); + }, []); + + const fileToBase64 = async (file: File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => { + const result = reader.result as string; + const base64 = result.split(",")[1]; + resolve(base64); + }; + reader.onerror = reject; + reader.readAsDataURL(file); + }); + }; + + const handleProcess = async () => { + if (!selectedFile) { + toast.error(t("noImage")); + return; + } + + if (!selectedFolderId) { + toast.error(t("noFolder")); + return; + } + + setIsProcessing(true); + setOcrResult(null); + + try { + const base64 = await fileToBase64(selectedFile); + + const result = await actionProcessOCR({ + imageBase64: base64, + folderId: selectedFolderId, + sourceLanguage: sourceLanguage || undefined, + targetLanguage: targetLanguage || undefined, + }); + + if (result.success) { + const folderName = initialFolders.find(f => f.id === selectedFolderId)?.name || ""; + toast.success(t("saved", { count: result.data?.pairsCreated ?? 
0, folder: folderName })); + } else { + toast.error(result.message || t("processingFailed")); + } + } catch { + toast.error(t("processingFailed")); + } finally { + setIsProcessing(false); + } + }; + + const clearImage = () => { + if (previewUrl) { + URL.revokeObjectURL(previewUrl); + } + setPreviewUrl(null); + setSelectedFile(null); + setOcrResult(null); + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + }; + + return ( + +
+

{t("title")}

+

{t("description")}

+
+ +
+ +
+
+ + {t("uploadImage")} +
+ +
fileInputRef.current?.click()} + > + {previewUrl ? ( +
+ Preview +
+ { + e.stopPropagation(); + clearImage(); + }} + > + {t("uploadImage")} + +
+
+ ) : ( +
+ +

{t("dragDropHint")}

+

{t("supportedFormats")}

+
+ )} + handleFileChange(e.target.files?.[0] || null)} + /> +
+
+
+ + +
+
{t("selectFolder")}
+ + {initialFolders.length > 0 ? ( + + ) : ( +

{t("noFolders")}

+ )} +
+
+ + +
+
{t("languageHints")}
+ +
+
+ + setSourceLanguage(e.target.value)} + placeholder="English" + /> +
+
+ + setTargetLanguage(e.target.value)} + placeholder="Chinese" + /> +
+
+
+
+ +
+ + {isProcessing ? ( + <> + + {t("processing")} + + ) : ( + t("process") + )} + +
+
+
+  );
+}
diff --git a/src/app/(features)/ocr/page.tsx b/src/app/(features)/ocr/page.tsx
new file mode 100644
index 0000000..fe18064
--- /dev/null
+++ b/src/app/(features)/ocr/page.tsx
@@ -0,0 +1,24 @@
+import { OCRClient } from "./OCRClient";
+import { auth } from "@/auth";
+import { headers } from "next/headers";
+import { actionGetFoldersByUserId } from "@/modules/folder/folder-action";
+import { TSharedFolder } from "@/shared/folder-type";
+
+/**
+ * Server component for the OCR page: loads the signed-in user's folders and
+ * passes them to the client UI. Without a session the folder list stays empty.
+ */
+export default async function OCRPage() {
+  const session = await auth.api.getSession({ headers: await headers() });
+
+  let folders: TSharedFolder[] = [];
+
+  if (session?.user?.id) {
+    const result = await actionGetFoldersByUserId(session.user.id as string);
+    if (result.success && result.data) {
+      folders = result.data;
+    }
+  }
+
+  return <OCRClient initialFolders={folders} />;
+}
diff --git a/src/lib/bigmodel/ocr/orchestrator.ts b/src/lib/bigmodel/ocr/orchestrator.ts
new file mode 100644
index 0000000..c453609
--- /dev/null
+++ b/src/lib/bigmodel/ocr/orchestrator.ts
@@ -0,0 +1,176 @@
+import OpenAI from "openai";
+import { parseAIGeneratedJSON } from "@/utils/json";
+import { createLogger } from "@/lib/logger";
+import { OCRInput, OCROutput, OCRRawResponse } from "./types";
+
+const log = createLogger("ocr-orchestrator");
+
+const openai = new OpenAI({
+  apiKey: process.env.ZHIPU_API_KEY,
+  baseURL: "https://open.bigmodel.cn/api/paas/v4",
+});
+
+/**
+ * Error raised by this module's own validation of the model response.
+ *
+ * executeOCR rethrows these untouched; every other failure is logged and
+ * wrapped. Matching on the class replaces the former message-prefix check,
+ * which missed validation messages that do not begin with "OCR".
+ */
+class OCRResponseError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "OCRResponseError";
+  }
+}
+
+/**
+ * Executes OCR on an image to extract vocabulary word-definition pairs.
+ *
+ * Uses GLM-4.6V vision model to analyze vocabulary table images and
+ * extract structured word-definition pairs.
+ *
+ * Pairs whose word or definition is missing, not a string, or blank are
+ * skipped with a warning; the call fails if no usable pair remains.
+ *
+ * @param input - OCR input containing base64 image and optional language hints
+ * @returns Structured output with extracted pairs and detected languages
+ * @throws Error if OCR fails or response is malformed
+ *
+ * @example
+ * ```typescript
+ * const result = await executeOCR({
+ *   imageBase64: "iVBORw0KGgo...",
+ *   sourceLanguage: "English",
+ *   targetLanguage: "Chinese"
+ * });
+ * // result.pairs: [{ word: "hello", definition: "你好" }, ...]
+ * ```
+ */
+export async function executeOCR(input: OCRInput): Promise<OCROutput> {
+  const { imageBase64, sourceLanguage, targetLanguage } = input;
+
+  log.debug("Starting OCR", {
+    hasSourceHint: !!sourceLanguage,
+    hasTargetHint: !!targetLanguage,
+    imageLength: imageBase64.length,
+  });
+
+  const languageHints: string[] = [];
+  if (sourceLanguage) {
+    languageHints.push(`源语言提示: ${sourceLanguage}`);
+  }
+  if (targetLanguage) {
+    languageHints.push(`目标语言提示: ${targetLanguage}`);
+  }
+
+  const prompt = `
+你是一个专业的OCR识别助手,专门从词汇表截图中提取单词和释义。
+
+${languageHints.length > 0 ? `语言提示:\n${languageHints.join("\n")}\n` : ""}
+
+你的任务是分析图片中的词汇表,提取所有单词-释义对。
+
+要求:
+1. 识别图片中的词汇表结构(可能是两列或多列)
+2. 提取每一行的单词和对应的释义/翻译
+3. 自动检测源语言和目标语言
+4. 保持原始大小写和拼写
+5. 如果图片模糊或不清晰,尽力识别并标注置信度较低的项目
+6. 忽略表头、页码等非词汇内容
+
+返回 JSON 格式:
+{
+  "pairs": [
+    { "word": "单词1", "definition": "释义1" },
+    { "word": "单词2", "definition": "释义2" }
+  ],
+  "detectedSourceLanguage": "检测到的源语言",
+  "detectedTargetLanguage": "检测到的目标语言"
+}
+
+只返回 JSON,不要任何其他文字。
+`.trim();
+
+  try {
+    const response = await openai.chat.completions.create({
+      model: "glm-4.6v",
+      messages: [
+        {
+          role: "user",
+          content: [
+            {
+              type: "image_url",
+              image_url: {
+                url: imageBase64,
+              },
+            },
+            {
+              type: "text",
+              text: prompt,
+            },
+          ],
+        },
+      ],
+      temperature: 0.1,
+    });
+
+    const content = response.choices[0]?.message?.content;
+
+    if (!content) {
+      log.error("OCR returned empty response");
+      throw new OCRResponseError("OCR 返回空响应");
+    }
+
+    log.debug("Received OCR response", { contentLength: content.length });
+
+    const parsed = parseAIGeneratedJSON<OCRRawResponse>(content);
+
+    if (!Array.isArray(parsed?.pairs)) {
+      log.error("Invalid OCR response: missing or invalid pairs array", { parsed });
+      throw new OCRResponseError("OCR 响应格式无效:缺少 pairs 数组");
+    }
+
+    // Model output is untrusted: entries can be null, mistyped or blank.
+    const validPairs = parsed.pairs.filter((pair) => {
+      const isValid =
+        pair != null &&
+        typeof pair.word === "string" &&
+        typeof pair.definition === "string" &&
+        pair.word.trim() !== "" &&
+        pair.definition.trim() !== "";
+      if (!isValid) {
+        log.warn("Skipping invalid pair", { pair });
+      }
+      return isValid;
+    });
+
+    if (validPairs.length === 0) {
+      log.error("No valid pairs extracted from image");
+      throw new OCRResponseError("未能从图片中提取有效的词汇对");
+    }
+
+    const result: OCROutput = {
+      pairs: validPairs,
+      detectedSourceLanguage: parsed.detectedSourceLanguage,
+      detectedTargetLanguage: parsed.detectedTargetLanguage,
+    };
+
+    log.info("OCR completed successfully", {
+      pairCount: result.pairs.length,
+      sourceLanguage: result.detectedSourceLanguage,
+      targetLanguage: result.detectedTargetLanguage,
+    });
+
+    return result;
+  } catch (error) {
+    // Validation failures were already logged where they were raised.
+    if (error instanceof OCRResponseError) {
+      throw error;
+    }
+
+    log.error("OCR failed", { error });
+    const errorMessage = error instanceof Error ? error.message : "未知错误";
+    throw new Error(`OCR 处理失败: ${errorMessage}`);
+  }
+}
diff --git a/src/lib/bigmodel/ocr/types.ts b/src/lib/bigmodel/ocr/types.ts
new file mode 100644
index 0000000..31a0185
--- /dev/null
+++ b/src/lib/bigmodel/ocr/types.ts
@@ -0,0 +1,44 @@
+/**
+ * Input for OCR pipeline
+ */
+export interface OCRInput {
+  /** Base64 encoded image (without data URL prefix) */
+  imageBase64: string;
+  /** Optional: hint about source language */
+  sourceLanguage?: string;
+  /** Optional: hint about target/translation language */
+  targetLanguage?: string;
+}
+
+/**
+ * Single word-definition pair extracted from image
+ */
+export interface VocabularyPair {
+  /** The original word */
+  word: string;
+  /** The translation/definition */
+  definition: string;
+}
+
+/**
+ * Output from OCR pipeline
+ */
+export interface OCROutput {
+  /** Extracted word-definition pairs */
+  pairs: VocabularyPair[];
+  /** Detected source language */
+  detectedSourceLanguage?: string;
+  /** Detected target/translation language */
+  detectedTargetLanguage?: string;
+}
+
+/**
+ * Internal structure for AI response parsing
+ */
+interface OCRRawResponse {
+  pairs: Array<{ word: string; definition: string }>;
+  detectedSourceLanguage?: string;
+  detectedTargetLanguage?: string;
+}
+
+export type { OCRRawResponse };
diff --git a/src/modules/ocr/ocr-action-dto.ts b/src/modules/ocr/ocr-action-dto.ts
new file mode 100644
index 0000000..8aaae49
--- /dev/null
+++ b/src/modules/ocr/ocr-action-dto.ts
@@ -0,0 +1,20 @@
+import { z } from "zod";
+
+export const schemaActionInputProcessOCR = z.object({
+  imageBase64: z.string().min(1, "Image is required"),
+  folderId: z.number().int().positive("Folder ID must be positive"),
+  sourceLanguage: z.string().optional(),
+  targetLanguage: z.string().optional(),
+});
+
+export type ActionInputProcessOCR = z.infer<typeof schemaActionInputProcessOCR>;
+
+export interface ActionOutputProcessOCR {
+  success: boolean;
+  message: string;
+  data?: {
+    pairsCreated: number;
+    sourceLanguage?: 
string;
+    targetLanguage?: string;
+  };
+}
diff --git a/src/modules/ocr/ocr-action.ts b/src/modules/ocr/ocr-action.ts
new file mode 100644
index 0000000..cdcb77f
--- /dev/null
+++ b/src/modules/ocr/ocr-action.ts
@@ -0,0 +1,35 @@
+"use server";
+
+import { validate } from "@/utils/validate";
+import { ValidateError } from "@/lib/errors";
+import { createLogger } from "@/lib/logger";
+import { serviceProcessOCR } from "./ocr-service";
+import { schemaActionInputProcessOCR } from "./ocr-action-dto";
+import type { ActionOutputProcessOCR } from "./ocr-action-dto";
+
+const log = createLogger("ocr-action");
+
+/**
+ * Server action: validates the raw client payload and runs the OCR service.
+ *
+ * Failures are reported as `{ success: false, message }` rather than thrown.
+ *
+ * @param input - Untrusted payload, checked against schemaActionInputProcessOCR
+ * @returns The service result, or a failure result on validation/unexpected errors
+ */
+export async function actionProcessOCR(
+  input: unknown
+): Promise<ActionOutputProcessOCR> {
+  try {
+    const validatedInput = validate(input, schemaActionInputProcessOCR);
+    // `await` is required: returning the bare promise would let a rejection
+    // from the service escape this try/catch instead of being logged here.
+    return await serviceProcessOCR(validatedInput);
+  } catch (e) {
+    if (e instanceof ValidateError) {
+      return { success: false, message: e.message };
+    }
+    log.error("OCR action failed", { error: e });
+    return { success: false, message: "Unknown error occurred." };
+  }
+}
diff --git a/src/modules/ocr/ocr-repository-dto.ts b/src/modules/ocr/ocr-repository-dto.ts
new file mode 100644
index 0000000..7a1f1b7
--- /dev/null
+++ b/src/modules/ocr/ocr-repository-dto.ts
@@ -0,0 +1 @@
+export type { RepoInputCreatePair } from "@/modules/folder/folder-repository-dto";
diff --git a/src/modules/ocr/ocr-repository.ts b/src/modules/ocr/ocr-repository.ts
new file mode 100644
index 0000000..a4e1bad
--- /dev/null
+++ b/src/modules/ocr/ocr-repository.ts
@@ -0,0 +1,5 @@
+import { repoCreatePair, repoGetUserIdByFolderId } from "@/modules/folder/folder-repository";
+import type { RepoInputCreatePair } from "./ocr-repository-dto";
+
+export { repoCreatePair, repoGetUserIdByFolderId };
+export type { RepoInputCreatePair };
diff --git a/src/modules/ocr/ocr-service-dto.ts b/src/modules/ocr/ocr-service-dto.ts
new file mode 100644
index 0000000..2591c16
--- /dev/null
+++ b/src/modules/ocr/ocr-service-dto.ts
@@ -0,0 +1,20 @@
+import { z } from "zod";
+
+export const schemaServiceInputProcessOCR = z.object({
+  imageBase64: z.string().min(1, "Image is required"),
+  folderId: z.number().int().positive("Folder ID must be positive"),
+  sourceLanguage: z.string().optional(),
+  targetLanguage: z.string().optional(),
+});
+
+export type ServiceInputProcessOCR = z.infer<typeof schemaServiceInputProcessOCR>;
+
+export interface ServiceOutputProcessOCR {
+  success: boolean;
+  message: string;
+  data?: {
+    pairsCreated: number;
+    sourceLanguage?: string;
+    targetLanguage?: string;
+  };
+}
diff --git a/src/modules/ocr/ocr-service.ts b/src/modules/ocr/ocr-service.ts
new file mode 100644
index 0000000..5aff329
--- /dev/null
+++ b/src/modules/ocr/ocr-service.ts
@@ -0,0 +1,121 @@
+"use server";
+
+// NOTE(review): the "use server" directive above turns every export of this
+// file into a client-callable Server Function, which would let callers skip
+// the validation in ocr-action.ts — confirm this is intended.
+
+import { executeOCR } from "@/lib/bigmodel/ocr/orchestrator";
+import { repoCreatePair, repoGetUserIdByFolderId } from "./ocr-repository";
+import { auth } from "@/auth";
+import { headers } from "next/headers";
+import { createLogger } from "@/lib/logger";
+import type { ServiceInputProcessOCR, ServiceOutputProcessOCR } from "./ocr-service-dto";
+
+const log = createLogger("ocr-service");
+
+/**
+ * Runs OCR on an uploaded image and stores the extracted pairs in a folder.
+ *
+ * Checks the session and folder ownership before calling the OCR pipeline,
+ * then inserts the pairs one at a time; a failed insert is logged and skipped.
+ * Reports failure when no pair at all could be stored.
+ *
+ * @param input - Image, target folder id and optional language hints
+ * @returns Result with the number of pairs created and the languages used
+ */
+export async function serviceProcessOCR(
+  input: ServiceInputProcessOCR
+): Promise<ServiceOutputProcessOCR> {
+  log.info("Processing OCR request", { folderId: input.folderId });
+
+  const session = await auth.api.getSession({ headers: await headers() });
+  if (!session?.user?.id) {
+    log.warn("Unauthorized OCR attempt");
+    return { success: false, message: "Unauthorized" };
+  }
+
+  const folderOwner = await repoGetUserIdByFolderId(input.folderId);
+  if (folderOwner !== session.user.id) {
+    log.warn("Folder ownership mismatch", {
+      folderId: input.folderId,
+      userId: session.user.id,
+    });
+    return {
+      success: false,
+      message: "You don't have permission to modify this folder",
+    };
+  }
+
+  let ocrResult;
+  try {
+    log.debug("Calling OCR pipeline");
+    ocrResult = await executeOCR({
+      imageBase64: input.imageBase64,
+      sourceLanguage: input.sourceLanguage,
+      targetLanguage: input.targetLanguage,
+    });
+  } catch (error) {
+    log.error("OCR pipeline failed", { error });
+    return {
+      success: false,
+      message: "Failed to process image. Please try again.",
+    };
+  }
+
+  if (!ocrResult.pairs || ocrResult.pairs.length === 0) {
+    log.info("No vocabulary pairs extracted from image");
+    return {
+      success: false,
+      message: "No vocabulary pairs could be extracted from the image",
+    };
+  }
+
+  // Prefer what the model detected, then the caller's hint, then a placeholder.
+  const sourceLanguage = ocrResult.detectedSourceLanguage || input.sourceLanguage || "Unknown";
+  const targetLanguage = ocrResult.detectedTargetLanguage || input.targetLanguage || "Unknown";
+
+  // Best-effort inserts: one bad pair must not discard the rest of the batch.
+  let pairsCreated = 0;
+  for (const pair of ocrResult.pairs) {
+    try {
+      await repoCreatePair({
+        folderId: input.folderId,
+        language1: sourceLanguage,
+        language2: targetLanguage,
+        text1: pair.word,
+        text2: pair.definition,
+      });
+      pairsCreated++;
+    } catch (error) {
+      log.error("Failed to create pair", {
+        word: pair.word,
+        error,
+      });
+    }
+  }
+
+  // Every insert failed: do not report success for an empty result.
+  if (pairsCreated === 0) {
+    log.error("No vocabulary pairs could be saved", { folderId: input.folderId });
+    return {
+      success: false,
+      message: "Failed to save vocabulary pairs",
+    };
+  }
+
+  log.info("OCR processing complete", {
+    pairsCreated,
+    sourceLanguage,
+    targetLanguage,
+  });
+
+  return {
+    success: true,
+    message: `Successfully created ${pairsCreated} vocabulary pairs`,
+    data: {
+      pairsCreated,
+      sourceLanguage,
+      targetLanguage,
+    },
+  };
+}