feat: 添加 OCR 词汇提取功能
新增 OCR 页面，用户可上传教材词汇表截图，使用 GLM-4.6V 视觉模型提取单词-释义对并保存到指定文件夹。
- AI 管道: src/lib/bigmodel/ocr/ (orchestrator, types)
- 后端模块: src/modules/ocr/ (action-service-repository 架构)
- 前端页面: src/app/(features)/ocr/ (拖拽上传、文件夹选择)
- i18n: 8 种语言翻译支持
This commit is contained in:
@@ -197,6 +197,35 @@
|
||||
"favorites": "Favoriten",
|
||||
"settings": "Einstellungen"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "OCR Vokabel-Extraktion",
|
||||
"description": "Laden Sie Screenshots von Vokabeltabellen aus Lehrbüchern hoch, um Wort-Definition-Paare zu extrahieren",
|
||||
"uploadImage": "Bild hochladen",
|
||||
"dragDropHint": "Ziehen Sie ein Bild hierher oder klicken Sie zum Auswählen",
|
||||
"supportedFormats": "Unterstützt: JPG, PNG, WebP",
|
||||
"selectFolder": "Ordner auswählen",
|
||||
"chooseFolder": "Wählen Sie einen Ordner zum Speichern der extrahierten Paare",
|
||||
"noFolders": "Keine Ordner verfügbar. Bitte erstellen Sie zuerst einen Ordner.",
|
||||
"languageHints": "Sprachhinweise (Optional)",
|
||||
"sourceLanguageHint": "Quellsprache (z.B. Englisch)",
|
||||
"targetLanguageHint": "Ziel-/Übersetzungssprache (z.B. Chinesisch)",
|
||||
"process": "Bild verarbeiten",
|
||||
"processing": "Verarbeitung...",
|
||||
"preview": "Vorschau",
|
||||
"extractedPairs": "Extrahierte Paare",
|
||||
"word": "Wort",
|
||||
"definition": "Definition",
|
||||
"pairsCount": "{count} Paare extrahiert",
|
||||
"savePairs": "In Ordner speichern",
|
||||
"saving": "Speichern...",
|
||||
"saved": "{count} Paare erfolgreich in {folder} gespeichert",
|
||||
"saveFailed": "Speichern fehlgeschlagen",
|
||||
"noImage": "Bitte laden Sie zuerst ein Bild hoch",
|
||||
"noFolder": "Bitte wählen Sie einen Ordner",
|
||||
"processingFailed": "OCR-Verarbeitung fehlgeschlagen",
|
||||
"tryAgain": "Bitte versuchen Sie es mit einem klareren Bild",
|
||||
"detectedLanguages": "Erkannt: {source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "Mein Profil",
|
||||
"email": "E-Mail: {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "Favorites",
|
||||
"settings": "Settings"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "OCR Vocabulary Extractor",
|
||||
"description": "Upload vocabulary table screenshots from textbooks to extract word-definition pairs",
|
||||
"uploadImage": "Upload Image",
|
||||
"dragDropHint": "Drag and drop an image here, or click to select",
|
||||
"supportedFormats": "Supports: JPG, PNG, WebP",
|
||||
"selectFolder": "Select Folder",
|
||||
"chooseFolder": "Choose a folder to save extracted pairs",
|
||||
"noFolders": "No folders available. Please create a folder first.",
|
||||
"languageHints": "Language Hints (Optional)",
|
||||
"sourceLanguageHint": "Source language (e.g., English)",
|
||||
"targetLanguageHint": "Target/Translation language (e.g., Chinese)",
|
||||
"process": "Process Image",
|
||||
"processing": "Processing...",
|
||||
"preview": "Preview",
|
||||
"extractedPairs": "Extracted Pairs",
|
||||
"word": "Word",
|
||||
"definition": "Definition",
|
||||
"pairsCount": "{count} pairs extracted",
|
||||
"savePairs": "Save to Folder",
|
||||
"saving": "Saving...",
|
||||
"saved": "Successfully saved {count} pairs to {folder}",
|
||||
"saveFailed": "Failed to save pairs",
|
||||
"noImage": "Please upload an image first",
|
||||
"noFolder": "Please select a folder",
|
||||
"processingFailed": "OCR processing failed",
|
||||
"tryAgain": "Please try again with a clearer image",
|
||||
"detectedLanguages": "Detected: {source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "My Profile",
|
||||
"email": "Email: {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "Favoris",
|
||||
"settings": "Paramètres"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "Extraction OCR de vocabulaire",
|
||||
"description": "Téléchargez des captures d'écran de tableaux de vocabulaire pour extraire les paires mot-définition",
|
||||
"uploadImage": "Télécharger une image",
|
||||
"dragDropHint": "Glissez-déposez une image ici, ou cliquez pour sélectionner",
|
||||
"supportedFormats": "Supportés : JPG, PNG, WebP",
|
||||
"selectFolder": "Sélectionner un dossier",
|
||||
"chooseFolder": "Choisissez un dossier pour sauvegarder les paires extraites",
|
||||
"noFolders": "Aucun dossier disponible. Veuillez d'abord créer un dossier.",
|
||||
"languageHints": "Indices de langue (Optionnel)",
|
||||
"sourceLanguageHint": "Langue source (ex : Anglais)",
|
||||
"targetLanguageHint": "Langue cible/traduction (ex : Chinois)",
|
||||
"process": "Traiter l'image",
|
||||
"processing": "Traitement...",
|
||||
"preview": "Aperçu",
|
||||
"extractedPairs": "Paires extraites",
|
||||
"word": "Mot",
|
||||
"definition": "Définition",
|
||||
"pairsCount": "{count} paires extraites",
|
||||
"savePairs": "Sauvegarder dans le dossier",
|
||||
"saving": "Sauvegarde...",
|
||||
"saved": "{count} paires sauvegardées dans {folder}",
|
||||
"saveFailed": "Échec de la sauvegarde",
|
||||
"noImage": "Veuillez d'abord télécharger une image",
|
||||
"noFolder": "Veuillez sélectionner un dossier",
|
||||
"processingFailed": "Échec du traitement OCR",
|
||||
"tryAgain": "Veuillez réessayer avec une image plus claire",
|
||||
"detectedLanguages": "Détecté : {source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "Mon profil",
|
||||
"email": "E-mail : {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "Preferiti",
|
||||
"settings": "Impostazioni"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "Estrazione vocaboli OCR",
|
||||
"description": "Carica screenshot di tabelle di vocaboli per estrarre coppie parola-definizione",
|
||||
"uploadImage": "Carica immagine",
|
||||
"dragDropHint": "Trascina e rilascia un'immagine qui, o clicca per selezionare",
|
||||
"supportedFormats": "Supportati: JPG, PNG, WebP",
|
||||
"selectFolder": "Seleziona cartella",
|
||||
"chooseFolder": "Scegli una cartella per salvare le coppie estratte",
|
||||
"noFolders": "Nessuna cartella disponibile. Crea prima una cartella.",
|
||||
"languageHints": "Suggerimenti lingua (Opzionale)",
|
||||
"sourceLanguageHint": "Lingua sorgente (es: Inglese)",
|
||||
"targetLanguageHint": "Lingua target/traduzione (es: Cinese)",
|
||||
"process": "Elabora immagine",
|
||||
"processing": "Elaborazione...",
|
||||
"preview": "Anteprima",
|
||||
"extractedPairs": "Coppie estratte",
|
||||
"word": "Parola",
|
||||
"definition": "Definizione",
|
||||
"pairsCount": "{count} coppie estratte",
|
||||
"savePairs": "Salva nella cartella",
|
||||
"saving": "Salvataggio...",
|
||||
"saved": "{count} coppie salvate in {folder}",
|
||||
"saveFailed": "Salvataggio fallito",
|
||||
"noImage": "Carica prima un'immagine",
|
||||
"noFolder": "Seleziona una cartella",
|
||||
"processingFailed": "Elaborazione OCR fallita",
|
||||
"tryAgain": "Riprova con un'immagine più chiara",
|
||||
"detectedLanguages": "Rilevato: {source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "Il Mio Profilo",
|
||||
"email": "Email: {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "お気に入り",
|
||||
"settings": "設定"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "OCR語彙抽出",
|
||||
"description": "教科書の語彙表のスクリーンショットをアップロードして単語と定義のペアを抽出",
|
||||
"uploadImage": "画像をアップロード",
|
||||
"dragDropHint": "ここに画像をドラッグ&ドロップ、またはクリックして選択",
|
||||
"supportedFormats": "対応形式:JPG、PNG、WebP",
|
||||
"selectFolder": "フォルダを選択",
|
||||
"chooseFolder": "抽出したペアを保存するフォルダを選択",
|
||||
"noFolders": "フォルダがありません。まずフォルダを作成してください。",
|
||||
"languageHints": "言語ヒント(オプション)",
|
||||
"sourceLanguageHint": "ソース言語(例:英語)",
|
||||
"targetLanguageHint": "ターゲット/翻訳言語(例:中国語)",
|
||||
"process": "画像を処理",
|
||||
"processing": "処理中...",
|
||||
"preview": "プレビュー",
|
||||
"extractedPairs": "抽出されたペア",
|
||||
"word": "単語",
|
||||
"definition": "定義",
|
||||
"pairsCount": "{count} ペアを抽出",
|
||||
"savePairs": "フォルダに保存",
|
||||
"saving": "保存中...",
|
||||
"saved": "{count} ペアを {folder} に保存しました",
|
||||
"saveFailed": "保存に失敗しました",
|
||||
"noImage": "先に画像をアップロードしてください",
|
||||
"noFolder": "フォルダを選択してください",
|
||||
"processingFailed": "OCR処理に失敗しました",
|
||||
"tryAgain": "より鮮明な画像でお試しください",
|
||||
"detectedLanguages": "検出:{source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "マイプロフィール",
|
||||
"email": "メール: {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "즐겨찾기",
|
||||
"settings": "설정"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "OCR 어휘 추출",
|
||||
"description": "교과서 어휘표 스크린샷을 업로드하여 단어-정의 쌍 추출",
|
||||
"uploadImage": "이미지 업로드",
|
||||
"dragDropHint": "이미지를 여기에 끌어다 놓거나 클릭하여 선택",
|
||||
"supportedFormats": "지원 형식: JPG, PNG, WebP",
|
||||
"selectFolder": "폴더 선택",
|
||||
"chooseFolder": "추출된 쌍을 저장할 폴더 선택",
|
||||
"noFolders": "폴더가 없습니다. 먼저 폴더를 만드세요.",
|
||||
"languageHints": "언어 힌트 (선택사항)",
|
||||
"sourceLanguageHint": "소스 언어 (예: 영어)",
|
||||
"targetLanguageHint": "대상/번역 언어 (예: 중국어)",
|
||||
"process": "이미지 처리",
|
||||
"processing": "처리 중...",
|
||||
"preview": "미리보기",
|
||||
"extractedPairs": "추출된 쌍",
|
||||
"word": "단어",
|
||||
"definition": "정의",
|
||||
"pairsCount": "{count} 쌍 추출됨",
|
||||
"savePairs": "폴더에 저장",
|
||||
"saving": "저장 중...",
|
||||
"saved": "{folder}에 {count} 쌍 저장 완료",
|
||||
"saveFailed": "저장 실패",
|
||||
"noImage": "먼저 이미지를 업로드하세요",
|
||||
"noFolder": "폴더를 선택하세요",
|
||||
"processingFailed": "OCR 처리 실패",
|
||||
"tryAgain": "더 선명한 이미지로 다시 시도하세요",
|
||||
"detectedLanguages": "감지됨: {source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "내 프로필",
|
||||
"email": "이메일: {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "يىغىپ ساقلاش",
|
||||
"settings": "تەڭشەكلەر"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "OCR سۆز ئاستىرىش",
|
||||
"description": "دەرىسلىك كىتابىدىكى سۆز جەدۋىلى سۈرەتلىرىنى يۈكلەپ سۆز-مەنا جۈپلىرىنى ئاستىرىڭ",
|
||||
"uploadImage": "سۈرەت يۈكلەش",
|
||||
"dragDropHint": "سۈرەتنى بۇ يەرگە سۆرەڭ ياكى چېكىپ تاللاڭ",
|
||||
"supportedFormats": "قوللايدىغان فورماتلار: JPG، PNG، WebP",
|
||||
"selectFolder": "قىسقۇچ تاللاش",
|
||||
"chooseFolder": "ئاستىرىلغان جۈپلەرنى ساقلاش ئۈچۈن قىسقۇچ تاللاڭ",
|
||||
"noFolders": "قىسقۇچ يوق. ئاۋۋال قىسقۇچ قۇرۇڭ.",
|
||||
"languageHints": "تىل ئۇچۇرلىرى (ئىختىيارىي)",
|
||||
"sourceLanguageHint": "مەنبە تىلى (مىسال: ئىنگىلىزچە)",
|
||||
"targetLanguageHint": "نىشان/تەرجىمە تىلى (مىسال: خەنزۇچە)",
|
||||
"process": "سۈرەتنى بىر تەرەپ قىلىش",
|
||||
"processing": "بىر تەرەپ قىلىۋاتىدۇ...",
|
||||
"preview": "ئالدىن كۆرۈش",
|
||||
"extractedPairs": "ئاستىرىلغان جۈپلەر",
|
||||
"word": "سۆز",
|
||||
"definition": "مەنا",
|
||||
"pairsCount": "{count} جۈپ ئاستىرىلدى",
|
||||
"savePairs": "قىسقۇچقا ساقلاش",
|
||||
"saving": "ساقلاۋاتىدۇ...",
|
||||
"saved": "{folder} غا {count} جۈپ ساقلاندى",
|
||||
"saveFailed": "ساقلاش مەغلۇپ بولدى",
|
||||
"noImage": "ئاۋۋال سۈرەت يۈكلەڭ",
|
||||
"noFolder": "قىسقۇچ تاللاڭ",
|
||||
"processingFailed": "OCR بىر تەرەپ قىلىش مەغلۇپ بولدى",
|
||||
"tryAgain": "تېخىمۇ ئېنىق سۈرەت بىلەن قايتا سىناڭ",
|
||||
"detectedLanguages": "بايقالدى: {source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "شەخسىي ئۇچۇرۇم",
|
||||
"email": "ئېلخەت: {email}",
|
||||
|
||||
@@ -197,6 +197,35 @@
|
||||
"favorites": "收藏",
|
||||
"settings": "设置"
|
||||
},
|
||||
"ocr": {
|
||||
"title": "OCR 词汇提取",
|
||||
"description": "上传教材词汇表截图，提取单词-释义对",
|
||||
"uploadImage": "上传图片",
|
||||
"dragDropHint": "拖放图片到此处,或点击选择",
|
||||
"supportedFormats": "支持格式:JPG、PNG、WebP",
|
||||
"selectFolder": "选择文件夹",
|
||||
"chooseFolder": "选择保存提取词汇的文件夹",
|
||||
"noFolders": "暂无文件夹，请先创建文件夹",
|
||||
"languageHints": "语言提示(可选)",
|
||||
"sourceLanguageHint": "源语言(如:英语)",
|
||||
"targetLanguageHint": "目标/翻译语言(如:中文)",
|
||||
"process": "处理图片",
|
||||
"processing": "处理中...",
|
||||
"preview": "预览",
|
||||
"extractedPairs": "提取的词汇对",
|
||||
"word": "单词",
|
||||
"definition": "释义",
|
||||
"pairsCount": "已提取 {count} 个词汇对",
|
||||
"savePairs": "保存到文件夹",
|
||||
"saving": "保存中...",
|
||||
"saved": "成功将 {count} 个词汇对保存到 {folder}",
|
||||
"saveFailed": "保存失败",
|
||||
"noImage": "请先上传图片",
|
||||
"noFolder": "请选择文件夹",
|
||||
"processingFailed": "OCR 处理失败",
|
||||
"tryAgain": "请尝试上传更清晰的图片",
|
||||
"detectedLanguages": "检测到:{source} → {target}"
|
||||
},
|
||||
"profile": {
|
||||
"myProfile": "我的个人资料",
|
||||
"email": "邮箱:{email}",
|
||||
|
||||
253
src/app/(features)/ocr/OCRClient.tsx
Normal file
253
src/app/(features)/ocr/OCRClient.tsx
Normal file
@@ -0,0 +1,253 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useCallback, useRef } from "react";
|
||||
import { useTranslations } from "next-intl";
|
||||
import { PageLayout } from "@/components/ui/PageLayout";
|
||||
import { PrimaryButton, LightButton } from "@/design-system/base/button";
|
||||
import { Input } from "@/design-system/base/input";
|
||||
import { Select } from "@/design-system/base/select";
|
||||
import { Card } from "@/design-system/base/card";
|
||||
import { toast } from "sonner";
|
||||
import { Upload, FileImage, Loader2 } from "lucide-react";
|
||||
import { actionProcessOCR } from "@/modules/ocr/ocr-action";
|
||||
import { TSharedFolder } from "@/shared/folder-type";
|
||||
import { OCROutput } from "@/lib/bigmodel/ocr/types";
|
||||
|
||||
/** Props accepted by {@link OCRClient}. */
interface OCRClientProps {
  /** Folders offered as save destinations; the first one is preselected. */
  initialFolders: TSharedFolder[];
}

/**
 * Client-side OCR page.
 *
 * Lets the user pick or drop an image, choose a destination folder and
 * optional language hints, then sends the image (as base64) to
 * `actionProcessOCR` and reports the outcome with a toast.
 *
 * @param initialFolders - Folders shown in the folder selector.
 */
export function OCRClient({ initialFolders }: OCRClientProps) {
  const t = useTranslations("ocr");
  const fileInputRef = useRef<HTMLInputElement>(null);

  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [previewUrl, setPreviewUrl] = useState<string | null>(null);
  const [selectedFolderId, setSelectedFolderId] = useState<number | null>(
    initialFolders.length > 0 ? initialFolders[0].id : null
  );
  const [sourceLanguage, setSourceLanguage] = useState<string>("");
  const [targetLanguage, setTargetLanguage] = useState<string>("");
  const [isProcessing, setIsProcessing] = useState(false);
  const [ocrResult, setOcrResult] = useState<OCROutput | null>(null);

  /** Accepts a newly chosen/dropped file and swaps the preview to it. */
  const handleFileChange = useCallback(
    (file: File | null) => {
      if (!file) return;

      if (!file.type.startsWith("image/")) {
        toast.error(t("processingFailed"));
        return;
      }

      // Release the object URL of the image being replaced; otherwise every
      // re-selection or drop keeps the previous blob alive until page unload.
      if (previewUrl) {
        URL.revokeObjectURL(previewUrl);
      }

      const url = URL.createObjectURL(file);
      setPreviewUrl(url);
      setSelectedFile(file);
      setOcrResult(null);
    },
    [t, previewUrl]
  );

  /** Uses the first dropped file, if any. */
  const handleDrop = useCallback(
    (e: React.DragEvent<HTMLDivElement>) => {
      e.preventDefault();
      const file = e.dataTransfer.files[0];
      handleFileChange(file);
    },
    [handleFileChange]
  );

  /** Prevents the browser's default drag-over handling so `drop` can fire. */
  const handleDragOver = useCallback((e: React.DragEvent<HTMLDivElement>) => {
    e.preventDefault();
  }, []);

  /**
   * Reads a file as a data URL and resolves with the part after the first
   * comma, i.e. the base64 payload without the `data:...;base64,` prefix.
   */
  const fileToBase64 = async (file: File): Promise<string> => {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => {
        const result = reader.result as string;
        const base64 = result.split(",")[1];
        resolve(base64);
      };
      reader.onerror = reject;
      reader.readAsDataURL(file);
    });
  };

  /** Validates the form, runs the OCR server action and shows a toast. */
  const handleProcess = async () => {
    if (!selectedFile) {
      toast.error(t("noImage"));
      return;
    }

    if (!selectedFolderId) {
      toast.error(t("noFolder"));
      return;
    }

    setIsProcessing(true);
    setOcrResult(null);

    try {
      const base64 = await fileToBase64(selectedFile);

      const result = await actionProcessOCR({
        imageBase64: base64,
        folderId: selectedFolderId,
        sourceLanguage: sourceLanguage || undefined,
        targetLanguage: targetLanguage || undefined,
      });

      if (result.success) {
        const folderName = initialFolders.find(f => f.id === selectedFolderId)?.name || "";
        toast.success(t("saved", { count: result.data?.pairsCreated ?? 0, folder: folderName }));
      } else {
        toast.error(result.message || t("processingFailed"));
      }
    } catch {
      toast.error(t("processingFailed"));
    } finally {
      setIsProcessing(false);
    }
  };

  /** Drops the current image, its preview URL and the file input's value. */
  const clearImage = () => {
    if (previewUrl) {
      URL.revokeObjectURL(previewUrl);
    }
    setPreviewUrl(null);
    setSelectedFile(null);
    setOcrResult(null);
    if (fileInputRef.current) {
      // Reset so that picking the same file again triggers `onChange`.
      fileInputRef.current.value = "";
    }
  };

  return (
    <PageLayout>
      <div className="text-center mb-6">
        <h1 className="text-3xl font-bold text-gray-800 mb-2">{t("title")}</h1>
        <p className="text-gray-600">{t("description")}</p>
      </div>

      <div className="space-y-6">
        <Card variant="bordered" padding="lg">
          <div className="space-y-4">
            <div className="font-semibold text-gray-800 flex items-center gap-2">
              <Upload className="w-5 h-5" />
              {t("uploadImage")}
            </div>

            <div
              className={`border-2 border-dashed rounded-lg p-8 text-center cursor-pointer transition-colors ${
                previewUrl
                  ? "border-primary-300 bg-primary-50"
                  : "border-gray-300 hover:border-primary-400 hover:bg-gray-50"
              }`}
              onDrop={handleDrop}
              onDragOver={handleDragOver}
              onClick={() => fileInputRef.current?.click()}
            >
              {previewUrl ? (
                <div className="space-y-3">
                  <img
                    src={previewUrl}
                    alt="Preview"
                    className="max-h-64 mx-auto rounded-lg shadow-md"
                  />
                  <div className="flex justify-center gap-2">
                    <LightButton
                      type="button"
                      onClick={(e) => {
                        // Keep the click from reaching the drop zone, which
                        // would open the file picker.
                        e.stopPropagation();
                        clearImage();
                      }}
                    >
                      {t("uploadImage")}
                    </LightButton>
                  </div>
                </div>
              ) : (
                <div className="space-y-3 text-gray-500">
                  <FileImage className="w-12 h-12 mx-auto text-gray-400" />
                  <p>{t("dragDropHint")}</p>
                  <p className="text-sm">{t("supportedFormats")}</p>
                </div>
              )}
              <input
                ref={fileInputRef}
                type="file"
                accept="image/*"
                className="hidden"
                onChange={(e) => handleFileChange(e.target.files?.[0] || null)}
              />
            </div>
          </div>
        </Card>

        <Card variant="bordered" padding="lg">
          <div className="space-y-4">
            <div className="font-semibold text-gray-800">{t("selectFolder")}</div>

            {initialFolders.length > 0 ? (
              <Select
                value={selectedFolderId?.toString() || ""}
                onChange={(e) => setSelectedFolderId(Number(e.target.value))}
                className="w-full"
              >
                {initialFolders.map((folder) => (
                  <option key={folder.id} value={folder.id}>
                    {folder.name}
                  </option>
                ))}
              </Select>
            ) : (
              <p className="text-gray-500 text-sm">{t("noFolders")}</p>
            )}
          </div>
        </Card>

        <Card variant="bordered" padding="lg">
          <div className="space-y-4">
            <div className="font-semibold text-gray-800">{t("languageHints")}</div>

            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
              <div>
                <label className="text-sm text-gray-600 block mb-1">
                  {t("sourceLanguageHint")}
                </label>
                <Input
                  value={sourceLanguage}
                  onChange={(e) => setSourceLanguage(e.target.value)}
                  placeholder="English"
                />
              </div>
              <div>
                <label className="text-sm text-gray-600 block mb-1">
                  {t("targetLanguageHint")}
                </label>
                <Input
                  value={targetLanguage}
                  onChange={(e) => setTargetLanguage(e.target.value)}
                  placeholder="Chinese"
                />
              </div>
            </div>
          </div>
        </Card>

        <div className="flex justify-center">
          <PrimaryButton
            onClick={handleProcess}
            disabled={isProcessing || !selectedFile || !selectedFolderId}
            size="lg"
            className="px-8"
          >
            {isProcessing ? (
              <>
                <Loader2 className="w-5 h-5 mr-2 animate-spin" />
                {t("processing")}
              </>
            ) : (
              t("process")
            )}
          </PrimaryButton>
        </div>
      </div>
    </PageLayout>
  );
}
|
||||
20
src/app/(features)/ocr/page.tsx
Normal file
20
src/app/(features)/ocr/page.tsx
Normal file
@@ -0,0 +1,20 @@
|
||||
import { OCRClient } from "./OCRClient";
|
||||
import { auth } from "@/auth";
|
||||
import { headers } from "next/headers";
|
||||
import { actionGetFoldersByUserId } from "@/modules/folder/folder-action";
|
||||
import { TSharedFolder } from "@/shared/folder-type";
|
||||
|
||||
/**
 * Server component for the OCR route.
 *
 * Resolves the current session and, when a user id is present, loads that
 * user's folders to seed the client component. Without a session, or when
 * the folder lookup does not succeed, the client receives an empty list.
 */
export default async function OCRPage() {
  const requestHeaders = await headers();
  const session = await auth.api.getSession({ headers: requestHeaders });
  const userId = session?.user?.id;

  if (!userId) {
    const noFolders: TSharedFolder[] = [];
    return <OCRClient initialFolders={noFolders} />;
  }

  const lookup = await actionGetFoldersByUserId(userId as string);
  const folders: TSharedFolder[] =
    lookup.success && lookup.data ? lookup.data : [];

  return <OCRClient initialFolders={folders} />;
}
|
||||
152
src/lib/bigmodel/ocr/orchestrator.ts
Normal file
152
src/lib/bigmodel/ocr/orchestrator.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
import OpenAI from "openai";
|
||||
import { parseAIGeneratedJSON } from "@/utils/json";
|
||||
import { createLogger } from "@/lib/logger";
|
||||
import { OCRInput, OCROutput, OCRRawResponse } from "./types";
|
||||
|
||||
// Module-scoped logger for the OCR pipeline.
const log = createLogger("ocr-orchestrator");

// OpenAI SDK client pointed at the BigModel endpoint; the key is read from
// the ZHIPU_API_KEY environment variable.
const openai = new OpenAI({
  baseURL: "https://open.bigmodel.cn/api/paas/v4",
  apiKey: process.env.ZHIPU_API_KEY,
});
|
||||
|
||||
/**
 * Executes OCR on an image to extract vocabulary word-definition pairs.
 *
 * Sends the image together with a fixed extraction prompt to the
 * `glm-4.6v` model, parses the JSON reply and keeps only the entries whose
 * `word` and `definition` are both strings.
 *
 * @param input - OCR input containing the base64 image and optional
 *   source/target language hints that are embedded into the prompt.
 * @returns The valid pairs plus the languages reported by the model.
 * @throws Error if the model returns no content, the reply has no `pairs`
 *   array, no valid pair remains, or the request/parsing fails. Errors whose
 *   message does not start with "OCR" are logged and re-thrown wrapped in an
 *   "OCR 处理失败: ..." error.
 *
 * @example
 * ```typescript
 * const result = await executeOCR({
 *   imageBase64: "iVBORw0KGgo...",
 *   sourceLanguage: "English",
 *   targetLanguage: "Chinese"
 * });
 * // result.pairs: [{ word: "hello", definition: "你好" }, ...]
 * ```
 */
export async function executeOCR(input: OCRInput): Promise<OCROutput> {
  const { imageBase64, sourceLanguage, targetLanguage } = input;

  log.debug("Starting OCR", {
    hasSourceHint: !!sourceLanguage,
    hasTargetHint: !!targetLanguage,
    imageLength: imageBase64.length,
  });

  const languageHints: string[] = [];
  if (sourceLanguage) {
    languageHints.push(`源语言提示: ${sourceLanguage}`);
  }
  if (targetLanguage) {
    languageHints.push(`目标语言提示: ${targetLanguage}`);
  }

  const prompt = `
你是一个专业的OCR识别助手,专门从词汇表截图中提取单词和释义。

${languageHints.length > 0 ? `语言提示:\n${languageHints.join("\n")}\n` : ""}

你的任务是分析图片中的词汇表,提取所有单词-释义对。

要求:
1. 识别图片中的词汇表结构(可能是两列或多列)
2. 提取每一行的单词和对应的释义/翻译
3. 自动检测源语言和目标语言
4. 保持原始大小写和拼写
5. 如果图片模糊或不清晰,尽力识别并标注置信度较低的项目
6. 忽略表头、页码等非词汇内容

返回 JSON 格式:
{
"pairs": [
{ "word": "单词1", "definition": "释义1" },
{ "word": "单词2", "definition": "释义2" }
],
"detectedSourceLanguage": "检测到的源语言",
"detectedTargetLanguage": "检测到的目标语言"
}

只返回 JSON,不要任何其他文字。
`.trim();

  try {
    const response = await openai.chat.completions.create({
      model: "glm-4.6v",
      messages: [
        {
          role: "user",
          content: [
            {
              type: "image_url",
              image_url: {
                // NOTE(review): the bare base64 string is passed as the URL;
                // presumably the BigModel endpoint accepts this without a
                // `data:` prefix — confirm against the provider docs.
                url: imageBase64,
              },
            },
            {
              type: "text",
              text: prompt,
            },
          ],
        },
      ],
      temperature: 0.1,
    });

    const content = response.choices[0]?.message?.content;

    if (!content) {
      log.error("OCR returned empty response");
      throw new Error("OCR 返回空响应");
    }

    log.debug("Received OCR response", { contentLength: content.length });

    const parsed = parseAIGeneratedJSON<OCRRawResponse>(content);

    if (!parsed.pairs || !Array.isArray(parsed.pairs)) {
      log.error("Invalid OCR response: missing or invalid pairs array", { parsed });
      throw new Error("OCR 响应格式无效:缺少 pairs 数组");
    }

    const validPairs = parsed.pairs.filter((pair) => {
      // The array comes from model-generated JSON, so entries may be null or
      // non-objects despite the declared type. Optional chaining lets such
      // entries be skipped instead of throwing and discarding every pair.
      const isValid =
        typeof pair?.word === "string" && typeof pair?.definition === "string";
      if (!isValid) {
        log.warn("Skipping invalid pair", { pair });
      }
      return isValid;
    });

    if (validPairs.length === 0) {
      log.error("No valid pairs extracted from image");
      throw new Error("未能从图片中提取有效的词汇对");
    }

    const result: OCROutput = {
      pairs: validPairs,
      detectedSourceLanguage: parsed.detectedSourceLanguage,
      detectedTargetLanguage: parsed.detectedTargetLanguage,
    };

    log.info("OCR completed successfully", {
      pairCount: result.pairs.length,
      sourceLanguage: result.detectedSourceLanguage,
      targetLanguage: result.detectedTargetLanguage,
    });

    return result;
  } catch (error) {
    // Errors raised above with an "OCR"-prefixed message were already logged;
    // pass them through untouched.
    if (error instanceof Error && error.message.startsWith("OCR")) {
      throw error;
    }

    log.error("OCR failed", { error });
    const errorMessage = error instanceof Error ? error.message : "未知错误";
    throw new Error(`OCR 处理失败: ${errorMessage}`);
  }
}
|
||||
44
src/lib/bigmodel/ocr/types.ts
Normal file
44
src/lib/bigmodel/ocr/types.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
/**
 * Arguments accepted by the OCR pipeline.
 */
export interface OCRInput {
  /** Image content as a base64 string, with no `data:` URL prefix. */
  imageBase64: string;

  /** Caller-supplied hint naming the language of the words (optional). */
  sourceLanguage?: string;

  /** Caller-supplied hint naming the language of the definitions (optional). */
  targetLanguage?: string;
}
|
||||
|
||||
/**
 * One vocabulary entry read from an image.
 */
export interface VocabularyPair {
  /** The word as it appears in the source language. */
  word: string;

  /** Its translation or definition. */
  definition: string;
}
|
||||
|
||||
/**
 * Result produced by the OCR pipeline.
 */
export interface OCROutput {
  /** All word-definition pairs that were extracted. */
  pairs: VocabularyPair[];

  /** Source language reported for the image, when available. */
  detectedSourceLanguage?: string;

  /** Target/translation language reported for the image, when available. */
  detectedTargetLanguage?: string;
}
|
||||
|
||||
/**
 * Shape the AI reply is parsed into before validation (internal use).
 */
export interface OCRRawResponse {
  pairs: VocabularyPair[];
  detectedSourceLanguage?: string;
  detectedTargetLanguage?: string;
}
|
||||
20
src/modules/ocr/ocr-action-dto.ts
Normal file
20
src/modules/ocr/ocr-action-dto.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { z } from "zod";
|
||||
|
||||
// Optional free-text language hint, shared by the source and target fields.
const optionalLanguageHint = z.string().optional();

/**
 * Validation schema for the payload of the OCR server action: a non-empty
 * base64 image, a positive integer folder id and two optional language hints.
 */
export const schemaActionInputProcessOCR = z.object({
  imageBase64: z.string().min(1, "Image is required"),
  folderId: z.number().int().positive("Folder ID must be positive"),
  sourceLanguage: optionalLanguageHint,
  targetLanguage: optionalLanguageHint,
});

/** Validated input type derived from {@link schemaActionInputProcessOCR}. */
export type ActionInputProcessOCR = z.infer<typeof schemaActionInputProcessOCR>;
|
||||
|
||||
/**
 * Result returned by the OCR server action.
 */
export interface ActionOutputProcessOCR {
  /** Whether the request completed successfully. */
  success: boolean;

  /** Human-readable status or error text. */
  message: string;

  /** Details of the run; optional. */
  data?: {
    /** Number of vocabulary pairs that were stored. */
    pairsCreated: number;
    sourceLanguage?: string;
    targetLanguage?: string;
  };
}
|
||||
25
src/modules/ocr/ocr-action.ts
Normal file
25
src/modules/ocr/ocr-action.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
"use server";
|
||||
|
||||
import { validate } from "@/utils/validate";
|
||||
import { ValidateError } from "@/lib/errors";
|
||||
import { createLogger } from "@/lib/logger";
|
||||
import { serviceProcessOCR } from "./ocr-service";
|
||||
import { schemaActionInputProcessOCR } from "./ocr-action-dto";
|
||||
import type { ActionOutputProcessOCR } from "./ocr-action-dto";
|
||||
|
||||
const log = createLogger("ocr-action");

/**
 * Server action entry point for OCR processing.
 *
 * Validates the untyped input against `schemaActionInputProcessOCR` and
 * delegates to `serviceProcessOCR`. Never throws: validation errors are
 * returned with their message, any other failure is logged and reported as
 * an unknown error.
 *
 * @param input - Raw payload received from the client.
 * @returns The service result, or a `success: false` result on failure.
 */
export async function actionProcessOCR(
  input: unknown
): Promise<ActionOutputProcessOCR> {
  try {
    const validatedInput = validate(input, schemaActionInputProcessOCR);
    // `await` is required here: returning the bare promise would let a
    // rejection from the service bypass this try/catch entirely.
    return await serviceProcessOCR(validatedInput);
  } catch (e) {
    if (e instanceof ValidateError) {
      return { success: false, message: e.message };
    }
    log.error("OCR action failed", { error: e });
    return { success: false, message: "Unknown error occurred." };
  }
}
|
||||
1
src/modules/ocr/ocr-repository-dto.ts
Normal file
1
src/modules/ocr/ocr-repository-dto.ts
Normal file
@@ -0,0 +1 @@
|
||||
export type { RepoInputCreatePair } from "@/modules/folder/folder-repository-dto";
|
||||
5
src/modules/ocr/ocr-repository.ts
Normal file
5
src/modules/ocr/ocr-repository.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
import { repoCreatePair, repoGetUserIdByFolderId } from "@/modules/folder/folder-repository";
|
||||
import type { RepoInputCreatePair } from "./ocr-repository-dto";
|
||||
|
||||
export { repoCreatePair, repoGetUserIdByFolderId };
|
||||
export type { RepoInputCreatePair };
|
||||
20
src/modules/ocr/ocr-service-dto.ts
Normal file
20
src/modules/ocr/ocr-service-dto.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { z } from "zod";
|
||||
|
||||
// Optional free-text language hint, shared by the source and target fields.
const optionalLanguageHint = z.string().optional();

/**
 * Validation schema for the OCR service input: a non-empty base64 image, a
 * positive integer folder id and two optional language hints.
 */
export const schemaServiceInputProcessOCR = z.object({
  imageBase64: z.string().min(1, "Image is required"),
  folderId: z.number().int().positive("Folder ID must be positive"),
  sourceLanguage: optionalLanguageHint,
  targetLanguage: optionalLanguageHint,
});

/** Validated input type derived from {@link schemaServiceInputProcessOCR}. */
export type ServiceInputProcessOCR = z.infer<typeof schemaServiceInputProcessOCR>;
|
||||
|
||||
/**
 * Result returned by the OCR service.
 */
export interface ServiceOutputProcessOCR {
  /** Whether the request completed successfully. */
  success: boolean;

  /** Human-readable status or error text. */
  message: string;

  /** Details of the run; optional. */
  data?: {
    /** Number of vocabulary pairs that were stored. */
    pairsCreated: number;
    sourceLanguage?: string;
    targetLanguage?: string;
  };
}
|
||||
96
src/modules/ocr/ocr-service.ts
Normal file
96
src/modules/ocr/ocr-service.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
"use server";
|
||||
|
||||
import { executeOCR } from "@/lib/bigmodel/ocr/orchestrator";
|
||||
import { repoCreatePair, repoGetUserIdByFolderId } from "@/modules/folder/folder-repository";
|
||||
import { auth } from "@/auth";
|
||||
import { headers } from "next/headers";
|
||||
import { createLogger } from "@/lib/logger";
|
||||
import type { ServiceInputProcessOCR, ServiceOutputProcessOCR } from "./ocr-service-dto";
|
||||
|
||||
const log = createLogger("ocr-service");

/**
 * Runs OCR on an uploaded image and stores the extracted pairs in a folder.
 *
 * Steps: require a session, check that the session user owns the target
 * folder, call the OCR pipeline, then create one pair per extracted entry.
 * Failures to create individual pairs are logged and skipped.
 *
 * @param input - Image, target folder id and optional language hints.
 * @returns `success: true` with the number of pairs stored and the languages
 *   used, or `success: false` with a message when the user is not
 *   authorized, OCR fails, nothing was extracted, or nothing could be saved.
 */
export async function serviceProcessOCR(
  input: ServiceInputProcessOCR
): Promise<ServiceOutputProcessOCR> {
  log.info("Processing OCR request", { folderId: input.folderId });

  const session = await auth.api.getSession({ headers: await headers() });
  if (!session?.user?.id) {
    log.warn("Unauthorized OCR attempt");
    return { success: false, message: "Unauthorized" };
  }

  const folderOwner = await repoGetUserIdByFolderId(input.folderId);
  if (folderOwner !== session.user.id) {
    log.warn("Folder ownership mismatch", {
      folderId: input.folderId,
      userId: session.user.id
    });
    return {
      success: false,
      message: "You don't have permission to modify this folder"
    };
  }

  let ocrResult: Awaited<ReturnType<typeof executeOCR>>;
  try {
    log.debug("Calling OCR pipeline");
    ocrResult = await executeOCR({
      imageBase64: input.imageBase64,
      sourceLanguage: input.sourceLanguage,
      targetLanguage: input.targetLanguage,
    });
  } catch (error) {
    log.error("OCR pipeline failed", { error });
    return {
      success: false,
      message: "Failed to process image. Please try again."
    };
  }

  if (!ocrResult.pairs || ocrResult.pairs.length === 0) {
    log.info("No vocabulary pairs extracted from image");
    return {
      success: false,
      message: "No vocabulary pairs could be extracted from the image"
    };
  }

  // Prefer what the model detected, then the caller's hint, then "Unknown".
  const sourceLanguage = ocrResult.detectedSourceLanguage || input.sourceLanguage || "Unknown";
  const targetLanguage = ocrResult.detectedTargetLanguage || input.targetLanguage || "Unknown";

  let pairsCreated = 0;
  for (const pair of ocrResult.pairs) {
    try {
      await repoCreatePair({
        folderId: input.folderId,
        language1: sourceLanguage,
        language2: targetLanguage,
        text1: pair.word,
        text2: pair.definition,
      });
      pairsCreated++;
    } catch (error) {
      // Best effort: one failing pair must not abort the remaining ones.
      log.error("Failed to create pair", {
        word: pair.word,
        error
      });
    }
  }

  // Every insert failed: report a failure rather than a success with
  // zero pairs, so the caller does not announce a save that never happened.
  if (pairsCreated === 0) {
    log.error("None of the extracted pairs could be saved", {
      folderId: input.folderId,
      attempted: ocrResult.pairs.length
    });
    return {
      success: false,
      message: "Failed to save vocabulary pairs. Please try again."
    };
  }

  log.info("OCR processing complete", {
    pairsCreated,
    sourceLanguage,
    targetLanguage
  });

  return {
    success: true,
    message: `Successfully created ${pairsCreated} vocabulary pairs`,
    data: {
      pairsCreated,
      sourceLanguage,
      targetLanguage,
    },
  };
}
|
||||
Reference in New Issue
Block a user