feat: 添加 OCR 词汇提取功能
新增 OCR 页面，用户可上传教材词汇表截图，使用 GLM-4.6V 视觉模型提取单词-释义对并保存到指定文件夹。
- AI 管道: src/lib/bigmodel/ocr/ (orchestrator, types)
- 后端模块: src/modules/ocr/ (action-service-repository 架构)
- 前端页面: src/app/(features)/ocr/ (拖拽上传、folder 选择)
- i18n: 8 种语言翻译支持
This commit is contained in:
20
src/modules/ocr/ocr-action-dto.ts
Normal file
20
src/modules/ocr/ocr-action-dto.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { z } from "zod";
|
||||
|
||||
/**
 * Field validators for the payload accepted by the OCR server action.
 *
 * - `imageBase64`: non-empty string holding the image payload.
 * - `folderId`: positive integer id of the destination folder.
 * - `sourceLanguage` / `targetLanguage`: optional language hints.
 */
const processOCRActionShape = {
  imageBase64: z.string().min(1, "Image is required"),
  folderId: z.number().int().positive("Folder ID must be positive"),
  sourceLanguage: z.optional(z.string()),
  targetLanguage: z.optional(z.string()),
};

/** Zod schema used to validate untrusted input at the action boundary. */
export const schemaActionInputProcessOCR = z.object(processOCRActionShape);

/** Validated action input, derived from the schema so the two cannot drift. */
export type ActionInputProcessOCR = z.infer<typeof schemaActionInputProcessOCR>;
|
||||
|
||||
/** Summary of an OCR run: how many pairs were stored and in which languages. */
interface ActionOutputProcessOCRData {
  pairsCreated: number;
  sourceLanguage?: string;
  targetLanguage?: string;
}

/**
 * Result envelope returned by the OCR server action.
 *
 * `success` and `message` are always present; `data` is optional and carries
 * the run summary when one is available.
 */
export interface ActionOutputProcessOCR {
  success: boolean;
  message: string;
  data?: ActionOutputProcessOCRData;
}
|
||||
25
src/modules/ocr/ocr-action.ts
Normal file
25
src/modules/ocr/ocr-action.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
"use server";
|
||||
|
||||
import { validate } from "@/utils/validate";
|
||||
import { ValidateError } from "@/lib/errors";
|
||||
import { createLogger } from "@/lib/logger";
|
||||
import { serviceProcessOCR } from "./ocr-service";
|
||||
import { schemaActionInputProcessOCR } from "./ocr-action-dto";
|
||||
import type { ActionOutputProcessOCR } from "./ocr-action-dto";
|
||||
|
||||
const log = createLogger("ocr-action");
|
||||
|
||||
/**
 * Server action entry point for OCR vocabulary extraction.
 *
 * Validates the untrusted `input` against `schemaActionInputProcessOCR` and
 * delegates to `serviceProcessOCR`. Failures are converted into a failed
 * result instead of being thrown: a `ValidateError` is reported with its own
 * message, anything else is logged and reported with a generic message.
 *
 * @param input - Raw payload from the caller; validated before use.
 * @returns The service result, or `{ success: false, message }` on failure.
 */
export async function actionProcessOCR(
  input: unknown
): Promise<ActionOutputProcessOCR> {
  try {
    const validatedInput = validate(input, schemaActionInputProcessOCR);
    // `await` is required inside the try block: returning the bare promise
    // would let a rejection from the service bypass the catch below.
    return await serviceProcessOCR(validatedInput);
  } catch (e) {
    if (e instanceof ValidateError) {
      return { success: false, message: e.message };
    }
    log.error("OCR action failed", { error: e });
    return { success: false, message: "Unknown error occurred." };
  }
}
|
||||
1
src/modules/ocr/ocr-repository-dto.ts
Normal file
1
src/modules/ocr/ocr-repository-dto.ts
Normal file
@@ -0,0 +1 @@
|
||||
// Re-export the folder module's pair-creation input type so it can be imported from the OCR module's own DTO file.
export type { RepoInputCreatePair } from "@/modules/folder/folder-repository-dto";
|
||||
5
src/modules/ocr/ocr-repository.ts
Normal file
5
src/modules/ocr/ocr-repository.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
// The OCR module has no persistence logic of its own: it re-exports the
// folder repository functions and the matching input type.
export { repoCreatePair, repoGetUserIdByFolderId } from "@/modules/folder/folder-repository";
export type { RepoInputCreatePair } from "./ocr-repository-dto";
|
||||
20
src/modules/ocr/ocr-service-dto.ts
Normal file
20
src/modules/ocr/ocr-service-dto.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { z } from "zod";
|
||||
|
||||
/**
 * Field validators for the input consumed by the OCR service.
 *
 * - `imageBase64`: non-empty string holding the image payload.
 * - `folderId`: positive integer id of the destination folder.
 * - `sourceLanguage` / `targetLanguage`: optional language hints.
 */
const processOCRServiceShape = {
  imageBase64: z.string().min(1, "Image is required"),
  folderId: z.number().int().positive("Folder ID must be positive"),
  sourceLanguage: z.optional(z.string()),
  targetLanguage: z.optional(z.string()),
};

/** Zod schema describing the service-layer input. */
export const schemaServiceInputProcessOCR = z.object(processOCRServiceShape);

/** Service input type, derived from the schema so the two cannot drift. */
export type ServiceInputProcessOCR = z.infer<typeof schemaServiceInputProcessOCR>;
|
||||
|
||||
/** Summary of an OCR run: how many pairs were stored and in which languages. */
interface ServiceOutputProcessOCRData {
  pairsCreated: number;
  sourceLanguage?: string;
  targetLanguage?: string;
}

/**
 * Result envelope produced by the OCR service.
 *
 * `success` and `message` are always present; `data` is optional and carries
 * the run summary when one is available.
 */
export interface ServiceOutputProcessOCR {
  success: boolean;
  message: string;
  data?: ServiceOutputProcessOCRData;
}
|
||||
96
src/modules/ocr/ocr-service.ts
Normal file
96
src/modules/ocr/ocr-service.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
"use server";
|
||||
|
||||
import { executeOCR } from "@/lib/bigmodel/ocr/orchestrator";
|
||||
import { repoCreatePair, repoGetUserIdByFolderId } from "@/modules/folder/folder-repository";
|
||||
import { auth } from "@/auth";
|
||||
import { headers } from "next/headers";
|
||||
import { createLogger } from "@/lib/logger";
|
||||
import type { ServiceInputProcessOCR, ServiceOutputProcessOCR } from "./ocr-service-dto";
|
||||
|
||||
const log = createLogger("ocr-service");
|
||||
|
||||
/**
 * Runs the OCR pipeline on an image and stores the extracted word/definition
 * pairs in the requested folder.
 *
 * Steps:
 * 1. Require an authenticated session.
 * 2. Require that the session user is the owner reported for `input.folderId`.
 * 3. Call `executeOCR` with the image and the optional language hints.
 * 4. Insert the extracted pairs one at a time; a failed insert is logged and
 *    skipped so the remaining pairs are still attempted.
 *
 * NOTE(review): this file starts with `"use server"`, so this exported
 * function is presumably reachable as a server action without going through
 * the action-layer validation — confirm whether the directive is intended here.
 *
 * @param input - Image payload, destination folder id and optional language hints.
 * @returns A failed result when the caller is unauthenticated, does not own
 *   the folder, the OCR pipeline throws, no pairs are extracted, or none of
 *   the extracted pairs could be saved; otherwise a success result with the
 *   number of pairs created and the languages that were used.
 */
export async function serviceProcessOCR(
  input: ServiceInputProcessOCR
): Promise<ServiceOutputProcessOCR> {
  log.info("Processing OCR request", { folderId: input.folderId });

  const session = await auth.api.getSession({ headers: await headers() });
  if (!session?.user?.id) {
    log.warn("Unauthorized OCR attempt");
    return { success: false, message: "Unauthorized" };
  }

  const folderOwner = await repoGetUserIdByFolderId(input.folderId);
  if (folderOwner !== session.user.id) {
    log.warn("Folder ownership mismatch", {
      folderId: input.folderId,
      userId: session.user.id
    });
    return {
      success: false,
      message: "You don't have permission to modify this folder"
    };
  }

  let ocrResult: Awaited<ReturnType<typeof executeOCR>>;
  try {
    log.debug("Calling OCR pipeline");
    ocrResult = await executeOCR({
      imageBase64: input.imageBase64,
      sourceLanguage: input.sourceLanguage,
      targetLanguage: input.targetLanguage,
    });
  } catch (error) {
    log.error("OCR pipeline failed", { error });
    return {
      success: false,
      message: "Failed to process image. Please try again."
    };
  }

  if (!ocrResult.pairs || ocrResult.pairs.length === 0) {
    log.info("No vocabulary pairs extracted from image");
    return {
      success: false,
      message: "No vocabulary pairs could be extracted from the image"
    };
  }

  // `||` (not `??`) is deliberate: an empty-string language should also fall
  // through to the next candidate.
  const sourceLanguage = ocrResult.detectedSourceLanguage || input.sourceLanguage || "Unknown";
  const targetLanguage = ocrResult.detectedTargetLanguage || input.targetLanguage || "Unknown";

  // Inserts run sequentially so the pairs are created in the order they were extracted.
  let pairsCreated = 0;
  for (const pair of ocrResult.pairs) {
    try {
      await repoCreatePair({
        folderId: input.folderId,
        language1: sourceLanguage,
        language2: targetLanguage,
        text1: pair.word,
        text2: pair.definition,
      });
      pairsCreated++;
    } catch (error) {
      // Best effort: keep going so one bad pair does not discard the rest.
      log.error("Failed to create pair", {
        word: pair.word,
        error
      });
    }
  }

  const pairsFailed = ocrResult.pairs.length - pairsCreated;

  // Pairs were extracted but every insert failed: report a failure instead of
  // "Successfully created 0 vocabulary pairs".
  if (pairsCreated === 0) {
    log.error("No extracted pairs could be saved", {
      folderId: input.folderId,
      pairsFailed
    });
    return {
      success: false,
      message: "Failed to save the extracted vocabulary pairs. Please try again."
    };
  }

  log.info("OCR processing complete", {
    pairsCreated,
    pairsFailed,
    sourceLanguage,
    targetLanguage
  });

  return {
    success: true,
    message: `Successfully created ${pairsCreated} vocabulary pairs`,
    data: {
      pairsCreated,
      sourceLanguage,
      targetLanguage,
    },
  };
}
|
||||
Reference in New Issue
Block a user