KevanSoon committed on
Commit ed6290e · 1 Parent(s): 688bfaa

added java dockerfile

Files changed (4)
  1. Dockerfile +23 -0
  2. __pycache__/app.cpython-310.pyc +0 -0
  3. app.py +0 -995
  4. requirements.txt +0 -100
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # Stage 1: Build Spring Boot app with Maven
+ FROM maven:3.9.6-eclipse-temurin-17 AS build
+ WORKDIR /app
+
+ # Copy project files
+ COPY pom.xml .
+ COPY src ./src
+
+ # Build the Spring Boot JAR (skip tests to save time)
+ RUN mvn clean package -DskipTests
+
+ # Stage 2: Run the app
+ FROM openjdk:17-jdk-slim
+ WORKDIR /app
+
+ # Copy the JAR from the build stage
+ COPY --from=build /app/target/*.jar app.jar
+
+ # Hugging Face Spaces requires exposing port 7860
+ EXPOSE 7860
+
+ # Run Spring Boot on port 7860
+ ENTRYPOINT ["java","-jar","app.jar","--server.port=7860"]
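
Note: the build stage assumes the repository now follows a standard Maven Spring Boot layout, with a pom.xml at the root (pulling in spring-boot-starter-web and the spring-boot-maven-plugin so that mvn clean package produces a single executable JAR under target/) and sources under src/. Neither of those files is part of this commit, so the following is only a minimal sketch of the entry point the runtime stage would execute; the package, class, and route names are placeholders, not taken from the repo.

// Minimal Spring Boot entry point this Dockerfile assumes exists under src/main/java.
// All names here (com.example.app, Application, the "/" route) are illustrative.
package com.example.app;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

@SpringBootApplication
@RestController
public class Application {

    public static void main(String[] args) {
        // The ENTRYPOINT passes --server.port=7860, which overrides any server.port
        // set in application.properties, so the app listens where Spaces expects.
        SpringApplication.run(Application.class, args);
    }

    // Trivial route to confirm the container answers on port 7860.
    @GetMapping("/")
    public String index() {
        return "OK";
    }
}

Because command-line arguments take precedence over file-based configuration in Spring Boot, the --server.port=7860 flag in the ENTRYPOINT is what ties the EXPOSE 7860 line to the port the JVM actually binds.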
__pycache__/app.cpython-310.pyc DELETED
Binary file (1.07 kB)
 
app.py DELETED
@@ -1,995 +0,0 @@
1
- # backend.py
2
-
3
- import base64
4
- import json
5
- import asyncio
6
- import re
7
- import os
8
- import html
9
- import requests
10
- import httpx
11
- import uuid
12
- import tempfile
13
- import io
14
- import traceback
15
- import atexit
16
- import functools
17
- from queue import Queue
18
- from threading import Event, Thread
19
-
20
- # beautifulsoup
21
- from bs4 import BeautifulSoup
22
-
23
- # fastapi
24
- from fastapi import FastAPI, File, Form, UploadFile, HTTPException, Request, Header
25
- from fastapi.middleware.cors import CORSMiddleware
26
- from fastapi.responses import JSONResponse, HTMLResponse
27
- from fastapi import Depends
28
- from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
29
-
30
- # pydantic
31
- from pydantic import BaseModel
32
-
33
- # requests
34
- from requests.exceptions import RequestException
35
-
36
- # dotenv
37
- from dotenv import load_dotenv
38
-
39
- # google
40
- import google.generativeai as genai
41
- from google.api_core import exceptions as google_exceptions
42
-
43
- # gradio
44
- from gradio_client import Client, handle_file
45
-
46
- # pillow
47
- from PIL import Image
48
-
49
- # pytesseract
50
- import pytesseract
51
- # from auth.clerk import verify_clerk_jwt
52
-
53
- # --- MODIFIED: Replaced old tool imports with the new one ---
54
- # from tools.tools import analyze_contract
55
-
56
- #numpy and paddleocr
57
- import numpy as np
58
- from paddleocr import PaddleOCR
59
-
60
-
61
-
62
-
63
-
64
- app = FastAPI(
65
- title="Document Translator (Final Architecture)",
66
- description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
67
- version="10.0.1", # Final Architecture, patched
68
- )
69
-
70
- # Allow requests from the default React frontend port
71
- app.add_middleware(
72
- CORSMiddleware,
73
- allow_origins=["https://fair-work-contract.vercel.app"], # or ["*"] for all origins
74
- allow_credentials=True,
75
- allow_methods=["*"],
76
- allow_headers=["*"],
77
- )
78
-
79
-
80
- security = HTTPBearer()
81
- # Load environment variables from a .env file
82
- load_dotenv()
83
-
84
- SUPABASE_URL = os.getenv("SUPABASE_URL")
85
- SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
86
-
87
-
88
- # --- START: NEW ENDPOINT FOR THE REFACTORED TOOL ---
89
-
90
-
91
- # @app.post("/api/analyze_contract")
92
- # async def analyze_contract_endpoint(file: UploadFile = File(...)):
93
- # """
94
- # Receives an uploaded HTML contract, analyzes it to extract key clauses
95
- # and language, and returns a structured JSON response containing a
96
- # user-friendly HTML summary sheet.
97
- # """
98
- # # 1. Validate file type
99
- # if file.content_type != "text/html":
100
- # raise HTTPException(
101
- # status_code=400, detail="Unsupported file type. Please upload a .html file."
102
- # )
103
-
104
- # try:
105
- # # 2. Read HTML content from the uploaded file
106
- # html_content_bytes = await file.read()
107
- # html_content = html_content_bytes.decode("utf-8")
108
-
109
- # # 3. Call the new, powerful analysis tool
110
- # analysis_results = await analyze_contract(html_content)
111
-
112
- # # 4. Handle potential errors returned from the tool
113
- # if "error" in analysis_results:
114
- # # Use a 500 server error for tool-side failures
115
- # raise HTTPException(status_code=500, detail=analysis_results["error"])
116
-
117
- # # 5. Return the successful analysis results
118
- # # FastAPI will automatically convert the dictionary to a JSON response
119
- # return analysis_results
120
-
121
- # except Exception as e:
122
- # # Catch any other unexpected errors during file processing or the API call
123
- # raise HTTPException(
124
- # status_code=500, detail=f"An unexpected server error occurred: {str(e)}"
125
- # )
126
-
127
-
128
- # @app.post("/upload")
129
- # async def upload_file(authorization: str = Header(...), file: UploadFile = File(...)):
130
- # if not authorization.startswith("Bearer "):
131
- # raise HTTPException(status_code=401, detail="Missing Bearer token")
132
-
133
- # token = authorization.split(" ")[1]
134
- # claims = await verify_clerk_jwt(token)
135
-
136
- # user_id = claims.get("sub") # Clerk user ID
137
- # filename = f"{user_id}/{uuid.uuid4()}.png"
138
-
139
- # # Upload to Supabase Storage
140
- # async with httpx.AsyncClient() as client:
141
- # upload_resp = await client.post(
142
- # f"{SUPABASE_URL}/storage/v1/object/user-documents/{filename}",
143
- # headers={
144
- # "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
145
- # "Content-Type": file.content_type,
146
- # },
147
- # content=await file.read(),
148
- # )
149
-
150
- # if upload_resp.status_code != 200:
151
- # raise HTTPException(
152
- # status_code=500, detail="Failed to upload to Supabase Storage"
153
- # )
154
-
155
- # file_url = f"user-documents/{filename}"
156
-
157
- # # Insert metadata to `documents` table
158
- # async with httpx.AsyncClient() as client:
159
- # insert_resp = await client.post(
160
- # f"{SUPABASE_URL}/rest/v1/documents",
161
- # headers={
162
- # "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
163
- # "apikey": SUPABASE_SERVICE_ROLE_KEY,
164
- # "Content-Type": "application/json",
165
- # "Prefer": "return=representation",
166
- # },
167
- # json={
168
- # "user_id": user_id,
169
- # "filename": filename.split("/")[-1],
170
- # "file_url": file_url,
171
- # },
172
- # )
173
-
174
- # if insert_resp.status_code >= 300:
175
- # raise HTTPException(
176
- # status_code=500, detail="Failed to insert document metadata"
177
- # )
178
-
179
- # return {"message": f"File uploaded as {filename}"}
180
-
181
-
182
- # @app.get("/api/documents")
183
- # async def get_user_documents(
184
- # credentials: HTTPAuthorizationCredentials = Depends(security),
185
- # ):
186
- # token = credentials.credentials
187
- # claims = await verify_clerk_jwt(token)
188
- # user_id = claims.get("sub")
189
- # if not user_id:
190
- # raise HTTPException(status_code=401, detail="Invalid user")
191
-
192
- # # Step 1: Get documents from Supabase
193
- # async with httpx.AsyncClient() as client:
194
- # resp = await client.get(
195
- # f"{SUPABASE_URL}/rest/v1/documents?user_id=eq.{user_id}",
196
- # headers={
197
- # "apikey": SUPABASE_SERVICE_ROLE_KEY,
198
- # "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
199
- # "Accept": "application/json",
200
- # },
201
- # )
202
-
203
- # if resp.status_code != 200:
204
- # raise HTTPException(status_code=500, detail="Failed to fetch documents")
205
-
206
- # documents = resp.json()
207
-
208
- # # Step 2: Get signed URLs for each file
209
- # async with httpx.AsyncClient() as client:
210
- # for doc in documents:
211
- # file_path = doc["file_url"].split("user-documents/", 1)[-1]
212
- # if not file_path:
213
- # doc["signed_url"] = None
214
- # continue
215
-
216
- # signed_url_resp = await client.post(
217
- # f"{SUPABASE_URL}/storage/v1/object/sign/user-documents/{file_path}",
218
- # headers={
219
- # "apikey": SUPABASE_SERVICE_ROLE_KEY,
220
- # "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
221
- # # "Content-Type": "application/json"
222
- # },
223
- # json={"expiresIn": 3600}, # 1 hour
224
- # )
225
-
226
- # if signed_url_resp.status_code == 200:
227
- # print(
228
- # f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
229
- # )
230
- # doc["signed_url"] = (
231
- # f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
232
- # )
233
-
234
- # else:
235
- # doc["signed_url"] = None
236
- # print(documents)
237
-
238
- # return documents
239
-
240
-
241
- # --- END: NEW ENDPOINT FOR THE REFACTORED TOOL ---
242
-
243
-
244
- # testing clerk backend authentication
245
- # @app.post("/upload")
246
- # async def upload_file(
247
- # authorization: str = Header(...),
248
- # file: UploadFile = File(...)
249
- # ):
250
- # if not authorization.startswith("Bearer "):
251
- # raise HTTPException(status_code=401, detail="Missing Bearer token")
252
-
253
- # token = authorization.split(" ")[1]
254
- # claims = await verify_clerk_jwt(token)
255
-
256
- # user_id = claims.get("sub") # Clerk user ID
257
-
258
- # # ✅ Now the Clerk user is verified
259
- # # You can securely store this file, e.g., to Supabase or local
260
- # return {"message": f"File uploaded by Clerk user {user_id}"}
261
-
262
- #------------------------ start of gemini workflow ---------------------------------
263
-
264
- # This helper function for calling the Sea-Lion API is now UNUSED in the pipeline,
265
- # but is kept here as requested.
266
- # async def call_sealion_for_translation(text_to_translate: str, lang: str) -> str:
267
- # """Helper function to call the translation API for a single piece of text."""
268
- # if not text_to_translate.strip():
269
- # return "" # Don't send empty strings for translation
270
-
271
- # url = "https://api.sea-lion.ai/v1/chat/completions"
272
- # api_key = os.getenv("SEALION_API_KEY")
273
- # if not api_key:
274
- # print("Warning: SEALION_API_KEY not set. Skipping translation.")
275
- # return f"{text_to_translate} (Translation Skipped)"
276
-
277
- # headers = {
278
- # "Authorization": f"Bearer {api_key}",
279
- # "Content-Type": "application/json",
280
- # }
281
- # # Precise prompt for clean output
282
- # prompt = f'Translate the following text to {lang}. Return ONLY the translated text, without any additional explanations, formatting, or quotation marks:\n\n"{text_to_translate}"'
283
- # payload = {
284
- # "max_completion_tokens": 2048,
285
- # "messages": [{"role": "user", "content": prompt}],
286
- # "model": "aisingapore/Llama-SEA-LION-v3-70B-IT",
287
- # }
288
-
289
- # async with httpx.AsyncClient() as client:
290
- # try:
291
- # response = await client.post(
292
- # url, headers=headers, json=payload, timeout=45.0
293
- # )
294
- # response.raise_for_status()
295
- # response_json = response.json()
296
- # translated_text = response_json["choices"][0]["message"]["content"].strip()
297
- # # Clean up potential extra quotes that the model might add
298
- # return re.sub(r'^"|"$', "", translated_text)
299
- # except httpx.RequestError as e:
300
- # print(f"Translation request failed: {e}")
301
- # return f"Translation Error: {text_to_translate}"
302
- # except (KeyError, IndexError) as e:
303
- # print(f"Could not parse translation response: {e}")
304
- # return f"Translation Parsing Error: {text_to_translate}"
305
-
306
-
307
- # # --- NEW GEMINI TRANSLATION FUNCTION ---
308
-
309
- # async def translate_texts_with_gemini(texts: list[str], target_language: str) -> list[str]:
310
- # """
311
- # Translates a list of texts using Gemini in a single batch API call.
312
- # """
313
- # if not texts:
314
- # return []
315
-
316
- # try:
317
- # api_key = os.getenv("GEMINI_API_KEY")
318
- # if not api_key:
319
- # raise ValueError("GEMINI_API_KEY not found in environment variables.")
320
-
321
- # genai.configure(api_key=api_key)
322
- # model = genai.GenerativeModel(model_name="gemini-2.5-flash") # Using Flash for speed
323
-
324
- # # Create a single prompt asking for a JSON array response
325
- # prompt = f"""
326
- # Translate each string in the following JSON array of strings to {target_language}.
327
- # Return a single JSON array where each element is the translated string corresponding
328
- # to the original at the same index. Your output MUST be only the JSON array and nothing else.
329
-
330
- # Example Input:
331
- # ["Hello world", "How are you?"]
332
-
333
- # Example Output for target language 'Spanish':
334
- # ["Hola mundo", "¿Cómo estás?"]
335
-
336
- # Input for this task:
337
- # {json.dumps(texts)}
338
- # """
339
-
340
- # def do_request():
341
- # """Synchronous function to be run in a separate thread."""
342
- # response = model.generate_content(prompt)
343
- # return response.text.strip()
344
-
345
- # # Run the synchronous SDK call in a thread to avoid blocking asyncio
346
- # response_text = await asyncio.to_thread(do_request)
347
-
348
- # # Clean the response to ensure it's valid JSON
349
- # json_response_match = re.search(r'\[.*\]', response_text, re.DOTALL)
350
- # if not json_response_match:
351
- # print(f"Warning: Gemini did not return a valid JSON array. Response: {response_text}")
352
- # # Fallback: return original texts if parsing fails
353
- # return texts
354
-
355
- # cleaned_json = json_response_match.group(0)
356
- # translated_texts = json.loads(cleaned_json)
357
-
358
- # if len(translated_texts) != len(texts):
359
- # print(f"Warning: Mismatch in translation count. Expected {len(texts)}, got {len(translated_texts)}.")
360
- # # Fallback in case of length mismatch
361
- # return texts
362
-
363
- # return translated_texts
364
-
365
- # except Exception as e:
366
- # print(f"An error occurred during Gemini translation: {e}")
367
- # # Return original texts as a fallback
368
- # return texts
369
-
370
- # # --- OCR EXTRACTION FUNCTIONS ---
371
-
372
-
373
- # async def get_hocr_from_image(image_bytes: bytes) -> str:
374
- # """
375
- # Performs OCR using Tesseract to get raw hOCR HTML output.
376
- # This function accepts image bytes.
377
- # """
378
- # if not image_bytes:
379
- # raise ValueError("Image bytes cannot be empty.")
380
-
381
- # try:
382
- # image = Image.open(io.BytesIO(image_bytes))
383
- # except Exception as e:
384
- # raise HTTPException(
385
- # status_code=400,
386
- # detail=f"Cannot open image for Tesseract. It may be corrupted or unsupported. Error: {e}",
387
- # )
388
-
389
- # # Run Tesseract OCR in a thread to avoid blocking the asyncio event loop
390
- # loop = asyncio.get_running_loop()
391
- # hocr_bytes = await loop.run_in_executor(
392
- # None, lambda: pytesseract.image_to_pdf_or_hocr(image, extension="hocr")
393
- # )
394
- # return hocr_bytes.decode("utf-8")
395
-
396
-
397
- # async def extract_text_and_boxes_with_paddle(image_bytes: bytes) -> list[dict]:
398
- # """
399
- # Extracts text and their bounding boxes from an image using PaddleOCR.
400
- # Returns the full list of dictionary objects from the OCR tool.
401
- # """
402
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
403
- # temp_file.write(image_bytes)
404
- # temp_filepath = temp_file.name
405
-
406
- # try:
407
-
408
- # def do_ocr() -> list[dict]:
409
- # """Synchronous function to be run in a separate thread."""
410
- # client = Client("kevansoon/PaddleOCR")
411
- # # Returns a list of dictionaries, e.g., [{'text': '...', 'box': [...]}]
412
- # result = client.predict(
413
- # img=handle_file(temp_filepath),
414
- # lang="en",
415
- # api_name="/predict",
416
- # )
417
- # return result
418
-
419
- # loop = asyncio.get_running_loop()
420
- # extracted_data = await loop.run_in_executor(None, do_ocr)
421
- # if not extracted_data:
422
- # print("Warning: PaddleOCR returned no data.")
423
- # return []
424
- # return extracted_data
425
- # finally:
426
- # os.unlink(temp_filepath)
427
-
428
-
429
- # # --- TRANSLATION FUNCTIONS (UPDATED TO USE GEMINI) ---
430
-
431
-
432
- # async def translate_hocr_html_with_gemini(hocr_html: str, target_language: str) -> str:
433
- # """
434
- # Parses hOCR, translates all text in a single batch call to Gemini,
435
- # and injects translations back into the HTML.
436
- # """
437
- # soup = BeautifulSoup(hocr_html, "html.parser")
438
- # elements_to_translate = soup.find_all(class_="ocrx_word")
439
- # if not elements_to_translate:
440
- # elements_to_translate = soup.find_all(class_="ocr_line")
441
-
442
- # original_texts = [el.get_text(strip=True) for el in elements_to_translate]
443
-
444
- # # Translate all texts in one go
445
- # translated_texts = await translate_texts_with_gemini(original_texts, target_language)
446
-
447
- # # Inject translations back
448
- # for i, element in enumerate(elements_to_translate):
449
- # if element.string:
450
- # # Ensure we don't go out of bounds if translation failed
451
- # if i < len(translated_texts):
452
- # element.string.replace_with(translated_texts[i])
453
-
454
- # return str(soup)
455
-
456
-
457
- # async def translate_paddle_data_with_gemini(
458
- # paddle_data: list[dict], target_language: str
459
- # ) -> list[dict]:
460
- # """
461
- # Translates the 'text' field of each item in the paddle_data list
462
- # using a single batch call to Gemini.
463
- # """
464
- # original_texts = [item.get("text", "") for item in paddle_data]
465
-
466
- # # Translate all texts in one go
467
- # translated_texts = await translate_texts_with_gemini(original_texts, target_language)
468
-
469
- # translated_data = []
470
- # for i, item in enumerate(paddle_data):
471
- # # Ensure we don't go out of bounds if translation failed
472
- # translated_text = translated_texts[i] if i < len(translated_texts) else original_texts[i]
473
- # translated_data.append({"text": translated_text, "box": item.get("box")})
474
-
475
- # return translated_data
476
-
477
-
478
- # # --- FINAL HTML GENERATION ---
479
-
480
-
481
- # async def generate_html_from_dual_ocr(
482
- # translated_hocr_html: str, translated_paddle_data: list[dict]
483
- # ) -> str:
484
- # """
485
- # Receives translated hOCR and PaddleOCR data and uses Gemini to generate
486
- # a final, layout-aware HTML document.
487
- # """
488
- # try:
489
- # api_key = os.getenv("GEMINI_API_KEY")
490
- # if not api_key:
491
- # raise ValueError("GEMINI_API_KEY not found in environment variables.")
492
-
493
- # genai.configure(api_key=api_key)
494
- # model = genai.GenerativeModel(model_name="gemini-2.5-flash") # Using Flash for speed
495
-
496
- # prompt = f"""
497
- # You are provided with two different translated OCR outputs for the same document.
498
- # Your task is to MERGE them into a SINGLE, CLEAN, and WELL-STYLED HTML document that can be rendered directly in an iframe.
499
-
500
- # Input 1: Translated hOCR HTML
501
- # --- HOCR START ---
502
- # {translated_hocr_html}
503
- # --- HOCR END ---
504
-
505
- # Input 2: Translated PaddleOCR data (Python list of dicts with 'text' and 'box'):
506
- # --- PADDLEOCR START ---
507
- # {str(translated_paddle_data)}
508
- # --- PADDLEOCR END ---
509
-
510
- # STRICT RULES:
511
- # 1. You MUST output ONLY the FINAL RAW HTML code.
512
- # - No ```html, no triple quotes, no markdown, no explanations.
513
- # - Output must begin with <!DOCTYPE html> and end with </html>.
514
- # 2. ALL text from the second input (PaddleOCR) MUST be included in the final HTML without omission.
515
- # - Every PaddleOCR text must appear exactly once in the correct order and location.
516
- # 3. The HTML must be fully self-contained:
517
- # - Include <html>, <head>, <style>, and <body>.
518
- # - Include CSS in a <style> block so it renders exactly in an iframe.
519
- # 4. Table structure requirement:
520
- # - Use <table>, <tbody>, <tr>, and <td> to organize words into rows and columns.
521
- # - Each PaddleOCR word must be placed in a separate <td> within the correct row based on vertical alignment.
522
- # - Apply CSS for borders, padding, and cell alignment to ensure readability.
523
- # - Use colspan/rowspan where necessary to match the original layout.
524
- # 5. Positioning:
525
- # - Use bounding box data to size and place each cell proportionally.
526
- # - Avoid text overlap — if bounding boxes would overlap, adjust table cell spans or widths.
527
- # 6. Before outputting:
528
- # - Validate internally that the HTML is valid.
529
- # - Confirm every PaddleOCR text appears in the table.
530
- # - Confirm the table renders correctly in an iframe.
531
-
532
- # FINAL OUTPUT REQUIREMENT:
533
- # - Output ONLY the complete, valid HTML — no commentary, no extra text.
534
- # """
535
-
536
- # def do_request():
537
- # """Synchronous function to be run in a separate thread."""
538
- # response = model.generate_content(prompt)
539
- # return response.text.strip()
540
-
541
- # return await asyncio.to_thread(do_request)
542
-
543
- # except Exception as e:
544
- # error_message = f"An error occurred while generating the HTML structure with Gemini: {str(e)}"
545
- # traceback.print_exc()
546
- # return f"<html><body><h1>HTML Generation Error</h1><p>{html.escape(error_message)}</p></body></html>"
547
-
548
-
549
- # @app.post("/api/translate_file_gemini", response_class=HTMLResponse)
550
- # async def translate_document_dual_ocr(
551
- # target_language: str = Form(...), file: UploadFile = File(...)
552
- # ):
553
- # """
554
- # Processes a document using a dual OCR pipeline:
555
- # 1. Tesseract and PaddleOCR extract text and coordinates concurrently.
556
- # 2. Gemini translates the text from both outputs concurrently using a batch method.
557
- # 3. Gemini uses both translated outputs to generate the final layout-aware HTML.
558
- # """
559
- # content_type = file.content_type
560
- # if content_type not in ["image/png", "image/jpeg", "image/bmp", "image/tiff"]:
561
- # raise HTTPException(
562
- # status_code=400,
563
- # detail="Unsupported file type. Please use PNG, JPG, BMP or TIFF.",
564
- # )
565
-
566
- # try:
567
- # await file.seek(0)
568
- # image_bytes = await file.read()
569
- # if not image_bytes:
570
- # raise HTTPException(status_code=400, detail="Uploaded file is empty.")
571
-
572
- # # === STEP 1: Run both OCR extractions concurrently ===
573
- # print(
574
- # "***** Step 1: Starting concurrent OCR extraction (Tesseract & PaddleOCR) ******"
575
- # )
576
- # hocr_task = get_hocr_from_image(image_bytes)
577
- # paddle_task = extract_text_and_boxes_with_paddle(image_bytes)
578
- # hocr_html, paddle_data = await asyncio.gather(hocr_task, paddle_task)
579
-
580
- # if (not hocr_html or "ocr_page" not in hocr_html) and not paddle_data:
581
- # raise HTTPException(
582
- # status_code=400,
583
- # detail="Neither Tesseract nor PaddleOCR could extract any data from the image.",
584
- # )
585
- # print("***** Step 1 Done: Finished OCR extraction ******")
586
-
587
- # # === STEP 2: Translate both OCR outputs concurrently using Gemini ===
588
- # print("***** Step 2: Starting concurrent translation with Gemini ******")
589
- # translated_hocr_task = translate_hocr_html_with_gemini(
590
- # hocr_html, target_language
591
- # )
592
- # translated_paddle_task = translate_paddle_data_with_gemini(
593
- # paddle_data, target_language
594
- # )
595
- # translated_hocr, translated_paddle = await asyncio.gather(
596
- # translated_hocr_task, translated_paddle_task
597
- # )
598
- # print("***** Step 2 Done: Finished translation ******")
599
-
600
- # # === STEP 3: Generate final HTML from both translated outputs ===
601
- # print(
602
- # "***** Step 3: Generating final HTML from dual OCR data via Gemini ******"
603
- # )
604
- # final_html = await generate_html_from_dual_ocr(
605
- # translated_hocr, translated_paddle
606
- # )
607
-
608
- # print("***** Step 3 Done: Generated final HTML ******")
609
-
610
- # return HTMLResponse(content=final_html)
611
-
612
- # except HTTPException:
613
- # raise
614
- # except Exception as e:
615
- # traceback.print_exc()
616
- # raise HTTPException(
617
- # status_code=500,
618
- # detail=f"An unexpected error occurred during processing: {str(e)}",
619
- # )
620
- #-------------------------- end of gemini workflow ----------------------------------
621
-
622
- #-------------------------- start of updated gemini workflow ----------------------------------
623
-
624
- # --- PADDLEOCR LOCAL MODEL MANAGER SETUP (WITH HUGGING FACE SPACES FIX) ---
625
-
626
- # 1. Define the cache directory in a globally writable location like /tmp.
627
- # This is the key to fixing "Permission Denied" errors in containerized environments.
628
- CACHE_DIR = "/tmp/paddleocr_cache"
629
-
630
- # 2. Set the environment variable *before* any PaddleOCR functions are called.
631
- os.environ['PADDLEOCR_HOME'] = CACHE_DIR
632
-
633
- # 3. Create the directory when the script starts to ensure it exists.
634
- os.makedirs(CACHE_DIR, exist_ok=True)
635
-
636
- print(f"✅ PaddleOCR model cache is set to a writable directory: {CACHE_DIR}")
637
-
638
-
639
- LANG_CONFIG = {
640
- "ch": {"num_workers": 2},
641
- "en": {"num_workers": 2},
642
- "fr": {"num_workers": 1},
643
- "german": {"num_workers": 1},
644
- "korean": {"num_workers": 1},
645
- "japan": {"num_workers": 1},
646
- }
647
- CONCURRENCY_LIMIT = 8
648
-
649
-
650
- class PaddleOCRModelManager(object):
651
- def __init__(self, num_workers, model_factory):
652
- super().__init__()
653
- self._model_factory = model_factory
654
- self._queue = Queue()
655
- self._workers = []
656
- self._model_initialized_event = Event()
657
- for _ in range(num_workers):
658
- # Use daemon threads so they don't block app exit
659
- worker = Thread(target=self._worker, daemon=True)
660
- worker.start()
661
- self._model_initialized_event.wait()
662
- self._model_initialized_event.clear()
663
- self._workers.append(worker)
664
-
665
- def infer(self, *args, **kwargs):
666
- result_queue = Queue(maxsize=1)
667
- self._queue.put((args, kwargs, result_queue))
668
- success, payload = result_queue.get()
669
- if success:
670
- return payload
671
- else:
672
- raise payload
673
-
674
- def close(self):
675
- for _ in self._workers:
676
- self._queue.put(None)
677
- for worker in self._workers:
678
- worker.join()
679
-
680
- def _worker(self):
681
- print("Initializing PaddleOCR model in worker thread...")
682
- model = self._model_factory()
683
- self._model_initialized_event.set()
684
- print("✅ PaddleOCR model initialized in worker.")
685
- while True:
686
- item = self._queue.get()
687
- if item is None:
688
- break
689
- args, kwargs, result_queue = item
690
- try:
691
- result = model.ocr(*args, **kwargs)
692
- result_queue.put((True, result))
693
- except Exception as e:
694
- result_queue.put((False, e))
695
- finally:
696
- self._queue.task_done()
697
-
698
-
699
- def create_model(lang):
700
- """Creates an instance of the PaddleOCR model."""
701
- print(f"Creating PaddleOCR model for language: {lang}")
702
- # The cache directory is now set globally, so this function is simplified.
703
- return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
704
-
705
-
706
- # --- Initialize Model Managers ---
707
- model_managers = {}
708
- for lang, config in LANG_CONFIG.items():
709
- print(f"Setting up model manager for language: {lang}")
710
- model_manager = PaddleOCRModelManager(
711
- config["num_workers"], functools.partial(create_model, lang=lang)
712
- )
713
- model_managers[lang] = model_manager
714
-
715
-
716
- def close_model_managers():
717
- print("Closing all PaddleOCR model managers...")
718
- for manager in model_managers.values():
719
- manager.close()
720
-
721
- atexit.register(close_model_managers)
722
-
723
-
724
- def local_inference(img_bytes: bytes, lang: str) -> list[dict]:
725
- """Performs OCR using the local PaddleOCRModelManager."""
726
- ocr_manager = model_managers.get(lang)
727
- if not ocr_manager:
728
- print(f"Warning: Language '{lang}' not configured. Falling back to 'en'.")
729
- ocr_manager = model_managers['en']
730
-
731
- image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
732
- img_array = np.array(image)
733
-
734
- result = ocr_manager.infer(img_array, cls=True)
735
-
736
- # Standardize result format
737
- if result and isinstance(result, list) and len(result) == 1:
738
- result = result[0]
739
-
740
- output = []
741
- if result:
742
- for line in result:
743
- # Ensure line structure is as expected before unpacking
744
- if isinstance(line, list) and len(line) == 2:
745
- box = line[0]
746
- text_info = line[1]
747
- if isinstance(text_info, tuple) and len(text_info) == 2:
748
- text = text_info[0]
749
- output.append({"text": text, "box": box})
750
- return output
751
-
752
-
753
- # --- GEMINI TRANSLATION FUNCTION ---
754
-
755
- async def translate_texts_with_gemini(texts: list[str], target_language: str) -> list[str]:
756
- """Translates a list of texts using Gemini in a single batch API call."""
757
- if not texts or all(not s.strip() for s in texts):
758
- return [""] * len(texts)
759
-
760
- try:
761
- api_key = os.getenv("GEMINI_API_KEY")
762
- if not api_key:
763
- raise ValueError("GEMINI_API_KEY not found in environment variables.")
764
- if not genai:
765
- raise ImportError("'google.generativeai' library is not available.")
766
-
767
- genai.configure(api_key=api_key)
768
- model = genai.GenerativeModel(model_name="gemini-1.5-flash")
769
-
770
- prompt = f"""
771
- Translate each string in the following JSON array of strings to {target_language}.
772
- Return a single JSON array where each element is the translated string corresponding
773
- to the original at the same index. Your output MUST be only the JSON array and nothing else.
774
-
775
- Example Input:
776
- ["Hello world", "How are you?"]
777
-
778
- Example Output for target language 'Spanish':
779
- ["Hola mundo", "¿Cómo estás?"]
780
-
781
- Input for this task:
782
- {json.dumps(texts)}
783
- """
784
-
785
- def do_request():
786
- response = model.generate_content(prompt)
787
- return response.text.strip()
788
-
789
- response_text = await asyncio.to_thread(do_request)
790
- json_response_match = re.search(r'\[.*\]', response_text, re.DOTALL)
791
- if not json_response_match:
792
- print(f"Warning: Gemini did not return a valid JSON array. Response: {response_text}")
793
- return texts
794
-
795
- cleaned_json = json_response_match.group(0)
796
- translated_texts = json.loads(cleaned_json)
797
-
798
- if len(translated_texts) != len(texts):
799
- print(f"Warning: Mismatch in translation count. Expected {len(texts)}, got {len(translated_texts)}.")
800
- return texts
801
-
802
- return translated_texts
803
-
804
- except Exception as e:
805
- print(f"An error occurred during Gemini translation: {e}")
806
- return texts
807
-
808
-
809
- # --- OCR EXTRACTION FUNCTIONS ---
810
-
811
- async def get_hocr_from_image(image_bytes: bytes) -> str:
812
- """Performs OCR using Tesseract to get raw hOCR HTML output."""
813
- if not image_bytes:
814
- raise ValueError("Image bytes cannot be empty.")
815
- try:
816
- image = Image.open(io.BytesIO(image_bytes))
817
- hocr_bytes = await asyncio.to_thread(
818
- pytesseract.image_to_pdf_or_hocr, image, extension="hocr"
819
- )
820
- return hocr_bytes.decode("utf-8")
821
- except Exception as e:
822
- raise HTTPException(status_code=400, detail=f"Tesseract OCR failed. Error: {e}")
823
-
824
-
825
- async def extract_text_and_boxes_with_paddle(image_bytes: bytes, lang: str = "en") -> list[dict]:
826
- """Extracts text and bounding boxes using the local PaddleOCRModelManager."""
827
- try:
828
- extracted_data = await asyncio.to_thread(local_inference, image_bytes, lang)
829
- if not extracted_data:
830
- print("Warning: Local PaddleOCR returned no data.")
831
- return extracted_data
832
- except Exception as e:
833
- print(f"An error occurred during local PaddleOCR processing: {e}")
834
- traceback.print_exc()
835
- return []
836
-
837
-
838
- # --- BATCH TRANSLATION FUNCTIONS ---
839
-
840
- async def translate_hocr_html_with_gemini(hocr_html: str, target_language: str) -> str:
841
- """Parses hOCR, translates all text, and injects translations back."""
842
- soup = BeautifulSoup(hocr_html, "html.parser")
843
- elements_to_translate = soup.find_all(class_="ocrx_word")
844
- if not elements_to_translate:
845
- elements_to_translate = soup.find_all(class_="ocr_line")
846
-
847
- original_texts = [el.get_text(strip=True) for el in elements_to_translate]
848
- if not original_texts:
849
- return str(soup)
850
-
851
- translated_texts = await translate_texts_with_gemini(original_texts, target_language)
852
-
853
- for i, element in enumerate(elements_to_translate):
854
- if element.string and i < len(translated_texts):
855
- element.string.replace_with(translated_texts[i])
856
- return str(soup)
857
-
858
-
859
- async def translate_paddle_data_with_gemini(
860
- paddle_data: list[dict], target_language: str
861
- ) -> list[dict]:
862
- """Translates the 'text' field of each item in the paddle_data list."""
863
- original_texts = [item.get("text", "") for item in paddle_data]
864
- if not original_texts:
865
- return []
866
-
867
- translated_texts = await translate_texts_with_gemini(original_texts, target_language)
868
-
869
- translated_data = []
870
- for i, item in enumerate(paddle_data):
871
- translated_text = translated_texts[i] if i < len(translated_texts) else original_texts[i]
872
- translated_data.append({"text": translated_text, "box": item.get("box")})
873
- return translated_data
874
-
875
-
876
- # --- FINAL HTML GENERATION ---
877
-
878
- async def generate_html_from_dual_ocr(
879
- translated_hocr_html: str, translated_paddle_data: list[dict]
880
- ) -> str:
881
- """Uses Gemini to generate a final, layout-aware HTML document."""
882
- try:
883
- api_key = os.getenv("GEMINI_API_KEY")
884
- if not api_key:
885
- raise ValueError("GEMINI_API_KEY not found in environment variables.")
886
- if not genai:
887
- raise ImportError("'google.generativeai' library is not available.")
888
-
889
- genai.configure(api_key=api_key)
890
- model = genai.GenerativeModel(model_name="gemini-1.5-flash")
891
-
892
- prompt = f"""
893
- You are an expert web developer. Your task is to merge two translated OCR outputs into a single, clean, and well-styled HTML document.
894
-
895
- Input 1: Translated hOCR HTML (for structural guidance).
896
- --- HOCR START ---
897
- {translated_hocr_html}
898
- --- HOCR END ---
899
-
900
- Input 2: Translated PaddleOCR data (the source of truth for text and position).
901
- --- PADDLEOCR START ---
902
- {str(translated_paddle_data)}
903
- --- PADDLEOCR END ---
904
-
905
- STRICT INSTRUCTIONS:
906
- 1. **Output Raw HTML Only**: Your output must be a single block of HTML code, starting with `<!DOCTYPE html>` and ending with `</html>`. Do NOT use markdown fences (```html) or add any commentary.
907
- 2. **Prioritize PaddleOCR Data**: ALL text from the PaddleOCR input MUST be included. Its bounding boxes are the ground truth for positioning.
908
- 3. **Self-Contained HTML**: Embed all CSS in a `<style>` block in the `<head>`.
909
- 4. **Layout Reconstruction**: Use absolute positioning for `<span>` or `<div>` elements containing the text. Use the bounding box coordinates from PaddleOCR to set the `top`, `left`, `width`, and `height` CSS properties for each element to reconstruct the original layout.
910
- 5. **Coordinate System**: The bounding box format is [[top-left-x, top-left-y], [top-right-x, top-right-y], [bottom-right-x, bottom-right-y], [bottom-left-x, bottom-left-y]]. You MUST use `left: top-left-x`, `top: top-left-y`, `width: top-right-x - top-left-x`, and `height: bottom-left-y - top-left-y`.
911
-
912
- FINAL OUTPUT: ONLY the complete, valid, self-contained HTML.
913
- """
914
-
915
- def do_request():
916
- response = model.generate_content(prompt)
917
- clean_text = re.sub(r'^```html\s*', '', response.text.strip(), flags=re.IGNORECASE)
918
- clean_text = re.sub(r'\s*```$', '', clean_text)
919
- return clean_text
920
-
921
- return await asyncio.to_thread(do_request)
922
-
923
- except Exception as e:
924
- error_message = f"An error occurred during HTML generation with Gemini: {str(e)}"
925
- traceback.print_exc()
926
- return f"<html><body><h1>HTML Generation Error</h1><p>{html.escape(error_message)}</p></body></html>"
927
-
928
-
929
- # --- FASTAPI ENDPOINT ---
930
-
931
- @app.post("/api/translate_file_gemini_local", response_class=HTMLResponse)
932
- async def translate_document_dual_ocr(
933
- target_language: str = Form(...),
934
- source_language: str = Form("en"),
935
- file: UploadFile = File(...)
936
- ):
937
- """
938
- Processes a document using a dual OCR pipeline with local PaddleOCR.
939
- """
940
- content_type = file.content_type
941
- if content_type not in ["image/png", "image/jpeg", "image/bmp", "image/tiff"]:
942
- raise HTTPException(
943
- status_code=400,
944
- detail="Unsupported file type. Please use PNG, JPG, BMP or TIFF.",
945
- )
946
-
947
- try:
948
- image_bytes = await file.read()
949
- if not image_bytes:
950
- raise HTTPException(status_code=400, detail="Uploaded file is empty.")
951
-
952
- # === STEP 1: Run both OCR extractions concurrently ===
953
- print("***** 1. Starting concurrent OCR (Tesseract & PaddleOCR) *****")
954
- hocr_task = get_hocr_from_image(image_bytes)
955
- paddle_task = extract_text_and_boxes_with_paddle(image_bytes, lang=source_language)
956
- hocr_html, paddle_data = await asyncio.gather(hocr_task, paddle_task)
957
-
958
- if (not hocr_html or "ocr_page" not in hocr_html) and not paddle_data:
959
- raise HTTPException(
960
- status_code=400,
961
- detail="Neither Tesseract nor PaddleOCR could extract any data.",
962
- )
963
- print("***** 1. Finished OCR extraction *****")
964
-
965
- # === STEP 2: Translate both OCR outputs concurrently ===
966
- print("***** 2. Starting concurrent translation with Gemini *****")
967
- translated_hocr_task = translate_hocr_html_with_gemini(hocr_html, target_language)
968
- translated_paddle_task = translate_paddle_data_with_gemini(paddle_data, target_language)
969
- translated_hocr, translated_paddle = await asyncio.gather(
970
- translated_hocr_task, translated_paddle_task
971
- )
972
- print("***** 2. Finished translation *****")
973
-
974
- # === STEP 3: Generate final HTML from both translated outputs ===
975
- print("***** 3. Generating final HTML via Gemini *****")
976
- final_html = await generate_html_from_dual_ocr(translated_hocr, translated_paddle)
977
- print("***** 3. Generated final HTML *****")
978
-
979
- return HTMLResponse(content=final_html)
980
-
981
- except HTTPException:
982
- raise
983
- except Exception as e:
984
- traceback.print_exc()
985
- raise HTTPException(
986
- status_code=500,
987
- detail=f"An unexpected error occurred during processing: {str(e)}",
988
- )
989
-
990
- # To run this application:
991
- # 1. Save the code as a Python file (e.g., `main.py`).
992
- # 2. Make sure you have a `requirements.txt` file with all dependencies.
993
- # 3. Set your GEMINI_API_KEY environment variable in your Hugging Face Space secrets.
994
- # 4. Run the command: uvicorn main:app --host 0.0.0.0 --port 7860
995
- #-------------------------- end of updated gemini workflow ----------------------------------
requirements.txt DELETED
@@ -1,100 +0,0 @@
- accelerate==1.9.0
- annotated-types==0.7.0
- anyio==4.9.0
- beautifulsoup4==4.13.4
- cachetools==5.5.2
- certifi==2025.7.14
- cffi==1.17.1
- charset-normalizer==3.4.2
- click==8.2.1
- colorama==0.4.6
- cryptography==45.0.5
- dnspython==2.7.0
- dotenv==0.9.9
- ecdsa==0.19.1
- email_validator==2.2.0
- exceptiongroup==1.3.0
- fastapi==0.116.1
- fastapi-cli==0.0.8
- fastapi-cloud-cli==0.1.4
- filelock==3.13.1
- fsspec==2024.6.1
- google-ai-generativelanguage==0.6.15
- google-api-core==2.25.1
- google-api-python-client==2.177.0
- google-auth==2.40.3
- google-auth-httplib2==0.2.0
- google-generativeai==0.8.5
- googleapis-common-protos==1.70.0
- grpcio==1.74.0
- grpcio-status==1.71.2
- h11==0.16.0
- httpcore==1.0.9
- httplib2==0.22.0
- httptools==0.6.4
- httpx==0.28.1
- huggingface-hub==0.34.3
- idna==3.10
- itsdangerous==2.2.0
- Jinja2==3.1.6
- langdetect==1.0.9
- markdown-it-py==3.0.0
- MarkupSafe==2.1.5
- mdurl==0.1.2
- mpmath==1.3.0
- networkx==3.3
- numpy==2.1.2
- orjson==3.11.0
- packaging==25.0
- pillow==11.0.0
- proto-plus==1.26.1
- protobuf==5.29.5
- psutil==7.0.0
- pyasn1==0.6.1
- pyasn1_modules==0.4.2
- pycparser==2.22
- pydantic==2.11.7
- pydantic-extra-types==2.10.5
- pydantic-settings==2.10.1
- pydantic_core==2.33.2
- Pygments==2.19.2
- PyMuPDF==1.26.3
- pyparsing==3.2.3
- python-dotenv==1.1.1
- python-jose==3.5.0
- python-multipart==0.0.20
- PyYAML==6.0.2
- regex==2025.7.31
- requests==2.32.4
- rich==14.0.0
- rich-toolkit==0.14.8
- rignore==0.6.4
- rsa==4.9.1
- safetensors==0.5.3
- sentry-sdk==2.33.2
- shellingham==1.5.4
- six==1.17.0
- sniffio==1.3.1
- soupsieve==2.7
- starlette==0.47.2
- sympy==1.13.3
- tokenizers==0.21.4
- torch==2.7.1
- torchaudio==2.7.1
- torchvision==0.22.1
- tqdm==4.67.1
- transformers==4.54.1
- typer==0.16.0
- typing-inspection==0.4.1
- typing_extensions==4.12.2
- ujson==5.10.0
- uritemplate==4.2.0
- urllib3==2.5.0
- uvicorn==0.35.0
- watchfiles==1.1.0
- websockets==15.0.1
- langextract
- gradio_client
- pytesseract
- paddlepaddle
- paddleocr==2.10.0