MasteredUltraInstinct committed on
Commit
62f52d3
·
verified ·
1 Parent(s): ad40dc4

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +12 -7
model.py CHANGED
@@ -1,8 +1,13 @@
1
- from pix2tex.cli import LatexOCR
2
- from pix2tex.model import get_model as pix2tex_model
3
- from pix2tex.utils import Tokenizer
4
 
5
- def get_model(weights='trained_model'):
6
- tokenizer = Tokenizer()
7
- model = pix2tex_model(checkpoint_path=weights)
8
- return model, tokenizer
 
 
 
 
 
 
 
1
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
2
+ from PIL import Image
 
3
 
4
+ class HandwrittenOCR:
5
+ def __init__(self, weights="trained_model"):
6
+ self.processor = TrOCRProcessor.from_pretrained(weights)
7
+ self.model = VisionEncoderDecoderModel.from_pretrained(weights)
8
+
9
+ def __call__(self, image: Image.Image) -> str:
10
+ pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
11
+ generated_ids = self.model.generate(pixel_values)
12
+ generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
13
+ return generated_text