anikde committed on
Commit
ba402b9
·
1 Parent(s): 1a662ac

added AUTO choose capability

Browse files
IndicPhotoOCR/ocr.py CHANGED
@@ -17,7 +17,7 @@ from IndicPhotoOCR.utils.helper import detect_para
17
 
18
 
19
  class OCR:
20
- def __init__(self, device='cuda:0', verbose=False):
21
  # self.detect_model_checkpoint = detect_model_checkpoint
22
  self.device = device
23
  self.verbose = verbose
@@ -27,6 +27,7 @@ class OCR:
27
  self.recogniser = PARseqrecogniser()
28
  # self.identifier = CLIPidentifier()
29
  self.identifier = VIT_identifier()
 
30
 
31
  # def detect(self, image_path, detect_model_checkpoint=cfg.checkpoint):
32
  # """Run the detection model to get bounding boxes of text areas."""
@@ -109,7 +110,7 @@ class OCR:
109
  # Predict script language, here we assume "hindi" as the model name
110
  if self.verbose:
111
  print("Identifying script for the cropped area...")
112
- script_lang = self.identifier.identify(cropped_path, "hindi", self.device) # Use "hindi" as the model name
113
  # print(script_lang)
114
 
115
  # Clean up temporary file
 
17
 
18
 
19
  class OCR:
20
+ def __init__(self, device='cuda:0', identifier_lang='hindi', verbose=False):
21
  # self.detect_model_checkpoint = detect_model_checkpoint
22
  self.device = device
23
  self.verbose = verbose
 
27
  self.recogniser = PARseqrecogniser()
28
  # self.identifier = CLIPidentifier()
29
  self.identifier = VIT_identifier()
30
+ self.indentifier_lang = identifier_lang
31
 
32
  # def detect(self, image_path, detect_model_checkpoint=cfg.checkpoint):
33
  # """Run the detection model to get bounding boxes of text areas."""
 
110
  # Predict script language, here we assume "hindi" as the model name
111
  if self.verbose:
112
  print("Identifying script for the cropped area...")
113
+ script_lang = self.identifier.identify(cropped_path, self.indentifier_lang, self.device) # Use "hindi" as the model name
114
  # print(script_lang)
115
 
116
  # Clean up temporary file
IndicPhotoOCR/recognition/parseq_recogniser.py CHANGED
@@ -37,6 +37,14 @@ model_info = {
37
  "path": "models/gujarati.ckpt",
38
  "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/gujarati.ckpt",
39
  },
 
 
 
 
 
 
 
 
40
  "marathi": {
41
  "path": "models/marathi.ckpt",
42
  "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/marathi.ckpt",
@@ -200,7 +208,7 @@ class PARseqrecogniser:
200
  Returns:
201
  str: The recognized text from the image.
202
  """
203
- # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
204
 
205
  if language != "english":
206
  model_path = self.ensure_model(checkpoint)
 
37
  "path": "models/gujarati.ckpt",
38
  "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/gujarati.ckpt",
39
  },
40
+ "kannada": {
41
+ "path": "models/kannada.ckpt",
42
+ "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/kannada.ckpt",
43
+ },
44
+ "malayalam": {
45
+ "path": "models/malayalam.ckpt",
46
+ "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/malayalam.ckpt",
47
+ },
48
  "marathi": {
49
  "path": "models/marathi.ckpt",
50
  "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/marathi.ckpt",
 
208
  Returns:
209
  str: The recognized text from the image.
210
  """
211
+ device = device
212
 
213
  if language != "english":
214
  model_path = self.ensure_model(checkpoint)
IndicPhotoOCR/script_identification/vit/vit_infer.py CHANGED
@@ -74,11 +74,16 @@ model_info = {
74
  "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/hindienglishtelugu.zip",
75
  "subcategories": ["hindi", "english", "telugu"]
76
  },
77
- "12C": {
78
  "path": "models/12_classes",
79
  "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/12_classes.zip",
80
  "subcategories": ["hindi", "english", "assamese","bengali","gujarati","kannada","malayalam","marathi","odia","punjabi","tamil","telegu"]
81
  },
 
 
 
 
 
82
 
83
 
84
  }
 
74
  "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/hindienglishtelugu.zip",
75
  "subcategories": ["hindi", "english", "telugu"]
76
  },
77
+ "auto": {
78
  "path": "models/12_classes",
79
  "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/12_classes.zip",
80
  "subcategories": ["hindi", "english", "assamese","bengali","gujarati","kannada","malayalam","marathi","odia","punjabi","tamil","telegu"]
81
  },
82
+ "10C": {
83
+ "path": "models/12_classes",
84
+ "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/10_classes.zip",
85
+ "subcategories": ["hindi", "english", "assamese","bengali","gujarati","kannada","malayalam","marathi","odia","punjabi","tamil","telegu"]
86
+ },
87
 
88
 
89
  }