Pclanglais committed on
Commit
2814dfb
1 Parent(s): 63e8ceb

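Update app.py: raise split_text chunk size from 400 to 500 tokens and lower ocr_correction max_new_tokens from 600 to 500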

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -116,7 +116,7 @@ def preprocess_text(text):
116
  text = re.sub(r'\s+', ' ', text)
117
  return text.strip()
118
 
119
- def split_text(text, max_tokens=400):
120
  encoded = tokenizer.encode(text)
121
  splits = []
122
  for i in range(0, len(encoded), max_tokens):
@@ -125,8 +125,8 @@ def split_text(text, max_tokens=400):
125
  return splits
126
 
127
  # Function to generate text using CTranslate2
128
- def ocr_correction(prompt, max_new_tokens=600):
129
- splits = split_text(prompt, max_tokens=400)
130
  corrected_splits = []
131
 
132
  list_prompts = []
 
116
  text = re.sub(r'\s+', ' ', text)
117
  return text.strip()
118
 
119
+ def split_text(text, max_tokens=500):
120
  encoded = tokenizer.encode(text)
121
  splits = []
122
  for i in range(0, len(encoded), max_tokens):
 
125
  return splits
126
 
127
  # Function to generate text using CTranslate2
128
+ def ocr_correction(prompt, max_new_tokens=500):
129
+ splits = split_text(prompt, max_tokens=500)
130
  corrected_splits = []
131
 
132
  list_prompts = []