Spaces:
Running
Running
Pclanglais
committed on
Commit
•
2814dfb
1
Parent(s):
63e8ceb
Update app.py
Browse files
app.py
CHANGED
@@ -116,7 +116,7 @@ def preprocess_text(text):
|
|
116 |
text = re.sub(r'\s+', ' ', text)
|
117 |
return text.strip()
|
118 |
|
119 |
-
def split_text(text, max_tokens=400):
|
120 |
encoded = tokenizer.encode(text)
|
121 |
splits = []
|
122 |
for i in range(0, len(encoded), max_tokens):
|
@@ -125,8 +125,8 @@ def split_text(text, max_tokens=400):
|
|
125 |
return splits
|
126 |
|
127 |
# Function to generate text using CTranslate2
|
128 |
-
def ocr_correction(prompt, max_new_tokens=
|
129 |
-
splits = split_text(prompt, max_tokens=
|
130 |
corrected_splits = []
|
131 |
|
132 |
list_prompts = []
|
|
|
116 |
text = re.sub(r'\s+', ' ', text)
|
117 |
return text.strip()
|
118 |
|
119 |
+
def split_text(text, max_tokens=500):
|
120 |
encoded = tokenizer.encode(text)
|
121 |
splits = []
|
122 |
for i in range(0, len(encoded), max_tokens):
|
|
|
125 |
return splits
|
126 |
|
127 |
# Function to generate text using CTranslate2
|
128 |
+
def ocr_correction(prompt, max_new_tokens=500):
|
129 |
+
splits = split_text(prompt, max_tokens=500)
|
130 |
corrected_splits = []
|
131 |
|
132 |
list_prompts = []
|