Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,23 +5,23 @@ import re
|
|
5 |
import gradio as gr
|
6 |
import os
|
7 |
import accelerate
|
8 |
-
import spaces
|
9 |
import subprocess
|
10 |
-
from huggingface_hub import hf_hub_download, InferenceClient
|
11 |
-
from llama_cpp import Llama
|
12 |
|
13 |
-
from huggingface_hub import login
|
14 |
-
login(token = os.getenv('HF_TOKEN'))
|
15 |
|
16 |
-
repo_id = "srijaydeshpande/Deid-Fine-Tuned"
|
17 |
-
model_id = "deid_finetuned.Q4_K_M.gguf"
|
18 |
|
19 |
|
20 |
-
hf_hub_download(
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
)
|
25 |
|
26 |
def process_document(pdf_path):
|
27 |
extracted_pages = extract_pages(pdf_path)
|
@@ -142,7 +142,7 @@ def deidentify_doc(llm_type, pdftext, maxtokens, temperature, top_probability):
|
|
142 |
return response
|
143 |
|
144 |
|
145 |
-
@spaces.GPU(duration=80)
|
146 |
def pdf_to_text(files, llm_type, maxtokens=2048, temperature=0, top_probability=0.95):
|
147 |
files=[files]
|
148 |
for file in files:
|
@@ -158,12 +158,12 @@ def pdf_to_text(files, llm_type, maxtokens=2048, temperature=0, top_probability=
|
|
158 |
for page_id in page2content:
|
159 |
pdftext = page2content[page_id]
|
160 |
original_pdf_text += pdftext + '\n'
|
161 |
-
response_generator = deidentify_doc(llm_type, pdftext, maxtokens, temperature, top_probability)
|
162 |
-
print('RESPONSE GENERATOR IS ',response_generator)
|
163 |
# for chunk in response_generator:
|
164 |
# accumulated_text += chunk
|
165 |
# yield accumulated_text # Keep updating output
|
166 |
-
return response_generator # + "\n\n"
|
167 |
print('Extracted Page Content Is ', original_pdf_text)
|
168 |
print('------------------------------------------------------------')
|
169 |
# return anonymized_text
|
|
|
5 |
import gradio as gr
|
6 |
import os
|
7 |
import accelerate
|
8 |
+
# import spaces
|
9 |
import subprocess
|
10 |
+
# from huggingface_hub import hf_hub_download, InferenceClient
|
11 |
+
# from llama_cpp import Llama
|
12 |
|
13 |
+
# from huggingface_hub import login
|
14 |
+
# login(token = os.getenv('HF_TOKEN'))
|
15 |
|
16 |
+
# repo_id = "srijaydeshpande/Deid-Fine-Tuned"
|
17 |
+
# model_id = "deid_finetuned.Q4_K_M.gguf"
|
18 |
|
19 |
|
20 |
+
# hf_hub_download(
|
21 |
+
# repo_id="srijaydeshpande/Deid-Fine-Tuned",
|
22 |
+
# filename="deid_finetuned.Q4_K_M.gguf",
|
23 |
+
# local_dir = "./models"
|
24 |
+
# )
|
25 |
|
26 |
def process_document(pdf_path):
|
27 |
extracted_pages = extract_pages(pdf_path)
|
|
|
142 |
return response
|
143 |
|
144 |
|
145 |
+
# @spaces.GPU(duration=80)
|
146 |
def pdf_to_text(files, llm_type, maxtokens=2048, temperature=0, top_probability=0.95):
|
147 |
files=[files]
|
148 |
for file in files:
|
|
|
158 |
for page_id in page2content:
|
159 |
pdftext = page2content[page_id]
|
160 |
original_pdf_text += pdftext + '\n'
|
161 |
+
# response_generator = deidentify_doc(llm_type, pdftext, maxtokens, temperature, top_probability)
|
162 |
+
# print('RESPONSE GENERATOR IS ',response_generator)
|
163 |
# for chunk in response_generator:
|
164 |
# accumulated_text += chunk
|
165 |
# yield accumulated_text # Keep updating output
|
166 |
+
# return response_generator # + "\n\n"
|
167 |
print('Extracted Page Content Is ', original_pdf_text)
|
168 |
print('------------------------------------------------------------')
|
169 |
# return anonymized_text
|