Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,15 +10,21 @@ model = BertModel.from_pretrained(model_name)
|
|
10 |
|
11 |
# Function to get BERT embeddings
|
12 |
def get_embeddings(text):
|
|
|
13 |
inputs = tokenizer.encode_plus(
|
14 |
text,
|
15 |
add_special_tokens=True,
|
16 |
max_length=512,
|
|
|
17 |
return_attention_mask=True,
|
18 |
return_tensors='pt'
|
19 |
)
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Extract text from PDF
|
24 |
def extract_text_from_pdf(pdf_file):
|
@@ -56,5 +62,5 @@ if st.button("Get Response"):
|
|
56 |
# For demonstration, simply return the PDF text.
|
57 |
# Implement similarity matching logic here as needed.
|
58 |
st.write("### Response:")
|
59 |
-
st.write(pdf_text) # For simplicity, returning all text
|
60 |
|
|
|
10 |
|
11 |
# Compute a BERT sentence embedding for a piece of text.
def get_embeddings(text):
    """Return the [CLS] embedding of *text* as a NumPy array.

    The text is tokenized with special tokens and truncated to BERT's
    512-token input limit, run through the model without gradients, and
    the final-layer hidden state of the [CLS] token (position 0) is
    returned on the CPU as a NumPy array of shape (1, hidden_size).
    """
    # truncation=True guarantees the encoded sequence never exceeds
    # BERT's maximum input length of 512 tokens.
    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=512,
        truncation=True,  # This will truncate the text to the maximum length
        return_attention_mask=True,
        return_tensors='pt',
    )

    # Inference only — disable gradient tracking to save memory/compute.
    with torch.no_grad():
        model_output = model(**encoded)

    # [CLS] token vector from the last hidden state; move to CPU before
    # converting to NumPy so this also works when the model is on GPU.
    cls_vector = model_output.last_hidden_state[:, 0, :]
    return cls_vector.detach().cpu().numpy()
28 |
|
29 |
# Extract text from PDF
|
30 |
def extract_text_from_pdf(pdf_file):
|
|
|
62 |
# For demonstration, simply return the PDF text.
|
63 |
# Implement similarity matching logic here as needed.
|
64 |
st.write("### Response:")
|
65 |
+
st.write(pdf_text) # For simplicity, returning all text
|
66 |
|