Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,15 +10,21 @@ model = BertModel.from_pretrained(model_name)
|
|
10 |
|
11 |
# Function to get BERT embeddings
|
12 |
def get_embeddings(text):
|
|
|
13 |
inputs = tokenizer.encode_plus(
|
14 |
text,
|
15 |
add_special_tokens=True,
|
16 |
max_length=512,
|
|
|
17 |
return_attention_mask=True,
|
18 |
return_tensors='pt'
|
19 |
)
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Extract text from PDF
|
24 |
def extract_text_from_pdf(pdf_file):
|
@@ -56,5 +62,5 @@ if st.button("Get Response"):
|
|
56 |
# For demonstration, simply return the PDF text.
|
57 |
# Implement similarity matching logic here as needed.
|
58 |
st.write("### Response:")
|
59 |
-
st.write(pdf_text) # For simplicity, returning all text
|
60 |
|
|
|
10 |
|
11 |
# Compute a BERT sentence embedding for a piece of text.
def get_embeddings(text):
    """Return the [CLS] embedding of *text* as a NumPy array.

    The text is tokenized with special tokens and truncated to BERT's
    512-token input limit, run through the model without gradients, and
    the final-layer hidden state of the [CLS] token (position 0) is
    returned on the CPU as a NumPy array of shape (1, hidden_size).
    """
    # truncation=True guarantees the encoded sequence never exceeds
    # BERT's maximum input length of 512 tokens.
    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=512,
        truncation=True,  # This will truncate the text to the maximum length
        return_attention_mask=True,
        return_tensors='pt',
    )

    # Inference only — disable gradient tracking to save memory/compute.
    with torch.no_grad():
        model_output = model(**encoded)

    # [CLS] token vector from the last hidden state; move to CPU before
    # converting to NumPy so this also works when the model is on GPU.
    cls_vector = model_output.last_hidden_state[:, 0, :]
    return cls_vector.detach().cpu().numpy()
28 |
|
29 |
# Extract text from PDF
|
30 |
def extract_text_from_pdf(pdf_file):
|
|
|
62 |
# For demonstration, simply return the PDF text.
|
63 |
# Implement similarity matching logic here as needed.
|
64 |
st.write("### Response:")
|
65 |
+
st.write(pdf_text) # For simplicity, returning all text
|
66 |
|