Shankarm08 committed on
Commit
a472326
·
verified ·
1 Parent(s): ec022ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -10,15 +10,21 @@ model = BertModel.from_pretrained(model_name)
10
 
11
  # Function to get BERT embeddings
12
  def get_embeddings(text):
 
13
  inputs = tokenizer.encode_plus(
14
  text,
15
  add_special_tokens=True,
16
  max_length=512,
 
17
  return_attention_mask=True,
18
  return_tensors='pt'
19
  )
20
- outputs = model(**inputs)
21
- return outputs.last_hidden_state[:, 0, :].detach().numpy()
 
 
 
 
22
 
23
  # Extract text from PDF
24
  def extract_text_from_pdf(pdf_file):
@@ -56,5 +62,5 @@ if st.button("Get Response"):
56
  # For demonstration, simply return the PDF text.
57
  # Implement similarity matching logic here as needed.
58
  st.write("### Response:")
59
- st.write(pdf_text) # For simplicity, returning all text
60
 
 
10
 
11
  # Function to get BERT embeddings
12
  def get_embeddings(text):
13
+ # Ensure that text length does not exceed BERT's maximum input length
14
  inputs = tokenizer.encode_plus(
15
  text,
16
  add_special_tokens=True,
17
  max_length=512,
18
+ truncation=True, # This will truncate the text to the maximum length
19
  return_attention_mask=True,
20
  return_tensors='pt'
21
  )
22
+
23
+ with torch.no_grad(): # Disable gradient calculation for inference
24
+ outputs = model(**inputs)
25
+
26
+ # Extract the embeddings from the last hidden state
27
+ return outputs.last_hidden_state[:, 0, :].detach().cpu().numpy() # Move to CPU before converting to numpy
28
 
29
  # Extract text from PDF
30
  def extract_text_from_pdf(pdf_file):
 
62
  # For demonstration, simply return the PDF text.
63
  # Implement similarity matching logic here as needed.
64
  st.write("### Response:")
65
+ st.write(pdf_text) # For simplicity, returning all text
66