zmbfeng committed
Commit c0b7df2
Parent(s): 8ebc1ba

namespace variable name issues fixed

Files changed (1): app.py (+119, -64)
app.py CHANGED
@@ -1,9 +1,10 @@
 import streamlit as st
 import os
-import json
+
 import fitz
 import re
-from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoModelForSequenceClassification, BertTokenizer, BertModel,T5Tokenizer, T5ForConditionalGeneration,AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoModelForSequenceClassification, BertTokenizer, BertModel, \
+    AutoTokenizer
 
 import torch
 from sklearn.metrics.pairwise import cosine_similarity
@@ -12,6 +13,24 @@ import nltk
 from nltk.tokenize import sent_tokenize
 from nltk.corpus import stopwords
 
+
+def is_new_txt_file_upload(uploaded_txt_file):
+    if 'last_uploaded_txt_file' in st.session_state:
+        # Check if the newly uploaded file is different from the last one
+        if (uploaded_txt_file.name != st.session_state.last_uploaded_txt_file['name'] or
+                uploaded_txt_file.size != st.session_state.last_uploaded_txt_file['size']):
+            st.session_state.last_uploaded_txt_file = {'name': uploaded_txt_file.name, 'size': uploaded_txt_file.size}
+            # st.write("A new src txt file has been uploaded.")
+            return True
+        else:
+            # st.write("The same src txt file has been re-uploaded.")
+            return False
+    else:
+        # st.write("This is the first file upload detected.")
+        st.session_state.last_uploaded_txt_file = {'name': uploaded_txt_file.name, 'size': uploaded_txt_file.size}
+        return True
+
+
 def is_new_file_upload(uploaded_file):
     if 'last_uploaded_file' in st.session_state:
         # Check if the newly uploaded file is different from the last one
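Both `is_new_txt_file_upload` and `is_new_file_upload` implement the same guard: Streamlit reruns the whole script on every interaction, so the file's name and size are fingerprinted in `st.session_state` and work is redone only when the fingerprint changes. A minimal standalone sketch of the pattern (names here are illustrative, not from the commit):

```python
import streamlit as st

uploaded = st.file_uploader("Upload a file")
if uploaded is not None:
    fingerprint = {'name': uploaded.name, 'size': uploaded.size}
    # st.session_state survives reruns, so this only fires for a genuinely new file
    if st.session_state.get('last_fingerprint') != fingerprint:
        st.session_state.last_fingerprint = fingerprint
        st.write(f"Processing new upload: {uploaded.name}")  # placeholder for the real work
```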
@@ -27,21 +46,25 @@ def is_new_file_upload(uploaded_file):
         # st.write("This is the first file upload detected.")
         st.session_state.last_uploaded_file = {'name': uploaded_file.name, 'size': uploaded_file.size}
         return True
-def add_commonality_to_similarity_score(similarity, sentence, query):
+
+
+def add_commonality_to_similarity_score(similarity, sentence_to_find_similarity_score, query_to_find_similarity_score):
     # Tokenize both the sentence and the query
     # sentence_words = set(sentence.split())
     # query_words = set(query.split())
-    sentence_words = set(word for word in sentence.split() if word.lower() not in st.session_state.stop_words)
-    query_words = set(word for word in query.split() if word.lower() not in st.session_state.stop_words)
+    sentence_words = set(word for word in sentence_to_find_similarity_score.split() if word.lower() not in st.session_state.stop_words)
+    query_words = set(word for word in query_to_find_similarity_score.split() if word.lower() not in st.session_state.stop_words)
 
     # Calculate the number of common words
     common_words = len(sentence_words.intersection(query_words))
 
     # Adjust the similarity score with the common words count
-    combined_score = similarity + (common_words / max(len(query_words), 1))  # Normalize by the length of the query to keep the score between -1 and 1
-    return combined_score,similarity,(common_words / max(len(query_words), 1))
+    combined_score = similarity + (common_words / max(len(query_words),
+                                                      1))  # Normalize by the length of the query to keep the commonality term between 0 and 1
+    return combined_score, similarity, (common_words / max(len(query_words), 1))
 
-def contradiction_detection(premise,hypothesis):
+
+def contradiction_detection(premise, hypothesis):
     inputs = st.session_state.roberta_tokenizer.encode_plus(premise, hypothesis, return_tensors="pt", truncation=True)
 
     # Get model predictions
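The body of `contradiction_detection` is cut off by the hunk boundary. For orientation: `roberta-large-mnli` is a three-way NLI classifier whose label order is contradiction / neutral / entailment, and the caller below compares the result against the set `{"Contradiction"}`. A plausible completion consistent with that usage (a sketch, not the committed code):

```python
import torch

def contradiction_detection_sketch(premise, hypothesis):
    inputs = st.session_state.roberta_tokenizer.encode_plus(
        premise, hypothesis, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = st.session_state.roberta_model(**inputs).logits
    # roberta-large-mnli id2label: 0=CONTRADICTION, 1=NEUTRAL, 2=ENTAILMENT
    labels = ["Contradiction", "Neutral", "Entailment"]
    # Return a set so the `== {"Contradiction"}` comparison used later works
    return {labels[logits.argmax(dim=1).item()]}
```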
@@ -78,43 +101,47 @@ if 'is_initialized' not in st.session_state:
     st.session_state.bert_model = BertModel.from_pretrained("bert-base-uncased", ).to('cuda')
     st.session_state.roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
     st.session_state.roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli")
-def encode_sentence(sentence):
-    if len(sentence.strip()) < 4:
+
+
+def encode_sentence(sentence_to_be_encoded):
+    if len(sentence_to_be_encoded.strip()) < 4:
         return None
 
-    sentence_tokens = st.session_state.bert_tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(
+    sentence_tokens = st.session_state.bert_tokenizer(sentence_to_be_encoded, return_tensors="pt", padding=True, truncation=True).to(
         'cuda')
     with torch.no_grad():
         sentence_encoding = st.session_state.bert_model(**sentence_tokens).last_hidden_state[:, 0, :].cpu().numpy()
     return sentence_encoding
 
-def encode_paragraph(paragraph):
-    sentence_encodings = []
-    paragraph_without_newline = paragraph.replace("\n", "")
-    sentences = sent_tokenize(paragraph_without_newline)
-    for sentence in sentences:
+
+def encode_paragraph(paragraph_to_be_encoded):
+    sentence_encodings_for_encoding_paragraph = []
+    paragraph_without_newline = paragraph_to_be_encoded.replace("\n", "")
+    sentences_for_encoding_paragraph = sent_tokenize(paragraph_without_newline)
+    for sentence_for_encoding_paragraph in sentences_for_encoding_paragraph:
         # if sentence.strip().endswith('?'):
         #     sentence_encodings.append(None)
         #     continue
-        sentence_encoding = encode_sentence(sentence)
-        sentence_encodings.append([sentence, sentence_encoding])
-    return sentence_encodings
+        sentence_encoding = encode_sentence(sentence_for_encoding_paragraph)
+        sentence_encodings_for_encoding_paragraph.append([sentence_for_encoding_paragraph, sentence_encoding])
+    return sentence_encodings_for_encoding_paragraph
+
+
 if 'list_count' in st.session_state:
-    st.write(f'The number of elements at the top level of the hierarchy: {st.session_state.list_count }')
+    st.write(f'The number of elements at the top level of the hierarchy: {st.session_state.list_count}')
 if 'paragraph_sentence_encodings' not in st.session_state:
     print("start embedding paragraphs")
     read_progress_bar = st.progress(0)
     st.session_state.paragraph_sentence_encodings = []
-    for index,paragraph in enumerate(st.session_state.restored_paragraphs):
-        #print(paragraph)
+    for index, paragraph in enumerate(st.session_state.restored_paragraphs):
+        # print(paragraph)
 
-        progress_percentage = (index) / (st.session_state.list_count - 1)
+        progress_percentage = index / (st.session_state.list_count - 1)
         # print(progress_percentage)
         read_progress_bar.progress(progress_percentage)
 
-
-        # sentence_encodings.append([sentence,bert_model(**sentence_tokens).last_hidden_state[:, 0, :].detach().numpy()])
-        sentence_encodings=encode_paragraph(paragraph['paragraph'])
+        # sentence_encodings.append([sentence,bert_model(**sentence_tokens).last_hidden_state[:, 0, :].detach().numpy()])
+        sentence_encodings = encode_paragraph(paragraph['paragraph'])
         st.session_state.paragraph_sentence_encodings.append([paragraph, sentence_encodings])
     st.rerun()
 
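`encode_sentence` uses the final-layer hidden state of BERT's [CLS] token (position 0) as a fixed 768-dimensional sentence vector; `cosine_similarity` later compares these vectors against the encoded query. The same idea in a self-contained form (CPU instead of the app's `cuda`, for simplicity):

```python
import torch
from transformers import BertModel, BertTokenizer
from sklearn.metrics.pairwise import cosine_similarity

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

def embed(text):
    tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        # [CLS] embedding: batch 0, token 0 -> shape (1, 768)
        return model(**tokens).last_hidden_state[:, 0, :].numpy()

print(cosine_similarity(embed("The policy covers disability."),
                        embed("Disability is covered by the policy."))[0][0])
```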
@@ -123,19 +150,17 @@ big_text = """
     <h1 style='font-size: 30px;'>Contradiction Detection</h1>
 </div>
 """
-# Display the styled text
+# Display the styled text
 st.markdown(big_text, unsafe_allow_html=True)
 
-def convert_pdf_to_paragraph_list(doc):
+
+def convert_pdf_to_paragraph_list(pdf_doc_to_paragraph_list):
     paragraphs = []
-    sentence_endings = ('.', '!', '?')
     start_page = 1
 
-    for page_num in range(start_page - 1, len(doc)):  # start_page - 1 to adjust for 0-based index
-        page = doc.load_page(page_num)
+    for page_num in range(start_page - 1, len(pdf_doc_to_paragraph_list)):  # start_page - 1 to adjust for 0-based index
+        page = pdf_doc_to_paragraph_list.load_page(page_num)
         blocks = page.get_text("blocks")
-
-        block_index = 1
         for block in blocks:
             x0, y0, x1, y1, text, block_type, flags = block
             if text.strip() != "":
@@ -147,30 +172,32 @@ def convert_pdf_to_paragraph_list(doc):
                 if match:
                     containsList = True
                     # print ("list detected")
-                paragraph = ""
                 if bool(re.search(r'\n{2,}', text)):
                     substrings = re.split(r'\n{2,}', text)
                     for substring in substrings:
                         if substring.strip() != "":
-                            paragraph = substring
+                            paragraph_for_converting_pdf = substring
                             paragraphs.append(
-                                {"paragraph": paragraph, "containsList": containsList, "page_num": page_num,
-                                 "text": text});
+                                {"paragraph": paragraph_for_converting_pdf, "containsList": containsList, "page_num": page_num,
+                                 "text": text})
                             # print(f"<substring> {substring} </substring>")
                 else:
-                    paragraph = text
+                    paragraph_for_converting_pdf = text
                     paragraphs.append(
-                        {"paragraph": paragraph, "containsList": containsList, "page_num": page_num, "text": None});
-    return paragraphs
+                        {"paragraph": paragraph_for_converting_pdf, "containsList": containsList, "page_num": page_num, "text": None})
+    return paragraphs
+
 
 uploaded_pdf_file = st.file_uploader("Upload a PDF file",
                                      type=['pdf'])
 st.markdown(
     f'<a href="https://ikmtechnology.github.io/ikmtechnology/Sample_Master_Sample_Life_Insurance_Policy.pdf" target="_blank">Sample Master PDF download and then upload to above</a>',
     unsafe_allow_html=True)
-st.markdown("sample queries to invoke contradiction: <br/> A Member shall be deemed disabled under this provision if, due to illness or injury, the Member is unable to safely and fully carry out two or more Activities of Daily Living without the assistance or verbal prompting of another individual.",unsafe_allow_html=True)
 st.markdown(
-    f'<a href="https://ikmtechnology.github.io/ikmtechnology/Sample_Secondary.txt" target="_blank">Sample Secondary txt download and then upload to above</a>',
+    "sample queries to invoke contradiction: <br/> A Member shall be deemed disabled under this provision if, due to illness or injury, the Member is unable to safely and fully carry out two or more Activities of Daily Living without the assistance or verbal prompting of another individual.",
+    unsafe_allow_html=True)
+st.markdown(
+    f'<a href="https://ikmtechnology.github.io/ikmtechnology/Sample_Secondary.txt" target="_blank">Sample Secondary txt download and then upload to above</a>',
     unsafe_allow_html=True)
 if uploaded_pdf_file is not None:
     if is_new_file_upload(uploaded_pdf_file):
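`convert_pdf_to_paragraph_list` leans on PyMuPDF's `page.get_text("blocks")`, which yields 7-tuples; the last two fields are actually the block number and block type, so the `block_type, flags` names in the unpacking above are misleading (harmless here, since only `text` is used). Paragraph boundaries are then runs of two or more newlines. A compact sketch of the extraction loop (the input path is hypothetical):

```python
import re
import fitz  # PyMuPDF

doc = fitz.open("sample.pdf")  # hypothetical input
for page_num in range(len(doc)):
    for block in doc.load_page(page_num).get_text("blocks"):
        x0, y0, x1, y1, text, block_no, block_type = block
        # Split a block into paragraphs on blank lines, as the app does
        for paragraph in re.split(r'\n{2,}', text):
            if paragraph.strip():
                print(page_num, paragraph.strip()[:60])
```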
@@ -185,20 +212,22 @@ if uploaded_pdf_file is not None:
         with open(os.path.join(save_path, uploaded_pdf_file.name), "wb") as f:
             f.write(uploaded_pdf_file.getbuffer())  # Write the file to the specified location
         st.success(f'Saved file temp_{uploaded_pdf_file.name} in {save_path}')
-        st.session_state.uploaded_path=os.path.join(save_path, uploaded_pdf_file.name)
+        st.session_state.uploaded_path = os.path.join(save_path, uploaded_pdf_file.name)
         # st.session_state.page_count = utils.get_pdf_page_count(st.session_state.uploaded_pdf_path)
         # print("page_count=",st.session_state.page_count)
 
         doc = fitz.open(st.session_state.uploaded_path)
 
-        st.session_state.restored_paragraphs=convert_pdf_to_paragraph_list(doc)
+        st.session_state.restored_paragraphs = convert_pdf_to_paragraph_list(doc)
         if isinstance(st.session_state.restored_paragraphs, list):
             # Count the restored_paragraphs of top-level elements
             st.session_state.list_count = len(st.session_state.restored_paragraphs)
             st.write(f'The number of elements at the top level of the hierarchy: {st.session_state.list_count}')
             st.rerun()
-def contradiction_detection_for_sentence(query):
-    query_encoding = encode_sentence(query)
+
+
+def contradiction_detection_for_sentence(cd_query):
+    query_encoding = encode_sentence(cd_query)
 
     total_count = len(st.session_state.paragraph_sentence_encodings)
     processing_progress_bar = st.progress(0)
@@ -209,19 +238,21 @@ def contradiction_detection_for_sentence(query):
     sorted_paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
 
     st.write("Top scored paragraphs and their scores:")
-    for i, (similarity_score, commonality_score, paragraph) in enumerate(
+    for i, (similarity_score, commonality_score, paragraph_from_sorted_paragraph_scores) in enumerate(
             sorted_paragraph_scores[:3]):  # number of paragraphs to consider
         # st.write("top_three_sentences: ", paragraph['top_three_sentences'])
         st.write("paragraph number ***", i)
         prev_contradiction_detected = True
-        for top_sentence in paragraph['top_three_sentences']:
+        for top_sentence in paragraph_from_sorted_paragraph_scores['top_three_sentences']:
 
             if prev_contradiction_detected:
-                contradiction_detection_result = contradiction_detection(st.session_state.premise, top_sentence[1])
+                contradiction_detection_result = contradiction_detection(cd_query, top_sentence[1])
+                st.write("master document page number ", paragraph_from_sorted_paragraph_scores['original_text']['page_num'])
+                st.write("master document sentence: ", top_sentence[1])
+                st.write("secondary document sentence: ", cd_query)
+                st.write(contradiction_detection_result)
                 if contradiction_detection_result == {"Contradiction"}:
-                    st.write("master document page number ", paragraph['original_text']['page_num'])
-                    st.write("master document sentence: ", top_sentence[1])
-                    st.write("secondary document sentence: ", st.session_state.premise)
+
                     st.write(contradiction_detection_result)
                     # st.write(contradiction_detection(st.session_state.premise, top_sentence[1]))
 
@@ -229,19 +260,22 @@ def contradiction_detection_for_sentence(query):
                 prev_contradiction_detected = False
             else:
                 break
-def find_sentences_scores(paragraph_sentence_encodings, query_encoding, processing_progress_bar,total_count):
+
+
+def find_sentences_scores(paragraph_sentence_encodings, query_encoding, processing_progress_bar, total_count):
     paragraph_scores = []
     sentence_scores = []
-    for index, paragraph_sentence_encoding in enumerate(paragraph_sentence_encodings):
-        progress_percentage = index / (total_count - 1)
-        processing_progress_bar.progress(progress_percentage)
+    for paragraph_sentence_encoding_index, paragraph_sentence_encoding in enumerate(paragraph_sentence_encodings):
+        find_sentences_scores_progress_percentage = paragraph_sentence_encoding_index / (total_count - 1)
+        processing_progress_bar.progress(find_sentences_scores_progress_percentage)
 
         sentence_similarities = []
         for sentence_encoding in paragraph_sentence_encoding[1]:
             if sentence_encoding:
                 similarity = cosine_similarity(query_encoding, sentence_encoding[1])[0][0]
                 combined_score, similarity_score, commonality_score = add_commonality_to_similarity_score(similarity,
-                                                                                                          sentence_encoding[0],
+                                                                                                          sentence_encoding[
+                                                                                                              0],
                                                                                                           query)
                 sentence_similarities.append((combined_score, sentence_encoding[0], commonality_score))
                 sentence_scores.append((combined_score, sentence_encoding[0]))
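Note what the combined score does: cosine similarity contributes at most 1, and the word-overlap term is normalized to [0, 1] by the query's content-word count, so the sum can reach 2 (the old inline comment claiming the score stays between -1 and 1 was off, and is adjusted above). A worked example with a stand-in stop-word set:

```python
stop_words = {"to", "of", "the", "a", "is"}  # stand-in for st.session_state.stop_words

def combined(similarity, sentence, query):
    sentence_words = {w for w in sentence.split() if w.lower() not in stop_words}
    query_words = {w for w in query.split() if w.lower() not in stop_words}
    common_words = len(sentence_words & query_words)
    # Same arithmetic as add_commonality_to_similarity_score
    return similarity + common_words / max(len(query_words), 1)

# {Member, duties} overlap with 4 query content words -> 0.62 + 2/4 = 1.12
print(combined(0.62, "Member performs regular duties", "Member unable to perform duties"))
```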
@@ -275,22 +309,43 @@ def find_sentences_scores(paragraph_sentence_encodings, query_encoding, processing_progress_bar, total_count):
 
     sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
     return sentence_scores, paragraph_scores
+
+
 if 'paragraph_sentence_encodings' in st.session_state:
     query = st.text_input("Enter your query")
-
     if query:
         if 'prev_query' not in st.session_state or st.session_state.prev_query != query:
             st.session_state.prev_query = query
             st.session_state.premise = query
             contradiction_detection_for_sentence(query)
 
-
-    #print(top_sentence[1])
+    uploaded_text_file = st.file_uploader("Choose a .txt file", type="txt")
+    if uploaded_text_file is not None:
+        if is_new_txt_file_upload(uploaded_text_file):
+            lines = uploaded_text_file.readlines()
+
+            # Initialize an empty list to store line number and text
+            line_list = []
+
+            # Iterate through each line and add to the list
+            for line_number, line in enumerate(lines, start=1):
+                # Decode the line (since it will be in bytes)
+                decoded_line = line.decode("utf-8").strip()
+                line_list.append((line_number, decoded_line))
+
+            # Display the list of tuples
+            st.write("Line Number and Line Content:")
+            for item in line_list:
+                st.write(f"Line {item[0]}: {item[1]}")
+                sentences = sent_tokenize(item[1])
+                for sentence in sentences:
+                    st.write(f"sentence {sentence}")
+                    contradiction_detection_for_sentence(sentence)
+    # print(top_sentence[1])
     # st.write(f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}")
     # st.write("top_three_sentences: ", paragraph['top_three_sentences'])
-    #st.write("Original Paragraph: ", paragraph['original_text'])
-    #A Member will be considered Actively at Work if he or she is able and available for active performance of all of his or her regular duties
+    # st.write("Original Paragraph: ", paragraph['original_text'])
+    # A Member will be considered Actively at Work if he or she is able and available for active performance of all of his or her regular duties
     # A Member will be considered as inactive at Work if he or she is able and available for active performance of all of his or her regular duties
-    #A Member shall be deemed inactive at Work if he or she is capable and available to perform all of his or her regular responsibilities.
+    # A Member shall be deemed inactive at Work if he or she is capable and available to perform all of his or her regular responsibilities.
     # st.write("Modified Paragraph: ", paragraph['modified_text'])
-
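The new .txt branch runs every sentence of the secondary document through `contradiction_detection_for_sentence` via `sent_tokenize`. Both that tokenizer and the `stopwords` corpus used by the scorer depend on NLTK data packages; the app presumably downloads them in its initialization block (not shown in this diff). For reference, a sketch of that one-time setup:

```python
import nltk

nltk.download('punkt')      # required by sent_tokenize
nltk.download('stopwords')  # required by nltk.corpus.stopwords
stop_words = set(nltk.corpus.stopwords.words('english'))
```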