dtcda

Sleeping

App Files Community

zmbfeng committed on Aug 9

Commit

c7fa677

•

1 Parent(s): 0612a6a

original paragraph info hidden until button pressed

Browse files

Files changed (1) hide show

app.py +79 -69

app.py CHANGED Viewed

@@ -157,73 +157,83 @@ if 'paragraph_sentence_encodings' in st.session_state:
     query = st.text_input("Enter your query")
     if query:
-        query_tokens = st.session_state.bert_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(
-            'cuda')
-        with torch.no_grad():  # Disable gradient calculation for inference
-            query_encoding = st.session_state.bert_model(**query_tokens).last_hidden_state[:, 0,
-                             :].cpu().numpy()  # Move the result to CPU and convert to NumPy
-        paragraph_scores = []
-        sentence_scores = []
-        total_count = len(st.session_state.paragraph_sentence_encodings)
-        processing_progress_bar = st.progress(0)
-        for index, paragraph_sentence_encoding in enumerate(st.session_state.paragraph_sentence_encodings):
-            progress_percentage = index / (total_count - 1)
-            processing_progress_bar.progress(progress_percentage)
-            sentence_similarities = []
-            for sentence_encoding in paragraph_sentence_encoding[1]:
-                if sentence_encoding:
-                    similarity = cosine_similarity(query_encoding, sentence_encoding[1])[0][0]
-                    combined_score, similarity_score, commonality_score = combined_similarity(similarity,
-                                                                                              sentence_encoding[0],
-                                                                                              query)
-                    sentence_similarities.append((combined_score, sentence_encoding[0], commonality_score))
-                    sentence_scores.append((combined_score, sentence_encoding[0]))
-            sentence_similarities.sort(reverse=True, key=lambda x: x[0])
-            if len(sentence_similarities) >= 3:
-                top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities[:3]])
-                top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities[:3]])
-                top_three_sentences = sentence_similarities[:3]
-            elif sentence_similarities:
-                top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities])
-                top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities])
-                top_three_sentences = sentence_similarities
-            else:
-                top_three_avg_similarity = 0
-                top_three_avg_commonality = 0
-                top_three_sentences = []
-            top_three_texts = [s[1] for s in top_three_sentences]
-            remaining_texts = [s[0] for s in paragraph_sentence_encoding[1] if s and s[0] not in top_three_texts]
-            reordered_paragraph = top_three_texts + remaining_texts
-            original_paragraph = ' '.join([s[0] for s in paragraph_sentence_encoding[1] if s])
-            modified_paragraph = ' '.join(reordered_paragraph)
-            paragraph_scores.append(
-                (top_three_avg_similarity, top_three_avg_commonality,
-                 {'modified_text': modified_paragraph, 'original_text': paragraph_sentence_encoding[0]})
-            )
-        sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
-        paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
-        st.write("Top scored paragraphs and their scores:")
-        for similarity_score, commonality_score, paragraph in paragraph_scores[:5]:
-            st.write(f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}")
-            output_1 = paraphrase(paragraph['modified_text'])
-            print(output_1)
-            output_2 = paraphrase(output_1)
-            print(output_2)
-            st.write("Paraphrased Paragraph: ", output_2)
-            st.write("Modified Paragraph: ", paragraph['modified_text'])
-            st.write("Original Paragraph: ", paragraph['original_text'])

     query = st.text_input("Enter your query")
     if query:
+        if 'paragraph_scores' not in st.session_state:
+            query_tokens = st.session_state.bert_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(
+                'cuda')
+            with torch.no_grad():  # Disable gradient calculation for inference
+                query_encoding = st.session_state.bert_model(**query_tokens).last_hidden_state[:, 0,
+                                 :].cpu().numpy()  # Move the result to CPU and convert to NumPy
+            paragraph_scores = []
+            sentence_scores = []
+            total_count = len(st.session_state.paragraph_sentence_encodings)
+            processing_progress_bar = st.progress(0)
+            for index, paragraph_sentence_encoding in enumerate(st.session_state.paragraph_sentence_encodings):
+                progress_percentage = index / (total_count - 1)
+                processing_progress_bar.progress(progress_percentage)
+                sentence_similarities = []
+                for sentence_encoding in paragraph_sentence_encoding[1]:
+                    if sentence_encoding:
+                        similarity = cosine_similarity(query_encoding, sentence_encoding[1])[0][0]
+                        combined_score, similarity_score, commonality_score = combined_similarity(similarity,
+                                                                                                  sentence_encoding[0],
+                                                                                                  query)
+                        sentence_similarities.append((combined_score, sentence_encoding[0], commonality_score))
+                        sentence_scores.append((combined_score, sentence_encoding[0]))
+                sentence_similarities.sort(reverse=True, key=lambda x: x[0])
+                if len(sentence_similarities) >= 3:
+                    top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities[:3]])
+                    top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities[:3]])
+                    top_three_sentences = sentence_similarities[:3]
+                elif sentence_similarities:
+                    top_three_avg_similarity = np.mean([s[0] for s in sentence_similarities])
+                    top_three_avg_commonality = np.mean([s[2] for s in sentence_similarities])
+                    top_three_sentences = sentence_similarities
+                else:
+                    top_three_avg_similarity = 0
+                    top_three_avg_commonality = 0
+                    top_three_sentences = []
+                top_three_texts = [s[1] for s in top_three_sentences]
+                remaining_texts = [s[0] for s in paragraph_sentence_encoding[1] if s and s[0] not in top_three_texts]
+                reordered_paragraph = top_three_texts + remaining_texts
+                original_paragraph = ' '.join([s[0] for s in paragraph_sentence_encoding[1] if s])
+                modified_paragraph = ' '.join(reordered_paragraph)
+                paragraph_scores.append(
+                    (top_three_avg_similarity, top_three_avg_commonality,
+                     {'modified_text': modified_paragraph, 'original_text': paragraph_sentence_encoding[0]})
+                )
+            sentence_scores = sorted(sentence_scores, key=lambda x: x[0], reverse=True)
+            st.session_state.paragraph_scores = sorted(paragraph_scores, key=lambda x: x[0], reverse=True)
+        if 'paragraph_scores' in st.session_state:
+            if "paraphrased_paragrpahs" not in st.session_state:
+                st.session_state.paraphrased_paragrpahs = []
+                for i, (similarity_score, commonality_score, paragraph) in enumerate(st.session_state.paragraph_scores[:5]):
+                    output_1 = paraphrase(paragraph['modified_text'])
+                    # print(output_1)
+                    output_2 = paraphrase(output_1)
+                    # print(output_2)
+                    st.session_state.paraphrased_paragrpahs.append(output_2)
+            st.write("Top scored paragraphs and their scores:")
+            for i, (similarity_score, commonality_score, paragraph) in enumerate(
+                    st.session_state.paragraph_scores[:5]):
+                st.write("Paraphrased Paragraph: ", st.session_state.paraphrased_paragrpahs[i])
+                if st.button(f"Show Original Paragraph {i + 1}", key=f"button_{i}"):
+                    st.write(f"Similarity Score: {similarity_score}, Commonality Score: {commonality_score}")
+                    st.write("Original Paragraph: ", paragraph['original_text'])
+                # st.write("Modified Paragraph: ", paragraph['modified_text'])