Spaces:

linhkid91
/

ArxivDigest-extra

Running

linhkid91 committed on Apr 2, 2024

Commit

284474f

1 Parent(s): e3e24ea

Add other fields and fix JSON format errors

Files changed (4) hide show

config.yaml CHANGED Viewed

@@ -9,7 +9,7 @@ categories: ["Artificial Intelligence", "Computation and Language", "Machine Lea
 # will have their papers filtered out.
 #
 # Must be within 1-10
-threshold: 6
 # A natural language statement that the large language model will use to judge which papers are relevant
 #

 # will have their papers filtered out.
 #
 # Must be within 1-10
+threshold: 7
 # A natural language statement that the large language model will use to judge which papers are relevant
 #

src/action.py CHANGED Viewed

@@ -251,7 +251,11 @@ def generate_body(topic, categories, interest, threshold):
         )
         body = "<br><br>".join(
             [
-                f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}'
                 for paper in relevancy
             ]
         )

         )
         body = "<br><br>".join(
             [
+                f'<b>Title:</b> <a href="{paper["main_page"]}">{paper["title"]}</a><br><b>Authors:</b> {paper["authors"]}<br>'
+                f'<b>Score:</b> {paper["Relevancy score"]}<br><b>Reason:</b> {paper["Reasons for match"]}<br>'
+                f'<b>Goal:</b> {paper["Goal"]}<br><b>Data</b>: {paper["Data"]}<br><b>Methodology:</b> {paper["Methodology"]}<br>'
+                f'<b>Experiments & Results</b>: {paper["Experiments & Results"]}<br><b>Git</b>: {paper["Git"]}<br>'
+                f'<b>Discussion & Next steps</b>: {paper["Discussion & Next steps"]}'
                 for paper in relevancy
             ]
         )

src/relevancy.py CHANGED Viewed

@@ -31,12 +31,22 @@ def encode_prompt(query, prompt_papers):
         prompt += f"{idx + 1}. Authors: {authors}\n"
         prompt += f"{idx + 1}. Abstract: {abstract}\n"
     prompt += f"\n Generate response:\n1."
-    print(prompt)
     return prompt
 def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
     selected_data = []
     if response is None:
         return []
     json_items = response['message']['content'].replace("\n\n", "\n").split("\n")
@@ -45,12 +55,16 @@ def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
     try:
         score_items = [
             json.loads(re.sub(pattern, "", line))
-            for line in json_items if "relevancy score" in line.lower()]
     except Exception as e:
-        pprint.pprint([re.sub(pattern, "", line) for line in json_items if "relevancy score" in line.lower()])
-        print(e) #linh edit, test
         raise RuntimeError("failed")
-    pprint.pprint(score_items)
     scores = []
     for item in score_items:
         temp = item["Relevancy score"]

         prompt += f"{idx + 1}. Authors: {authors}\n"
         prompt += f"{idx + 1}. Abstract: {abstract}\n"
     prompt += f"\n Generate response:\n1."
+    #print(prompt)
     return prompt
+def is_json(myjson):
+    try:
+        json.loads(myjson)
+    except ValueError as e:
+        return False
+    return True
 def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
     selected_data = []
+    print("HERE")
+    print(response['message']['content'])
     if response is None:
         return []
     json_items = response['message']['content'].replace("\n\n", "\n").split("\n")
     try:
         score_items = [
             json.loads(re.sub(pattern, "", line))
+            for line in json_items if (is_json(line) and "relevancy score" in line.lower())]
     except Exception as e:
+        #pprint.pprint([re.sub(pattern, "", line) for line in json_items if "relevancy score" in line.lower()])
+        try:
+            score_items = score_items[:-1]
+        except Exception:
+            score_items = []
+        print(e)
         raise RuntimeError("failed")
+    #pprint.pprint(score_items)
     scores = []
     for item in score_items:
         temp = item["Relevancy score"]

src/relevancy_prompt.txt CHANGED Viewed

@@ -3,6 +3,6 @@ Based on my specific research interests, relevancy score out of 10 for each pape
 Additionally, please generate summary, for each paper explaining why it's relevant to my research interests.
 Please keep the paper order the same as in the input list, with one json format per line. Example is:
-{"Relevancy score": "an integer score out of 10", "Reasons for match": "1-2 sentence short reasonings", "Goal":"Goal of the paper/What kind of pain points the paper is trying to solve?", "Data": "Short summary of the data source used in the paper", "Methodology": "Summary of methodologies authors described in the paper", "Experiments & Results": "Summary of results", "Git": "Link to the Github code repo (if available)", "Discussion & Next steps": "Further discussion and next steps of the research"}
 My research interests are: NLP, RAGs, LLM, Optmization in Machine learning, Data science, Generative AI, Optimization in LLM, Finance modelling ...

 Additionally, please generate summary, for each paper explaining why it's relevant to my research interests.
 Please keep the paper order the same as in the input list, with one json format per line. Example is:
+1. {"Relevancy score": "an integer score out of 10", "Reasons for match": "1-2 sentence short reasonings", "Goal": "What kind of pain points the paper is trying to solve?", "Data": "Summary of the data source used in the paper", "Methodology": "Summary of methodologies used in the paper", "Git": "Link to the code repo (if available)", "Experiments & Results": "Summary of any experiments & its results", "Discussion & Next steps": "Further discussion and next steps of the research"}
 My research interests are: NLP, RAGs, LLM, Optmization in Machine learning, Data science, Generative AI, Optimization in LLM, Finance modelling ...