aquibmoin committed on
Commit
36639c5
1 Parent(s): da2a966

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -16
app.py CHANGED
@@ -30,30 +30,22 @@ ADS.TOKEN = os.getenv('ADS_API_KEY') # Ensure your ADS API key is stored in env
30
  # Define system message with instructions
31
  system_message = """
32
  You are ExosAI, a helpful assistant specializing in Exoplanet and Astrophysics research.
33
-
34
  Generate a detailed structured response based on the following science context and user input, including the necessary observables, physical parameters, and technical requirements for observations. The response should include the following sections:
35
-
36
  Science Objectives: Describe key scientific study objectives related to the science context and user input.
37
-
38
  Physical Parameters: Outline the physical parameters related to the science context and user input.
39
-
40
  Observables: Specify the observables related to the science context and user input.
41
-
42
  Description of Desired Observations: Detail the types of observations related to the science context and user input.
43
-
44
  Technical Requirements Table: Generate a table with the following columns:
45
- - Requirements: The specific observational requirements (e.g., UV observations, Optical observations or Infrared observations. No radio or radar).
46
  - Necessary: The necessary values or parameters (e.g., wavelength ranges, spatial resolution).
47
  - Desired: The desired values or parameters.
48
  - Justification: A scientific explanation of why these requirements are important.
49
  - Comments: Additional notes or remarks regarding each requirement.
50
-
51
  Example:
52
  | Requirements | Necessary | Desired | Justification | Comments |
53
  |----------------------------------|------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
54
  | UV Observations | Wavelength: 1200–2100 Å, 2500–3300 Å | Wavelength: 1200–3300 Å | Characterization of atomic and molecular emissions (H, C, O, S, etc.) from fluorescence and dissociative electron impact | Needed for detecting H2O, CO, CO2, and other volatile molecules relevant for volatile delivery studies. |
55
  | Infrared Observations | Wavelength: 2.5–4.8 μm | Wavelength: 1.5–4.8 μm | Tracks water emissions and CO2 lines in icy bodies and small planetesimals | Also allows detection of 3 μm absorption feature in icy bodies. |
56
-
57
  Ensure the response is structured clearly and the technical requirements table follows this format.
58
  """
59
 
@@ -62,13 +54,54 @@ def encode_text(text):
62
  outputs = bi_model(**inputs)
63
  return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten()
64
 
65
def retrieve_relevant_context(user_input, context_texts):
    """Pick the entry of ``context_texts`` that best matches ``user_input``.

    The user input and every candidate text are embedded with
    ``encode_text``; candidates are ranked by cosine similarity against
    the user input and the top-ranked candidate is returned unchanged.

    Args:
        user_input: The query text.
        context_texts: A sequence of candidate context strings.

    Returns:
        The element of ``context_texts`` with the highest similarity score.
    """
    # Query embedding as a single-row matrix, the layout cosine_similarity expects.
    query_vector = encode_text(user_input).reshape(1, -1)

    # One row per candidate text.
    candidate_vectors = np.array(list(map(encode_text, context_texts)))
    candidate_vectors = candidate_vectors.reshape(len(candidate_vectors), -1)

    scores = cosine_similarity(query_vector, candidate_vectors).flatten()
    return context_texts[np.argmax(scores)]
 
 
 
 
72
 
73
  def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
74
  # Define a prompt to ask GPT-4 to extract keywords and important terms
@@ -307,7 +340,7 @@ def gpt_response_to_dataframe(gpt_response):
307
 
308
  def chatbot(user_input, context="", subdomain="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
309
  if use_encoder and context:
310
- context_texts = context.split("\n")
311
  relevant_context = retrieve_relevant_context(user_input, context_texts)
312
  else:
313
  relevant_context = ""
@@ -389,7 +422,7 @@ iface = gr.Interface(
389
  gr.HTML(label="Miro"),
390
  gr.HTML(label="Generate Mind Map on Mapify")
391
  ],
392
- title="ExosAI - NASA SMD SCDD AI Assistant [version-0.8a]",
393
  description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",
394
  )
395
 
 
30
  # Define system message with instructions
31
  system_message = """
32
  You are ExosAI, a helpful assistant specializing in Exoplanet and Astrophysics research.
 
33
  Generate a detailed structured response based on the following science context and user input, including the necessary observables, physical parameters, and technical requirements for observations. The response should include the following sections:
 
34
  Science Objectives: Describe key scientific study objectives related to the science context and user input.
 
35
  Physical Parameters: Outline the physical parameters related to the science context and user input.
 
36
  Observables: Specify the observables related to the science context and user input.
 
37
  Description of Desired Observations: Detail the types of observations related to the science context and user input.
 
38
  Technical Requirements Table: Generate a table with the following columns:
39
+ - Requirements: The specific observational requirements (e.g., UV observations, Optical observations or Infrared observations).
40
  - Necessary: The necessary values or parameters (e.g., wavelength ranges, spatial resolution).
41
  - Desired: The desired values or parameters.
42
  - Justification: A scientific explanation of why these requirements are important.
43
  - Comments: Additional notes or remarks regarding each requirement.
 
44
  Example:
45
  | Requirements | Necessary | Desired | Justification | Comments |
46
  |----------------------------------|------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
47
  | UV Observations | Wavelength: 1200–2100 Å, 2500–3300 Å | Wavelength: 1200–3300 Å | Characterization of atomic and molecular emissions (H, C, O, S, etc.) from fluorescence and dissociative electron impact | Needed for detecting H2O, CO, CO2, and other volatile molecules relevant for volatile delivery studies. |
48
  | Infrared Observations | Wavelength: 2.5–4.8 μm | Wavelength: 1.5–4.8 μm | Tracks water emissions and CO2 lines in icy bodies and small planetesimals | Also allows detection of 3 μm absorption feature in icy bodies. |
 
49
  Ensure the response is structured clearly and the technical requirements table follows this format.
50
  """
51
 
 
54
  outputs = bi_model(**inputs)
55
  return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten()
56
 
57
def get_chunks(text, chunk_size=300):
    """Split text into consecutive chunks of at most ``chunk_size`` characters.

    The split is purely positional: a new chunk starts every ``chunk_size``
    characters, so a chunk may begin or end in the middle of a word. Every
    chunk has exactly ``chunk_size`` characters except possibly the last.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings that concatenate back to ``text``.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if
            ``chunk_size`` is not positive.
    """
    if not text.strip():
        raise ValueError("The provided context is empty or blank.")

    # A negative step would make range() empty and silently yield no chunks,
    # and a zero step fails inside range() with an unrelated message, so
    # reject both up front with a clear error.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")

    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
67
+
68
def retrieve_relevant_context(user_input, context_texts, chunk_size=300, similarity_threshold=0.3):
    """Return the chunk of ``context_texts`` most similar to ``user_input``.

    The context string is split into fixed-size character chunks with
    ``get_chunks``. The user input and each chunk are embedded with
    ``encode_text``, and the chunk with the highest cosine similarity to
    the user input is selected.

    Every path returns a single string. The caller assigns the result
    directly to its context variable, so returning a tuple on some paths
    would leak a tuple into whatever consumes that variable.

    Args:
        user_input: The query text.
        context_texts: The full context as one string.
        chunk_size: Number of characters per chunk.
        similarity_threshold: Minimum cosine similarity the best chunk must
            reach to be considered relevant.

    Returns:
        str: One of
            - the most similar chunk;
            - the only chunk, when the context fits in a single chunk
              (no embeddings are computed in that case);
            - "Error: Context is empty or improperly formatted." when the
              context is empty or whitespace-only;
            - "No relevant context found for the user input." when the best
              similarity is below ``similarity_threshold``.
    """
    # Guard first so get_chunks never raises on blank input.
    if not context_texts.strip():
        return "Error: Context is empty or improperly formatted."

    context_chunks = get_chunks(context_texts, chunk_size)

    # With a single chunk there is nothing to rank, so skip the encoder.
    if len(context_chunks) == 1:
        return context_chunks[0]

    # Query embedding as a single-row matrix; one row per chunk below.
    user_embedding = encode_text(user_input).reshape(1, -1)
    chunk_embeddings = np.array([encode_text(chunk) for chunk in context_chunks])

    similarities = cosine_similarity(user_embedding, chunk_embeddings).flatten()

    most_relevant_idx = int(np.argmax(similarities))
    if similarities[most_relevant_idx] < similarity_threshold:
        return "No relevant context found for the user input."

    return context_chunks[most_relevant_idx]
104
+
105
 
106
  def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
107
  # Define a prompt to ask GPT-4 to extract keywords and important terms
 
340
 
341
  def chatbot(user_input, context="", subdomain="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
342
  if use_encoder and context:
343
+ context_texts = context
344
  relevant_context = retrieve_relevant_context(user_input, context_texts)
345
  else:
346
  relevant_context = ""
 
422
  gr.HTML(label="Miro"),
423
  gr.HTML(label="Generate Mind Map on Mapify")
424
  ],
425
+ title="ExosAI - NASA SMD SCDD AI Assistant [version-0.9a]",
426
  description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",
427
  )
428