import streamlit as st
from huggingface_hub import InferenceClient
import requests
from bs4 import BeautifulSoup
import os
import pickle
import re
base_url = "https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HUGGINGFACE_API_KEY')

# Display name -> full Inference API URL for each available model.
model_links = {
    "InsiderInvest📈": base_url + "mistralai/Mistral-7B-Instruct-v0.2",
}
model_info = {
    "InsiderInvest📈": {
        'description': """The InsiderInvest model is a **Large Language Model (LLM)** that predicts the success potential of Indian startups, reasoning like a successful startup founder.\n\nIt can analyze startup data, including funding rounds, team experience, industry, market size, user growth, and more, to provide insights to the investor.\n""",
        'logo': './insider.jpg'
    },
}
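# Note on the prompt format: Mistral-7B-Instruct is trained on [INST] ... [/INST]
# turns. The [CONV_HISTORY], [STARTUP_DETAILS], and [RESPONSE] tags built below
# are app-specific conventions for structuring context, not tokens the model was
# trained on; the model treats them as plain text.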
def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
    """Assemble a single prompt string from instructions, history, startup details, and the new message."""
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]\n"

    # Add conversation history to the prompt
    prompt += "[CONV_HISTORY]\n"
    for role, content in conversation_history:
        prompt += f"{role.upper()}: {content}\n"
    prompt += "[/CONV_HISTORY]\n"

    # Add the startup details to the prompt
    prompt += "[STARTUP_DETAILS]\n"
    for key, value in startup_details.items():
        if key == "funding_rounds":
            prompt += f"{key.capitalize()}:\n"
            for round_details in value:
                prompt += f"- Type: {round_details.get('type', 'N/A')}, Amount: {round_details.get('amount', 'N/A')}\n"
        else:
            prompt += f"{key.capitalize()}: {value}\n"
    prompt += "[/STARTUP_DETAILS]\n"

    # Add the current message and open the response section
    prompt += f"[INST] {message} [/INST]\n"
    prompt += "[RESPONSE]\n"
    return prompt
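# Illustrative output (names hypothetical):
#   format_prompt("Predict success of Acme?", [("user", "hi")], {"name": "Acme"})
# produces roughly:
#   [CONV_HISTORY]
#   USER: hi
#   [/CONV_HISTORY]
#   [STARTUP_DETAILS]
#   Name: Acme
#   [/STARTUP_DETAILS]
#   [INST] Predict success of Acme? [/INST]
#   [RESPONSE]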
def reset_conversation():
    '''Resets the conversation.'''
    st.session_state.conversation = []
    st.session_state.messages = []
    st.session_state.chat_state = "reset"
def load_conversation_history():
    history_file = "conversation_history.pickle"
    if os.path.exists(history_file):
        with open(history_file, "rb") as f:
            conversation_history = pickle.load(f)
    else:
        conversation_history = []
    return conversation_history

def save_conversation_history(conversation_history):
    history_file = "conversation_history.pickle"
    with open(history_file, "wb") as f:
        pickle.dump(conversation_history, f)
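# Caveat: pickle should only be loaded from trusted files, and on Hugging Face
# Spaces the local filesystem is ephemeral, so this history is lost whenever the
# Space restarts. Durable storage would need a database or an HF Dataset.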
def scrape_startup_info(startup_name):
    """Best-effort scrape of basic startup facts from the Wikipedia infobox.

    Infobox layouts vary widely between articles, so any of these fields may be missing.
    """
    startup_details = {}
    try:
        wiki_url = f"https://en.wikipedia.org/wiki/{startup_name.replace(' ', '_')}"
        response = requests.get(wiki_url, timeout=10)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, "html.parser")

            # Extract founded year; the cell usually contains more than a bare
            # year (e.g. "2008; 15 years ago"), so pull the first 4-digit number
            # instead of calling int() on the whole cell.
            founded_year_elem = soup.select_one("th:-soup-contains('Founded') + td")
            if founded_year_elem:
                year_match = re.search(r"\b(\d{4})\b", founded_year_elem.text)
                if year_match:
                    startup_details["founded_year"] = int(year_match.group(1))

            # Extract industry
            industry_elem = soup.select_one("th:-soup-contains('Industry') + td")
            if industry_elem:
                startup_details["industry"] = industry_elem.text.strip()

            # Extract team members
            team_members_elem = soup.select("th:-soup-contains('Founder') + td a")
            startup_details["team_members"] = [member.text.strip() for member in team_members_elem]

            # Extract funding rounds (if available)
            funding_rounds_elem = soup.select("th:-soup-contains('Funding') + td")
            if funding_rounds_elem:
                funding_rounds = []
                for round_elem in funding_rounds_elem:
                    round_details = {}
                    round_type = round_elem.find(string=lambda text: "round" in text.lower())
                    if round_type:
                        round_details["type"] = round_type.strip()
                    round_amount = round_elem.find(string=lambda text: "$" in text)
                    if round_amount:
                        round_details["amount"] = round_amount.strip()
                    funding_rounds.append(round_details)
                startup_details["funding_rounds"] = funding_rounds

            # Extract user growth (if available)
            user_growth_elem = soup.select_one("th:-soup-contains('Users') + td")
            if user_growth_elem:
                startup_details["user_growth"] = user_growth_elem.text.strip()

            startup_details["name"] = startup_name
            return startup_details
        return {}
    except Exception as e:
        st.error(f"Error scraping startup information: {e}")
        return {}
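# Example usage (startup name hypothetical; results depend on the article's infobox):
#   details = scrape_startup_info("Zomato")
#   print(details.get("industry"), details.get("founded_year"))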
models = list(model_links.keys())
selected_model = st.sidebar.selectbox("Select Model", models)
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
st.sidebar.button('Reset Chat', on_click=reset_conversation)  # Reset button
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
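# Streamlit reruns this script from the top on every interaction, so anything
# that must survive across reruns (messages, the selected model) lives in
# st.session_state.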
if "prev_option" not in st.session_state: | |
st.session_state.prev_option = selected_model | |
if st.session_state.prev_option != selected_model: | |
st.session_state.messages = [] | |
st.session_state.prev_option = selected_model | |
if "chat_state" not in st.session_state: | |
st.session_state.chat_state = "normal" | |
if "messages" not in st.session_state: | |
st.session_state.messages = load_conversation_history() | |
repo_id = model_links[selected_model] | |
st.subheader(f'{selected_model}') | |
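# Main chat flow: replay stored messages, then handle new input. Messages of the
# form "predict success of <name>?" trigger the Wikipedia scraper; everything
# else is passed to the model as a plain chat turn.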
if st.session_state.chat_state == "normal":
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input(f"Hi, I'm {selected_model}. How can I help you today?"):
        if "predict success of" in prompt.lower():
            # Extract the startup name from the prompt
            startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
            if startup_name_match:
                startup_name = startup_name_match.group(1).strip()
                startup_details = scrape_startup_info(startup_name)
                if startup_details:
                    with st.chat_message("user"):
                        st.markdown(prompt)
                    st.session_state.messages.append({"role": "user", "content": prompt})
                    conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
                    custom_instruction = "Act as a knowledgeable advisor to provide valuable insights and information for investors interested in startups. Help investors discover new promising startups, analyze their financial performance and funding details, evaluate investment opportunities, and offer guidance on the investment process."
                    formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
with st.chat_message("assistant"): | |
client = InferenceClient(model=model_links[selected_model]) | |
max_new_tokens = 3000 # Adjust this value as needed | |
try: | |
output = client.text_generation( | |
formatted_text, | |
temperature=temp_values, | |
max_new_tokens=max_new_tokens, | |
stream=True | |
) | |
response = "" | |
for output_chunk in output: | |
if isinstance(output_chunk, dict) and "text" in output_chunk: | |
response += output_chunk["text"] | |
else: | |
response += output_chunk # Handle the case where output_chunk might be a string | |
st.markdown(f"**Success Analysis for {startup_details['name']}**\n\n{response}") | |
except ValueError as e: | |
if "Input validation error" in str(e): | |
st.error("Error: The input prompt is too long. Please try a shorter prompt.") | |
else: | |
st.error(f"An error occurred: {e}") | |
except Exception as e: | |
st.error(f"An unexpected error occurred: {e}") | |
else: | |
st.session_state.messages.append({"role": "assistant", "content": response}) | |
save_conversation_history(st.session_state.messages) | |
                else:
                    st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
        else:
            with st.chat_message("user"):
                st.markdown(prompt)
            st.session_state.messages.append({"role": "user", "content": prompt})
            conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
            formatted_text = format_prompt(prompt, conversation_history, {})
            with st.chat_message("assistant"):
                client = InferenceClient(model=repo_id, token=API_KEY)
                max_new_tokens = 3000  # Adjust this value as needed
                try:
                    output = client.text_generation(
                        formatted_text,
                        temperature=temp_values,
                        max_new_tokens=max_new_tokens,
                        stream=True
                    )
                    response = ""
                    for output_chunk in output:
                        if isinstance(output_chunk, dict) and "text" in output_chunk:
                            response += output_chunk["text"]
                        else:
                            response += output_chunk  # Handle the case where output_chunk might be a string
                    st.markdown(response)
                except ValueError as e:
                    if "Input validation error" in str(e):
                        st.error("Error: The input prompt is too long. Please try a shorter prompt.")
                    else:
                        st.error(f"An error occurred: {e}")
                except Exception as e:
                    st.error(f"An unexpected error occurred: {e}")
                else:
                    st.session_state.messages.append({"role": "assistant", "content": response})
                    save_conversation_history(st.session_state.messages)
elif st.session_state.chat_state == "reset":
    st.session_state.chat_state = "normal"
    st.rerun()  # Rerun the script so the cleared chat renders immediately