import streamlit as st
from huggingface_hub import InferenceClient
import requests
from bs4 import BeautifulSoup
import os
import pickle
import re

base_url = "https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HUGGINGFACE_API_KEY')

model_links = {
    "InsiderInvest📈": base_url + "mistralai/Mistral-7B-Instruct-v0.2",
}

model_info = {
    "InsiderInvest📈": {
        'description': """The InsiderInvest model is a **Large Language Model (LLM)** that predicts the success potential of Indian startups, reasoning like a successful startup founder.\n \n\nIt can analyze startup data, including funding rounds, team experience, industry, market size, user growth, and more, to provide insights to the investor.\n""",
        'logo': './insider.jpg'
    },
}
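
# Additional models can be registered by adding matching entries to both dicts
# above, e.g. (the repo id and logo path here are purely illustrative):
#   model_links["AnotherModel"] = base_url + "some-org/some-model"
#   model_info["AnotherModel"] = {"description": "...", "logo": "./another.jpg"}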

def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
    """Assemble the full prompt: optional custom instructions, the running
    conversation history, the scraped startup details, and the new message."""
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]\n"
    
    # Add conversation history to the prompt
    prompt += "[CONV_HISTORY]\n"
    for role, content in conversation_history:
        prompt += f"{role.upper()}: {content}\n"
    prompt += "[/CONV_HISTORY]\n"
    
    # Add the startup details to the prompt
    prompt += "[STARTUP_DETAILS]\n"
    for key, value in startup_details.items():
        if key == "funding_rounds":
            prompt += f"{key.capitalize()}:\n"
            for round_details in value:
                prompt += f"- Type: {round_details.get('type', 'N/A')}, Amount: {round_details.get('amount', 'N/A')}\n"
        else:
            prompt += f"{key.capitalize()}: {value}\n"
    prompt += "[/STARTUP_DETAILS]\n"
    
    # Add the current message
    prompt += f"[INST] {message} [/INST]\n"
    
    # Add the response format
    prompt += "[RESPONSE]\n"
    
    return prompt
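
# For illustration, a call such as
#   format_prompt("Predict success of Acme?", [("user", "Hi")],
#                 {"name": "Acme", "industry": "Fintech"})
# (Acme being a made-up example) yields a prompt shaped like:
#   [CONV_HISTORY]
#   USER: Hi
#   [/CONV_HISTORY]
#   [STARTUP_DETAILS]
#   Name: Acme
#   Industry: Fintech
#   [/STARTUP_DETAILS]
#   [INST] Predict success of Acme? [/INST]
#   [RESPONSE]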

def reset_conversation():
    '''
    Resets Conversation
    '''
    st.session_state.conversation = []
    st.session_state.messages = []
    st.session_state.chat_state = "reset"

def load_conversation_history():
    history_file = "conversation_history.pickle"
    if os.path.exists(history_file):
        with open(history_file, "rb") as f:
            conversation_history = pickle.load(f)
    else:
        conversation_history = []
    return conversation_history

def save_conversation_history(conversation_history):
    history_file = "conversation_history.pickle"
    with open(history_file, "wb") as f:
        pickle.dump(conversation_history, f)
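
# A minimal sketch (an optional helper, not called above) of one way to keep
# the formatted prompt short and sidestep the "Input validation error" handled
# below; the 20-message cap is an illustrative assumption, not a model limit.
def trim_conversation_history(messages, max_messages=20):
    """Return only the most recent messages for prompt formatting."""
    return messages[-max_messages:]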

def scrape_startup_info(startup_name):
    startup_details = {}

    # Scrape the company infobox from Wikipedia
    try:
        wiki_url = f"https://en.wikipedia.org/wiki/{startup_name.replace(' ', '_')}"
        response = requests.get(wiki_url, timeout=10)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, "html.parser")

            # Extract founded year (":-soup-contains" is the current spelling
            # of the deprecated ":contains" selector in soupsieve/bs4)
            founded_year_elem = soup.select_one("th:-soup-contains('Founded') + td")
            if founded_year_elem:
                # Infobox cells often read "2008; 16 years ago", so pull out
                # the first four-digit year rather than int()-ing the cell
                year_match = re.search(r"\b(\d{4})\b", founded_year_elem.text)
                if year_match:
                    startup_details["founded_year"] = int(year_match.group(1))

            # Extract industry
            industry_elem = soup.select_one("th:-soup-contains('Industry') + td")
            if industry_elem:
                startup_details["industry"] = industry_elem.text.strip()

            # Extract team members
            team_members_elem = soup.select("th:-soup-contains('Founder') + td a")
            team_members = [member.text.strip() for member in team_members_elem]
            startup_details["team_members"] = team_members

            # Extract funding rounds (if available)
            funding_rounds_elem = soup.select("th:-soup-contains('Funding') + td")
            if funding_rounds_elem:
                funding_rounds = []
                for round_elem in funding_rounds_elem:
                    round_details = {}
                    # "string=" replaces the deprecated "text=" keyword in bs4
                    round_type = round_elem.find(string=lambda text: "round" in text.lower())
                    if round_type:
                        round_details["type"] = round_type.strip()
                    round_amount = round_elem.find(string=lambda text: "$" in text)
                    if round_amount:
                        round_details["amount"] = round_amount.strip()
                    if round_details:
                        funding_rounds.append(round_details)
                startup_details["funding_rounds"] = funding_rounds

            # Extract user growth (if available)
            user_growth_elem = soup.select_one("th:-soup-contains('Users') + td")
            if user_growth_elem:
                startup_details["user_growth"] = user_growth_elem.text.strip()

            startup_details["name"] = startup_name
        # Fall through with the (possibly empty) dict on a non-200 response
        return startup_details
    except Exception as e:
        st.error(f"Error scraping startup information: {e}")
        return {}
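
# Example of the returned shape (actual values depend on the live Wikipedia
# infobox at scrape time):
#   scrape_startup_info("Flipkart") -> {
#       "founded_year": 2007, "industry": "E-commerce",
#       "team_members": ["Sachin Bansal", "Binny Bansal"],
#       "name": "Flipkart", ...
#   }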

models = list(model_links.keys())
selected_model = st.sidebar.selectbox("Select Model", models)
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
st.sidebar.button('Reset Chat', on_click=reset_conversation)  # Reset button

st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])

if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model

if st.session_state.prev_option != selected_model:
    st.session_state.messages = []
    st.session_state.prev_option = selected_model

if "chat_state" not in st.session_state:
    st.session_state.chat_state = "normal"

if "messages" not in st.session_state:
    st.session_state.messages = load_conversation_history()

repo_id = model_links[selected_model]
st.subheader(f'{selected_model}')

if st.session_state.chat_state == "normal":
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):
        if "predict success of" in prompt.lower():
            # Extract the startup name from the prompt
            startup_name_match = re.search(r'predict success of (.+?)(?:\?|$)', prompt, re.IGNORECASE)
            if startup_name_match:
                startup_name = startup_name_match.group(1).strip()
                startup_details = scrape_startup_info(startup_name)
                if startup_details:
                    with st.chat_message("user"):
                        st.markdown(prompt)

                    st.session_state.messages.append({"role": "user", "content": prompt})
                    conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
                    custom_instruction = "Act as a knowledgeable advisor to provide valuable insights and information for investors interested in startups. Help investors discover new promising startups, analyze their financial performance and funding details, evaluate investment opportunities, and offer guidance on the investment process."


                    formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)

                    with st.chat_message("assistant"):
                        # Authenticate requests with the HF API key read above
                        client = InferenceClient(model=repo_id, token=API_KEY)
                        max_new_tokens = 3000  # Adjust this value as needed
                        try:
                            output = client.text_generation(
                                formatted_text,
                                temperature=temp_values,
                                max_new_tokens=max_new_tokens,
                                stream=True
                            )
                            response = ""
                            for output_chunk in output:
                                # With stream=True and details left at the
                                # default, text_generation yields token strings
                                response += output_chunk
                            st.markdown(f"**Success Analysis for {startup_details['name']}**\n\n{response}")
                        except ValueError as e:
                            if "Input validation error" in str(e):
                                st.error("Error: The input prompt is too long. Please try a shorter prompt.")
                            else:
                                st.error(f"An error occurred: {e}")
                        except Exception as e:
                            st.error(f"An unexpected error occurred: {e}")
                        else:
                            st.session_state.messages.append({"role": "assistant", "content": response})
                            save_conversation_history(st.session_state.messages)
                else:
                    st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
        else:
            with st.chat_message("user"):
                st.markdown(prompt)

            st.session_state.messages.append({"role": "user", "content": prompt})
            conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]

            formatted_text = format_prompt(prompt, conversation_history, {})

            with st.chat_message("assistant"):
                # Authenticate requests with the HF API key read above
                client = InferenceClient(model=repo_id, token=API_KEY)
                max_new_tokens = 3000  # Adjust this value as needed
                try:
                    output = client.text_generation(
                        formatted_text,
                        temperature=temp_values,
                        max_new_tokens=max_new_tokens,
                        stream=True
                    )
                    response = ""
                    for output_chunk in output:
                        # With stream=True and details left at the default,
                        # text_generation yields token strings
                        response += output_chunk
                    st.markdown(response)
                except ValueError as e:
                    if "Input validation error" in str(e):
                        st.error("Error: The input prompt is too long. Please try a shorter prompt.")
                    else:
                        st.error(f"An error occurred: {e}")
                except Exception as e:
                    st.error(f"An unexpected error occurred: {e}")
                else:
                    st.session_state.messages.append({"role": "assistant", "content": response})
                    save_conversation_history(st.session_state.messages)

elif st.session_state.chat_state == "reset":
    st.session_state.chat_state = "normal"
    st.rerun()  # replaces st.experimental_rerun, removed in newer Streamlit