Spaces:
Sleeping
Sleeping
justalphie
committed on
Commit
·
9a5450e
1
Parent(s):
3b56fe8
Add app.py
Browse files- app.py +56 -0
- scraping_agenda.py +2 -19
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import cohere
|
4 |
+
|
5 |
+
|
6 |
+
# The Cohere API key is taken from the COHERE_KEY environment variable
# (None when the variable is not set).
COHERE_KEY = os.environ.get('COHERE_KEY')

# Module-level Cohere client, created once at import time.
co = cohere.Client(COHERE_KEY)
|
8 |
+
|
9 |
+
#list_history = [["question", "answer"], ["how", "how what..."]]
|
10 |
+
def convert_history(list_history):
    """
    Applies the prompt.
    Converts the chat history structure taken by Gradio to the structure suitable for Cohere.

    Args:
        list_history: Iterable of two-item sequences ``[user_text, bot_text]``,
            e.g. ``[["question", "answer"], ["how", "how what..."]]``.

    Returns:
        A list of ``{"role": ..., "text": ...}`` dicts. The first entry has
        role ``"SYSTEM"`` and carries the contents of ``prompt.md``; it is
        followed by one ``"USER"`` and one ``"CHATBOT"`` entry per history item,
        in the original order.

    Raises:
        OSError: If ``prompt.md`` cannot be opened from the current working
            directory (e.g. ``FileNotFoundError``).
    """
    # Read the system prompt inside a context manager so the file handle is
    # closed deterministically instead of being left for the garbage collector.
    with open("prompt.md", "r", encoding="UTF-8") as prompt_file:
        system_prompt = prompt_file.read()

    chat_history = [{"role": "SYSTEM", "text": system_prompt}]
    for item in list_history:
        # Each history item contributes the user turn, then the bot turn.
        chat_history.extend(
            (
                {"role": "USER", "text": item[0]},
                {"role": "CHATBOT", "text": item[1]},
            )
        )
    return chat_history
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
+
def reply(message:str, history:list):
    """
    Generator that answers `message` in the context of `history`.

    The history is first converted with `convert_history`, then sent together
    with the message to `co.chat_stream`. For every 'text-generation' event of
    the stream, the event text is appended to the answer built so far and the
    accumulated answer is yielded; all other event types are skipped.
    """
    cohere_history = convert_history(history)
    stream = co.chat_stream(
        message=message,
        chat_history=cohere_history,
        model="command-nightly",
        temperature=0.25
    )

    partial_answer = ""
    for chunk in stream:
        # Only text-generation events carry answer text.
        if chunk.event_type != 'text-generation':
            continue
        partial_answer = partial_answer + chunk.text
        yield partial_answer
|
44 |
+
|
45 |
+
|
46 |
+
# Introductory text passed to the chat interface as its description.
# NOTE(review): the agenda scraper in this commit targets techorama.be, so
# "Technorama" (here and in the title) may be a misspelling of "Techorama" —
# confirm with the author before renaming.
description = """
Hello!
💬 Use the text box below to ask questions about the Technorama 2024 conference!
<nobr> 🗣️ Talk to me in English, Dutch, or French. </nobr>
<nobr> 🔗 [This chatbot was made by Alfiya Khabibullina](https://www.linkedin.com/in/alfiya-khabibullina-7b13131b8/) </nobr>
"""

# Build the chat UI around `reply` and start it when the module is run.
demo = gr.ChatInterface(
    reply,
    title="Technorama Assistant",
    description=description,
)
demo.launch()
|
scraping_agenda.py
CHANGED
@@ -1,30 +1,13 @@
|
|
1 |
import playwright
|
|
|
2 |
from playwright.sync_api import sync_playwright
|
3 |
import json
|
4 |
-
|
5 |
|
6 |
|
7 |
url = "https://techorama.be/agenda/"
|
8 |
selector_links = "div.m-subject__container-inner.a-box-simple__inner-2 > a"
|
9 |
|
10 |
-
with sync_playwright() as p:
|
11 |
-
browser = p.chromium.launch(headless=True)
|
12 |
-
page = browser.new_page()
|
13 |
-
page.goto(url)
|
14 |
-
page.wait_for_timeout(3000)
|
15 |
-
links_elements = page.query_selector_all(selector_links)
|
16 |
-
links = [element.get_property("href").json_value() for element in links_elements] # .get_attribute("href")
|
17 |
-
filepath = "links_tue.csv"
|
18 |
-
#with open(filepath, "w") as f:
|
19 |
-
#f.write("\n".join(links))
|
20 |
-
page.locator("button:has-text(\"wednesday\")").click()
|
21 |
-
links_elements = page.query_selector_all(selector_links)
|
22 |
-
links = [element.get_property("href").json_value() for element in links_elements] # .get_attribute("href")
|
23 |
-
filepath = "links_wed.csv"
|
24 |
-
#with open(filepath, "w") as f:
|
25 |
-
#f.write("\n".join(links))
|
26 |
-
|
27 |
-
browser.close()
|
28 |
|
29 |
#TODO
|
30 |
#1 save the structure [{"time":"...", "activities":[{"name_of_activity":"...", "speaker_name": "...", "general_topic": "topic name", "room_number":"...", "url":"...", "date":"...", "time":"time"}]}]
|
|
|
1 |
import playwright
|
2 |
+
import playwright.sync_api
|
3 |
from playwright.sync_api import sync_playwright
|
4 |
import json
|
5 |
+
|
6 |
|
7 |
|
8 |
url = "https://techorama.be/agenda/"
|
9 |
selector_links = "div.m-subject__container-inner.a-box-simple__inner-2 > a"
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
#TODO
|
13 |
#1 save the structure [{"time":"...", "activities":[{"name_of_activity":"...", "speaker_name": "...", "general_topic": "topic name", "room_number":"...", "url":"...", "date":"...", "time":"time"}]}]
|