TheJimmy committed on
Commit
e1e1146
·
verified ·
1 Parent(s): 0a92c05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -67
app.py CHANGED
@@ -1,68 +1,63 @@
1
- import streamlit as st
2
- from FlagEmbedding import BGEM3FlagModel
3
- from FlagEmbedding import FlagReranker
4
- import pandas as pd
5
- import numpy as np
6
-
7
- @st.cache_resource
8
- def load_model():
9
- return BGEM3FlagModel('BAAI/bge-m3',
10
- use_fp16=True)
11
- @st.cache_resource
12
- def load_reranker():
13
- return FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
14
-
15
- @st.cache_data
16
- def load_embed(path):
17
- embeddings_2 = np.load(path)
18
- return embeddings_2
19
-
20
- model = load_model()
21
- reranker = load_reranker()
22
-
23
- embeddings_2 = load_embed('D:/AI_Builder/BGE_embeddings_2.npy')
24
-
25
- data = pd.DataFrame(pd.read_csv('D:/AI_Builder/ActualProject/DataCollection/TESTUNCLEANbookquestions.csv'))
26
- data2 = pd.DataFrame(pd.read_csv('D:/AI_Builder/ActualProject/DataCollection/TRAINbookquestions.csv'))
27
- data3 = pd.read_csv("D:/AI_Builder/ActualProject/DataCollection/booksummaries.txt",
28
- header=None,sep="\t",
29
- names=["ID", "Freebase ID", "Book Name", "Book Author", "Pub date", "Genres", "Summary"])
30
- df = pd.concat([data, data2])
31
- df = df.merge(data3, on='ID', how='left')
32
- df = df.rename(columns={'Book Name_x': 'Book Name'})
33
- df = df[['ID', 'Book Name', 'Book Author', 'Questions', 'Summary']]
34
-
35
- st.header(":books: Book Identifier")
36
-
37
- k = 10
38
- with st.form(key='my_form'):
39
- sen1 = st.text_area("Book description:")
40
- submit_button = st.form_submit_button(label='Submit')
41
-
42
- if submit_button:
43
- embeddings_1 = model.encode(sen1,
44
- batch_size=12,
45
- max_length=8192,
46
- )['dense_vecs']
47
- similarity = embeddings_1 @ embeddings_2.T
48
-
49
- top_k_qs = []
50
- topk = np.argsort(similarity)[-k:]
51
-
52
- for t in topk:
53
- pred_sum = df['Summary'].iloc[t]
54
- pred_ques = sen1
55
- pred = [pred_ques, pred_sum]
56
- top_k_qs.append(pred)
57
- rrscore = reranker.compute_score(top_k_qs, normalize=True)
58
- rrscore_index = np.argsort(rrscore)
59
-
60
- pred_book = []
61
- for rr in rrscore_index:
62
- pred_book.append(f"{df['Book Name'][topk[rr]]} by {df['Book Author'][topk[rr]]}")
63
-
64
- finalpred = []
65
- pred_book.reverse()
66
- st.write("Here is your prediction")
67
- for n, pred in enumerate(pred_book):
68
  st.write(f"{n+1}: {pred}")
 
1
+ import streamlit as st
2
+ from FlagEmbedding import BGEM3FlagModel
3
+ from FlagEmbedding import FlagReranker
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
+ @st.cache_resource
8
+ def load_model():
9
+ return BGEM3FlagModel('BAAI/bge-m3',
10
+ use_fp16=True)
11
+ @st.cache_resource
12
+ def load_reranker():
13
+ return FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
14
+
15
+ @st.cache_data
16
+ def load_df(path):
17
+ df = pd.read_csv(path)
18
+ return df
19
+ @st.cache_data
20
+ def load_embed(path):
21
+ embeddings_2 = np.load(path)
22
+ return embeddings_2
23
+
24
+ model = load_model()
25
+ reranker = load_reranker()
26
+
27
+ df = load_df('D:/AI_Builder/BookDataFrame.csv')
28
+ embeddings_2 = load_embed('D:/AI_Builder/BGE_embeddings_2.npy')
29
+
30
+ st.header(":books: Book Identifier")
31
+
32
+ k = 10
33
+ with st.form(key='my_form'):
34
+ sen1 = st.text_area("Book description:")
35
+ submit_button = st.form_submit_button(label='Submit')
36
+
37
+ if submit_button:
38
+ embeddings_1 = model.encode(sen1,
39
+ batch_size=12,
40
+ max_length=8192,
41
+ )['dense_vecs']
42
+ similarity = embeddings_1 @ embeddings_2.T
43
+
44
+ top_k_qs = []
45
+ topk = np.argsort(similarity)[-k:]
46
+
47
+ for t in topk:
48
+ pred_sum = df['Summary'].iloc[t]
49
+ pred_ques = sen1
50
+ pred = [pred_ques, pred_sum]
51
+ top_k_qs.append(pred)
52
+ rrscore = reranker.compute_score(top_k_qs, normalize=True)
53
+ rrscore_index = np.argsort(rrscore)
54
+
55
+ pred_book = []
56
+ for rr in rrscore_index:
57
+ pred_book.append(f"{df['Book Name'][topk[rr]]} by {df['Book Author'][topk[rr]]}")
58
+
59
+ finalpred = []
60
+ pred_book.reverse()
61
+ st.write("Here is your prediction")
62
+ for n, pred in enumerate(pred_book):
 
 
 
 
 
63
  st.write(f"{n+1}: {pred}")