[email protected] committed on
Commit
2c7b20a
·
1 Parent(s): 794b78a

test source eval model

Browse files
Files changed (1) hide show
  1. app.py +47 -46
app.py CHANGED
@@ -3,54 +3,55 @@ import requests
3
  from bs4 import BeautifulSoup
4
  from bs4.element import Comment
5
  from transformers import pipeline
6
-
7
- def tag_visible(element):
8
- if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
9
- return False
10
- if isinstance(element, Comment):
11
- return False
12
- return True
13
-
14
-
15
- def getTalkPage(wiki_page):
16
- wiki_page = "https://en.wikipedia.org/"
17
- if "wikipedia.org" in wiki_page:
18
- response = requests.get(wiki_page)
19
- soup = BeautifulSoup(response.content, 'html.parser')
20
-
21
- talk_url = soup.find_all("a", {"rel": "discussion"})
22
- if len(talk_url) > 0:
23
- talk_url = talk_url[0]["href"]
24
- try:
25
- talk_response = requests.get("https://en.wikipedia.org" + talk_url)
26
- talk_soup = BeautifulSoup(talk_response.content, 'html.parser')
27
- talk_texts = talk_soup.findAll(text=True)
28
- visible_texts = filter(tag_visible, talk_texts)
29
- return u" ".join(t.strip() for t in visible_texts)
30
-
31
- except Exception as error:
32
- print('Error occured: {}'.format(error))
33
-
34
- classifier = pipeline(model="amitkayal/bert-finetuned-sem_eval-english", top_k=None)
35
-
36
- def tone_talkpage(url):
37
- talk_content = getTalkPage(url)
38
- tone_labels = {'anger': 0, 'anticipation': 0, 'disgust': 0, 'fear': 0, 'joy': 0, 'love': 0, 'optimism': 0, 'pessimism': 0, 'sadness': 0, 'surprise': 0, 'trust': 0}
39
- if talk_content:
40
- breakdown = talk_content.split()
41
- n = 200 #because the max amount of sequence length is 512
42
- breakdown_lst = [' '.join(breakdown[i:i+n]) for i in range(0,len(talk_content),n)]
43
- for ele in breakdown_lst:
44
- res = classifier(ele)[0]
45
- for tone_res in res:
46
- tone_labels[tone_res["label"]] += tone_res["score"]
 
47
 
48
- lst_len = len(breakdown_lst)
49
- for key, val in tone_labels.items():
50
- tone_labels[key] = val/lst_len
51
 
52
- return tone_labels
53
 
54
 
55
- iface = gr.Interface(fn=tone_talkpage, inputs="text", outputs="text")
56
  iface.launch()
 
3
  from bs4 import BeautifulSoup
4
  from bs4.element import Comment
5
  from transformers import pipeline
6
+ from source_eval_model.source_eval_model import check_source_quality
7
+
8
+ # def tag_visible(element):
9
+ # if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
10
+ # return False
11
+ # if isinstance(element, Comment):
12
+ # return False
13
+ # return True
14
+
15
+
16
+ # def getTalkPage(wiki_page):
17
+ # wiki_page = "https://en.wikipedia.org/"
18
+ # if "wikipedia.org" in wiki_page:
19
+ # response = requests.get(wiki_page)
20
+ # soup = BeautifulSoup(response.content, 'html.parser')
21
+
22
+ # talk_url = soup.find_all("a", {"rel": "discussion"})
23
+ # if len(talk_url) > 0:
24
+ # talk_url = talk_url[0]["href"]
25
+ # try:
26
+ # talk_response = requests.get("https://en.wikipedia.org" + talk_url)
27
+ # talk_soup = BeautifulSoup(talk_response.content, 'html.parser')
28
+ # talk_texts = talk_soup.findAll(text=True)
29
+ # visible_texts = filter(tag_visible, talk_texts)
30
+ # return u" ".join(t.strip() for t in visible_texts)
31
+
32
+ # except Exception as error:
33
+ # print('Error occured: {}'.format(error))
34
+
35
+ # classifier = pipeline(model="amitkayal/bert-finetuned-sem_eval-english", top_k=None)
36
+
37
+ # def tone_talkpage(url):
38
+ # talk_content = getTalkPage(url)
39
+ # tone_labels = {'anger': 0, 'anticipation': 0, 'disgust': 0, 'fear': 0, 'joy': 0, 'love': 0, 'optimism': 0, 'pessimism': 0, 'sadness': 0, 'surprise': 0, 'trust': 0}
40
+ # if talk_content:
41
+ # breakdown = talk_content.split()
42
+ # n = 200 #because the max amount of sequence length is 512
43
+ # breakdown_lst = [' '.join(breakdown[i:i+n]) for i in range(0,len(talk_content),n)]
44
+ # for ele in breakdown_lst:
45
+ # res = classifier(ele)[0]
46
+ # for tone_res in res:
47
+ # tone_labels[tone_res["label"]] += tone_res["score"]
48
 
49
+ # lst_len = len(breakdown_lst)
50
+ # for key, val in tone_labels.items():
51
+ # tone_labels[key] = val/lst_len
52
 
53
+ # return tone_labels
54
 
55
 
56
+ iface = gr.Interface(fn=check_source_quality, inputs="text", outputs="text")
57
  iface.launch()