Add untrue information detector
main.py CHANGED
@@ -11,12 +11,18 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 def load_models():
     st.session_state.loaded = True
 
-    with open(
-    st.session_state.
+    with open('models/tfidf_vectorizer_svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
+        st.session_state.tfidf_vectorizer_disinformation = pickle.load(f)
+
+    with open('models/tfidf_vectorizer_untrue_inform_detection_tfidf_bg_0.96_F1_score', 'rb') as f:
+        st.session_state.tfidf_vectorizer_untrue_inf = pickle.load(f)
 
     with open('models/svm_model_2_classes_gpt_chatgpt_detection_tfidf_bg_0.886_F1_score.pkl', 'rb') as f:
         st.session_state.gpt_detector = pickle.load(f)
 
+    with open('models/SVM_model_untrue_inform_detection_tfidf_bg_0.96_F1_score.pkl', 'rb') as f:
+        st.session_state.untrue_detector = pickle.load(f)
+
     st.session_state.bert = pipeline(task="text-classification",
                                      model=BertForSequenceClassification.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN'], num_labels=2),
                                      tokenizer=AutoTokenizer.from_pretrained("TRACES/private-bert", use_auth_token=os.environ['ACCESS_TOKEN']))
@@ -38,9 +44,15 @@ def switch_lang(lang):
 if 'lang' not in st.session_state:
     st.session_state.lang = 'bg'
 
-if 'gpt_detector_result' not in st.session_state:
+if 'gpt_detector_result' not in st.session_state \
+        and 'untrue_detector_result' not in st.session_state \
+        and 'bert_result' not in st.session_state:
     st.session_state.gpt_detector_result = ''
     st.session_state.gpt_detector_probability = [1, 0]
+
+    st.session_state.untrue_detector_result = ''
+    st.session_state.untrue_detector_probability = [1, 0]
+
     st.session_state.bert_result = [{'label': '', 'score': 1}]
 
 content = load_content()
@@ -78,10 +90,14 @@ if st.session_state.agree:
                               content['text_placeholder'][st.session_state.lang]).strip('\n')
 
     if st.button(content['analyze_button'][st.session_state.lang]):
-
-
-        st.session_state.
-
+        user_tfidf_disinformation = st.session_state.tfidf_vectorizer_disinformation.transform([user_input])
+        st.session_state.gpt_detector_result = st.session_state.gpt_detector.predict(user_tfidf_disinformation)[0]
+        st.session_state.gpt_detector_probability = st.session_state.gpt_detector.predict_proba(user_tfidf_disinformation)[0]
+
+        user_tfidf_untrue_inf = st.session_state.tfidf_vectorizer_untrue_inf.transform([user_input])
+        st.session_state.untrue_detector_result = st.session_state.untrue_detector.predict(user_tfidf_untrue_inf)[0]
+        st.session_state.untrue_detector_probability = st.session_state.untrue_detector.predict_proba(user_tfidf_untrue_inf)[0]
+
         st.session_state.bert_result = st.session_state.bert(user_input)
 
         if st.session_state.gpt_detector_result == 1:
@@ -92,6 +108,15 @@ if st.session_state.agree:
             st.success(content['gpt_getect_no'][st.session_state.lang] +
                        str(round(st.session_state.gpt_detector_probability[0] * 100, 2)) +
                        content['gpt_no_proba'][st.session_state.lang], icon="✅")
+
+        if st.session_state.untrue_detector_result == 1:
+            st.warning(content['untrue_getect_yes'][st.session_state.lang] +
+                       str(round(st.session_state.untrue_detector_probability[1] * 100, 2)) +
+                       content['untrue_yes_proba'][st.session_state.lang], icon="⚠️")
+        else:
+            st.success(content['untrue_getect_no'][st.session_state.lang] +
+                       str(round(st.session_state.untrue_detector_probability[0] * 100, 2)) +
+                       content['untrue_no_proba'][st.session_state.lang], icon="✅")
 
         if st.session_state.bert_result[0]['label'] == 'LABEL_1':
             st.warning(content['bert_yes_1'][st.session_state.lang] +