MahmoudNasser committed
Commit 0901162 · 1 Parent(s): 87943e2

Upload 8 files

Files changed (8)
  1. Classes.py +159 -0
  2. Cleaning.py +84 -0
  3. Database.py +68 -0
  4. main.py +47 -0
  5. models.py +43 -0
  6. requirements.txt +18 -0
  7. schemas.py +19 -0
  8. test.py +9 -0
Classes.py ADDED
@@ -0,0 +1,159 @@
+ from firebase_admin import firestore
+
+ # Firestore client (assumes firebase_admin.initialize_app() has already
+ # been called, e.g. by Database.py)
+ db = firestore.client()
+
+
+ class Shakwa(object):
+     def __init__(self, address, complaintbody, date, governorate, id, organization, summary, title, userid):
+         self.address = address
+         self.complaintbody = complaintbody
+         self.date = date
+         self.governorate = governorate
+         self.id = id
+         self.organization = organization
+         self.summary = summary
+         self.title = title
+         self.userid = userid
+
+     # Get the complaint data from Firestore
+     def get_data(self):
+         # Get a document reference with the complaint's id
+         doc_ref = db.collection('complaints').document(self.id)
+         # Get the document snapshot
+         doc = doc_ref.get()
+         # Check if the document exists
+         if doc.exists:
+             # Return the document data as a Shakwa object
+             return Shakwa.from_dict(doc.to_dict())
+         else:
+             # Return None if the document does not exist
+             return None
+
+     # Convert a dictionary to a Shakwa object
+     @staticmethod
+     def from_dict(source):
+         # Check if the source is a valid dictionary
+         if not isinstance(source, dict):
+             raise ValueError('Source is not a dictionary')
+         # Create a Shakwa object with the source values,
+         # in the same order as __init__ (id comes fifth)
+         shakwa = Shakwa(
+             source['address'],
+             source['complaintbody'],
+             source['date'],
+             source['governorate'],
+             source['id'],
+             source['organization'],
+             source['summary'],
+             source['title'],
+             source['userid'],
+         )
+         # Return the Shakwa object
+         return shakwa
+
+     # Convert a Shakwa object to a dictionary
+     def to_dict(self):
+         # Create a dictionary with the complaint's attributes
+         dest = {
+             'address': self.address,
+             'complaintbody': self.complaintbody,
+             'date': self.date,
+             'governorate': self.governorate,
+             'organization': self.organization,
+             'summary': self.summary,
+             'title': self.title,
+             'userid': self.userid,
+             'id': self.id,
+         }
+         # Return the dictionary
+         return dest
+
+     # Represent a Shakwa object as a string
+     def __repr__(self):
+         return (
+             f'Shakwa('
+             f'address={self.address}, '
+             f'complaintbody={self.complaintbody}, '
+             f'date={self.date}, '
+             f'governorate={self.governorate}, '
+             f'organization={self.organization}, '
+             f'summary={self.summary}, '
+             f'title={self.title}, '
+             f'userid={self.userid}, '
+             f'id={self.id}'
+             f')'
+         )
+
+
+ class Feedback(object):
+     def __init__(self, date, feedback, id, review, userid):
+         self.date = date
+         self.feedback = feedback
+         self.id = id
+         self.review = review
+         self.userid = userid
+
+     # Get the feedback data from Firestore
+     def get_data(self):
+         # Get a document reference with the feedback's id
+         doc_ref = db.collection('feedbacks').document(self.id)
+         # Get the document snapshot
+         doc = doc_ref.get()
+         # Check if the document exists
+         if doc.exists:
+             # Return the document data as a Feedback object
+             return Feedback.from_dict(doc.to_dict())
+         else:
+             # Return None if the document does not exist
+             return None
+
+     # Convert a dictionary to a Feedback object
+     @staticmethod
+     def from_dict(source):
+         # Check if the source is a valid dictionary
+         if not isinstance(source, dict):
+             raise ValueError('Source is not a dictionary')
+         # Create a Feedback object with the source values
+         feedback = Feedback(
+             source['date'],
+             source['feedback'],
+             source['id'],
+             source['review'],
+             source['userid'],
+         )
+         # Return the instance (the original returned the class itself)
+         return feedback
+
+     # Convert a Feedback object to a dictionary
+     def to_dict(self):
+         # Create a dictionary with the feedback's attributes
+         dest = {
+             'date': self.date,
+             'feedback': self.feedback,
+             'id': self.id,
+             'review': self.review,
+             'userid': self.userid,
+         }
+         # Return the dictionary
+         return dest
+
+     # Represent a Feedback object as a string
+     def __repr__(self):
+         return (
+             f'Feedback('
+             f'date={self.date}, '
+             f'feedback={self.feedback}, '
+             f'id={self.id}, '
+             f'review={self.review}, '
+             f'userid={self.userid}'
+             f')'
+         )
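A minimal round-trip sketch for the classes above (field values are hypothetical; importing Classes already requires an initialized Firebase app, since the module creates a Firestore client at import time):

    shakwa = Classes.Shakwa(
        address='شارع التحرير',
        complaintbody='نص الشكوى',
        date=1672531200000,
        governorate='القاهرة',
        id='abc123',
        organization='الحي',
        summary=None,
        title='عنوان الشكوى',
        userid='user-1',
    )
    # to_dict/from_dict are inverses once the argument order matches __init__
    assert Classes.Shakwa.from_dict(shakwa.to_dict()).to_dict() == shakwa.to_dict()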
Cleaning.py ADDED
@@ -0,0 +1,84 @@
+ import re
+
+ import pyarabic.araby as ar
+ from nltk.stem.isri import ISRIStemmer
+ from pyarabic.araby import strip_tashkeel, strip_tatweel
+
+ isristemmer = ISRIStemmer()
+
+
+ def stemming(txt):
+     return isristemmer.stem(txt)
+
+
+ # drop one-letter tokens
+ def remove_singleCharacter(text):
+     text_tokenized = ar.tokenize(text)
+     clean_txt = ''
+     for word in text_tokenized:
+         if len(word) != 1:
+             clean_txt = clean_txt + word + ' '
+     return clean_txt[:-1]
+
+
+ # remove Latin and Arabic punctuation
+ def remove_punctuations(text):
+     punc = '''()-[]{};:'"\,<>./@#$%^&*،؛_~'''
+     arabic_punctuations = '''`÷×؛_ـ،/:".,'~¦+|”…“–ـ=﴾﴿ ﹱ ﹹ ⸀˓• ב'''
+     punctuations_list = punc + arabic_punctuations
+     for x in punctuations_list:
+         text = text.replace(x, ' ')
+     return text
+
+
+ def normalize_text(txt):
+     txt = strip_tashkeel(txt)
+     txt = strip_tatweel(txt)
+     # remove repeated characters
+     txt = ''.join(txt[i] for i in range(len(txt))
+                   if i == 0 or txt[i - 1] != txt[i])
+     return txt
+
+
+ def remove_stopwords(txt, path="stopword.txt"):
+     text_tokenized = txt.split(' ')
+     clean_txt = ''
+     with open(path, 'r', encoding='utf-8') as arabic_stop_words_file:
+         arabic_stop_words = arabic_stop_words_file.read().split('\n')
+     for word in text_tokenized:
+         if word not in arabic_stop_words:
+             clean_txt = clean_txt + word + ' '
+     return clean_txt[:-1]
+
+
+ def Remove_unwanted(text):
+     # remove links, digits and Latin characters
+     text = re.sub(r'^https?:\/\/.*[\r\n]*', ' ', text, flags=re.MULTILINE)
+     text = re.sub(r'https?\S+', ' ', text)
+     text = re.sub(r'[a-zA-Z]+', ' ', text)
+     text = re.sub(r'\d+', ' ', text)
+     # normalize alef, alef maqsura and hamza variants
+     text = re.sub(r'[إأٱآا]', 'ا', text)
+     text = re.sub(r'ى', 'ي', text)
+     text = re.sub(r'[ؤئ]', 'ء', text)
+     # collapse whitespace and trim
+     text = re.sub(r'\s+', ' ', text)
+     return text.strip()
+
+
+ def txt_preprocess(text):
+     text = normalize_text(text)
+     text = stemming(text)
+     text = remove_stopwords(text)
+     text = remove_punctuations(text)
+     text = Remove_unwanted(text)
+     return text
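A quick sketch of the full pipeline (the exact output depends on the ISRI stemmer and on the contents of stopword.txt, which is assumed to sit next to the module):

    sample = 'أنا ذاهبٌ إلى المدرسةةة غداً http://example.com 123'
    print(Cleaning.txt_preprocess(sample))
    # tashkeel stripped, repeated characters collapsed, the link, digits and
    # Latin characters removed, stopwords filtered, remaining words stemmed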
Database.py ADDED
@@ -0,0 +1,68 @@
+ # Import the Firebase Admin SDK
+ import firebase_admin
+ from firebase_admin import credentials
+ from firebase_admin import firestore
+ import time
+ from collections import Counter
+
+ import Cleaning
+
+
+ def current_milli_time():
+     return round(time.time() * 1000)
+
+
+ # Firebase initialization: this must run before any firestore.client() call,
+ # including the one executed when Classes is imported below
+ cred = credentials.Certificate(
+     "text-to-emotions-firebase-adminsdk-8isbn-dffbdf01e8.json")
+ firebase_admin.initialize_app(cred)
+
+ import Classes  # noqa: E402
+
+ db = firestore.client()
+
+ all_reviews = db.collection("complaints")
+ # Query the collection: today's complaints that have no summary yet
+ today_date = current_milli_time()
+ documents_to_summarize = all_reviews.where("summary", "==", None).where("date", "==", today_date).get()
+ feedbacks = db.collection("feedbacks").where("feedbacks", "==", False).get()
+
+
+ documents = []
+ # get all documents for today that have no summary
+ def get_all_document():
+     for doc in documents_to_summarize:
+         document = Classes.Shakwa.from_dict(source=doc.to_dict())
+         documents.append(document)
+     return documents
+
+
+ def get_num_of_words():
+     for doc in documents_to_summarize:
+         # snapshots have to be converted to dicts before field access
+         print(len(doc.to_dict()['complaintbody']))
+
+
+ def shakwa_common_words():
+     # count cleaned words across all complaint bodies
+     words = []
+     for doc in documents_to_summarize:
+         body = Cleaning.txt_preprocess(doc.to_dict()['complaintbody'])
+         words.extend(body.split())
+     return dict(Counter(words))
+
+
+ def feedback_common_words():
+     # count cleaned words across all feedback texts
+     words = []
+     for doc in feedbacks:
+         text = Cleaning.txt_preprocess(doc.to_dict()['feedback'])
+         words.extend(text.split())
+     return dict(Counter(words))
+
+
+ def get_most_common_places():
+     # count complaints per governorate
+     dic_place_count = {}
+     for doc in all_reviews.get():
+         gov = doc.to_dict().get('governorate')
+         dic_place_count[gov] = dic_place_count.get(gov, 0) + 1
+     return dic_place_count
+
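A hedged driver for these helpers (requires the service-account JSON referenced above; the collection and field names are the ones used in this file):

    import Database

    for shakwa in Database.get_all_document():
        print(shakwa)
    print(Database.shakwa_common_words())
    print(Database.get_most_common_places())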
main.py ADDED
@@ -0,0 +1,47 @@
+ from fastapi import FastAPI, status, HTTPException
+ import schemas
+ import Database  # imported for its side effect: Firebase initialization
+ import models
+ import Cleaning
+
+
+ app = FastAPI()
+
+
+ @app.get('/')
+ def index():
+     return "This is the default page"
+
+
+ @app.get('/reviews', status_code=status.HTTP_200_OK)
+ def get_reviews():
+     return "Reviews"
+
+
+ # Processing a Shakwa (complaint)
+ @app.post('/predict_summary')
+ async def predict_summary(request: schemas.shakwa):
+     if request.complaintbody:
+         return models.modelsummary(request.complaintbody)
+     else:
+         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing complaintbody value")
+
+
+ # Processing feedback
+ @app.post('/predict_sentiment')
+ def predict_sentiment(request: schemas.feedback):
+     if request.text:
+         return models.modelpredict(request.text)
+     else:
+         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing text value")
+
+
+ # Analysis
+ @app.get('/CommonWords')
+ def common_words():
+     data = {"Most Common Words": {"انهردا": 11, "بكرا": 12}}
+     return data
+
+
+ @app.get('/CommonPlaces')
+ def common_places():
+     data = {"Most Common Places": {"مدينة نصر": 11, "الدقي": 12}}
+     return data
+
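A sketch of calling the summary endpoint once the app is running (field values are hypothetical; schemas.shakwa requires every field shown):

    import requests

    payload = {
        'address': 'شارع التحرير', 'complaintbody': 'نص الشكوى الكامل هنا',
        'date': 1672531200000, 'governorate': 'القاهرة', 'doc_id': 'abc123',
        'organization': 'الحي', 'summary': '', 'title': 'عنوان', 'user_id': 'user-1',
    }
    r = requests.post('http://localhost:8000/predict_summary', json=payload)
    print(r.json())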
models.py ADDED
@@ -0,0 +1,43 @@
+ import pandas as pd
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+ from huggingface_hub import from_pretrained_keras
+
+ from Cleaning import *
+
+ # Summarization model
+ model_name = "abdalrahmanshahrour/auto-arabic-summarization"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ summarization_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ pipeline1 = pipeline("text2text-generation", model=summarization_model, tokenizer=tokenizer)
+
+ # Sentiment (emotion) model
+ model_sentiment = from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-76jP')
+
+
+ # summarize a complaint body
+ def modelsummary(data):
+     result = pipeline1(data,
+                        pad_token_id=tokenizer.eos_token_id,
+                        num_beams=4,
+                        repetition_penalty=3.0,
+                        max_length=600,
+                        length_penalty=0.50,
+                        no_repeat_ngram_size=3)[0]['generated_text']
+     result = remove_punctuations(result)
+     return {'summary': result}
+
+
+ # predict emotion scores for a feedback text
+ def modelpredict(data):
+     data = txt_preprocess(data)
+     pred = model_sentiment.predict(pd.Series([data]))
+     return {'anger': float(pred[0][0]), 'sadness': float(pred[0][1]), 'joy': float(pred[0][2]), 'surprise': float(pred[0][3]),
+             'love': float(pred[0][4]), 'sympathy': float(pred[0][5]), 'fear': float(pred[0][6])}
+
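Called directly, the two entry points look like this (a sketch; the first run downloads both checkpoints from the Hugging Face Hub):

    import models

    print(models.modelsummary('نص عربي طويل يحتاج إلى تلخيص ...'))
    print(models.modelpredict('أنا سعيد جدا اليوم'))
    # modelpredict returns one float per emotion class:
    # anger, sadness, joy, surprise, love, sympathy, fear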
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ keras
+ tensorflow
+ farasapy
+ Arabic-Stopwords
+ swifter
+ pyarabic
+ lime
+ huggingface_hub
+ nltk
+ transformers
+ arabert
+ torch==1.13.1
+ fastapi==0.74.*
+ requests==2.27.*
+ sentencepiece==0.1.*
+ firebase-admin
+ uvicorn[standard]==0.17.*
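Presumably installed with pip install -r requirements.txt; torch, fastapi, requests, sentencepiece and uvicorn are pinned, the remaining packages float to their latest versions. (The original listed requests twice, once unpinned; only the pinned entry is kept.)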
schemas.py ADDED
@@ -0,0 +1,19 @@
+ from pydantic import BaseModel
+
+
+ class shakwa(BaseModel):
+     address: str
+     complaintbody: str
+     date: int
+     governorate: str
+     doc_id: str
+     organization: str
+     summary: str
+     title: str
+     user_id: str
+
+
+ class feedback(BaseModel):
+     Date: str
+     Uid: int
+     text: str
test.py ADDED
@@ -0,0 +1,9 @@
+ import requests
+ import schemas
+
+ feedback = schemas.feedback(Date="Mahmoud", Uid=12, text="Nasser")
+
+ print(feedback)
+
+ # POST the feedback as JSON to the sentiment endpoint defined in main.py
+ # (the original targeted /req, which no route defines)
+ response = requests.post(url="http://localhost:8000/predict_sentiment", json=feedback.dict())
+ print(response.json())
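Note that the server has to be running first, for example via uvicorn main:app --reload (uvicorn is pinned in requirements.txt), for this request to succeed.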