MahmoudNasser committed
Commit 0901162 · 1 Parent(s): 87943e2

Upload 8 files

Files changed (8)
  1. Classes.py +159 -0
  2. Cleaning.py +84 -0
  3. Database.py +68 -0
  4. main.py +47 -0
  5. models.py +43 -0
  6. requirements.txt +18 -0
  7. schemas.py +19 -0
  8. test.py +9 -0
Classes.py ADDED
@@ -0,0 +1,159 @@
+ from firebase_admin import firestore
+
+ # Firestore client (assumes firebase_admin.initialize_app() has already
+ # been called, e.g. by Database.py)
+ db = firestore.client()
+
+
+ class Shakwa(object):
+     def __init__(self, address, complaintbody, date, governorate, id, organization, summary, title, userid):
+         self.address = address
+         self.complaintbody = complaintbody
+         self.date = date
+         self.governorate = governorate
+         self.id = id
+         self.organization = organization
+         self.summary = summary
+         self.title = title
+         self.userid = userid
+
+     # Get the complaint data from Firestore
+     def get_data(self):
+         # Get a document reference with the complaint's id
+         doc_ref = db.collection('complaints').document(self.id)
+         # Get the document snapshot
+         doc = doc_ref.get()
+         # Check if the document exists
+         if doc.exists:
+             # Return the document data as a Shakwa object
+             return Shakwa.from_dict(doc.to_dict())
+         else:
+             # Return None if the document does not exist
+             return None
+
+     # Convert a dictionary to a Shakwa object
+     @staticmethod
+     def from_dict(source):
+         # Check if the source is a valid dictionary
+         if not isinstance(source, dict):
+             raise ValueError('Source is not a dictionary')
+         # Create a Shakwa object with the source values,
+         # in the same order as __init__ (id comes fifth)
+         shakwa = Shakwa(
+             source['address'],
+             source['complaintbody'],
+             source['date'],
+             source['governorate'],
+             source['id'],
+             source['organization'],
+             source['summary'],
+             source['title'],
+             source['userid'],
+         )
+         # Return the Shakwa object
+         return shakwa
+
+     # Convert a Shakwa object to a dictionary
+     def to_dict(self):
+         # Create a dictionary with the complaint's attributes
+         dest = {
+             'address': self.address,
+             'complaintbody': self.complaintbody,
+             'date': self.date,
+             'governorate': self.governorate,
+             'organization': self.organization,
+             'summary': self.summary,
+             'title': self.title,
+             'userid': self.userid,
+             'id': self.id,
+         }
+         # Return the dictionary
+         return dest
+
+     # Represent a Shakwa object as a string
+     def __repr__(self):
+         return (
+             f'Shakwa('
+             f'address={self.address}, '
+             f'complaintbody={self.complaintbody}, '
+             f'date={self.date}, '
+             f'governorate={self.governorate}, '
+             f'organization={self.organization}, '
+             f'summary={self.summary}, '
+             f'title={self.title}, '
+             f'userid={self.userid}, '
+             f'id={self.id}'
+             f')'
+         )
+
+
+ class Feedback(object):
+     def __init__(self, date, feedback, id, review, userid):
+         self.date = date
+         self.feedback = feedback
+         self.id = id
+         self.review = review
+         self.userid = userid
+
+     # Get the feedback data from Firestore
+     def get_data(self):
+         # Get a document reference with the feedback's id
+         doc_ref = db.collection('feedbacks').document(self.id)
+         # Get the document snapshot
+         doc = doc_ref.get()
+         # Check if the document exists
+         if doc.exists:
+             # Return the document data as a Feedback object
+             return Feedback.from_dict(doc.to_dict())
+         else:
+             # Return None if the document does not exist
+             return None
+
+     # Convert a dictionary to a Feedback object
+     @staticmethod
+     def from_dict(source):
+         # Check if the source is a valid dictionary
+         if not isinstance(source, dict):
+             raise ValueError('Source is not a dictionary')
+         # Create a Feedback object with the source values
+         feedback = Feedback(
+             source['date'],
+             source['feedback'],
+             source['id'],
+             source['review'],
+             source['userid'],
+         )
+         # Return the instance (the original returned the class itself)
+         return feedback
+
+     # Convert a Feedback object to a dictionary
+     def to_dict(self):
+         # Create a dictionary with the feedback's attributes
+         dest = {
+             'date': self.date,
+             'feedback': self.feedback,
+             'id': self.id,
+             'review': self.review,
+             'userid': self.userid,
+         }
+         # Return the dictionary
+         return dest
+
+     # Represent a Feedback object as a string
+     def __repr__(self):
+         return (
+             f'Feedback('
+             f'date={self.date}, '
+             f'feedback={self.feedback}, '
+             f'id={self.id}, '
+             f'review={self.review}, '
+             f'userid={self.userid}'
+             f')'
+         )
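A minimal round-trip sketch for the classes above (field values are hypothetical; importing Classes already requires an initialized Firebase app, since the module creates a Firestore client at import time):

    shakwa = Classes.Shakwa(
        address='شارع التحرير',
        complaintbody='نص الشكوى',
        date=1672531200000,
        governorate='القاهرة',
        id='abc123',
        organization='الحي',
        summary=None,
        title='عنوان الشكوى',
        userid='user-1',
    )
    # to_dict/from_dict are inverses once the argument order matches __init__
    assert Classes.Shakwa.from_dict(shakwa.to_dict()).to_dict() == shakwa.to_dict()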
Cleaning.py ADDED
@@ -0,0 +1,84 @@
+ import re
+
+ import pyarabic.araby as ar
+ from nltk.stem.isri import ISRIStemmer
+ from pyarabic.araby import strip_tashkeel, strip_tatweel
+
+ isristemmer = ISRIStemmer()
+
+
+ def stemming(txt):
+     return isristemmer.stem(txt)
+
+
+ # drop one-letter tokens
+ def remove_singleCharacter(text):
+     text_tokenized = ar.tokenize(text)
+     clean_txt = ''
+     for word in text_tokenized:
+         if len(word) != 1:
+             clean_txt = clean_txt + word + ' '
+     return clean_txt[:-1]
+
+
+ # remove Latin and Arabic punctuation
+ def remove_punctuations(text):
+     punc = '''()-[]{};:'"\,<>./@#$%^&*،؛_~'''
+     arabic_punctuations = '''`÷×؛_ـ،/:".,'~¦+|”…“–ـ=﴾﴿ ﹱ ﹹ ⸀˓• ב'''
+     punctuations_list = punc + arabic_punctuations
+     for x in punctuations_list:
+         text = text.replace(x, ' ')
+     return text
+
+
+ def normalize_text(txt):
+     txt = strip_tashkeel(txt)
+     txt = strip_tatweel(txt)
+     # remove repeated characters
+     txt = ''.join(txt[i] for i in range(len(txt))
+                   if i == 0 or txt[i - 1] != txt[i])
+     return txt
+
+
+ def remove_stopwords(txt, path="stopword.txt"):
+     text_tokenized = txt.split(' ')
+     clean_txt = ''
+     with open(path, 'r', encoding='utf-8') as arabic_stop_words_file:
+         arabic_stop_words = arabic_stop_words_file.read().split('\n')
+     for word in text_tokenized:
+         if word not in arabic_stop_words:
+             clean_txt = clean_txt + word + ' '
+     return clean_txt[:-1]
+
+
+ def Remove_unwanted(text):
+     # remove links, digits and Latin characters
+     text = re.sub(r'^https?:\/\/.*[\r\n]*', ' ', text, flags=re.MULTILINE)
+     text = re.sub(r'https?\S+', ' ', text)
+     text = re.sub(r'[a-zA-Z]+', ' ', text)
+     text = re.sub(r'\d+', ' ', text)
+     # normalize alef, alef maqsura and hamza variants
+     text = re.sub(r'[إأٱآا]', 'ا', text)
+     text = re.sub(r'ى', 'ي', text)
+     text = re.sub(r'[ؤئ]', 'ء', text)
+     # collapse whitespace and trim
+     text = re.sub(r'\s+', ' ', text)
+     return text.strip()
+
+
+ def txt_preprocess(text):
+     text = normalize_text(text)
+     text = stemming(text)
+     text = remove_stopwords(text)
+     text = remove_punctuations(text)
+     text = Remove_unwanted(text)
+     return text
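A quick sketch of the full pipeline (the exact output depends on the ISRI stemmer and on the contents of stopword.txt, which is assumed to sit next to the module):

    sample = 'أنا ذاهبٌ إلى المدرسةةة غداً http://example.com 123'
    print(Cleaning.txt_preprocess(sample))
    # tashkeel stripped, repeated characters collapsed, the link, digits and
    # Latin characters removed, stopwords filtered, remaining words stemmed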
Database.py ADDED
@@ -0,0 +1,68 @@
+ # Import the Firebase Admin SDK
+ import firebase_admin
+ from firebase_admin import credentials
+ from firebase_admin import firestore
+ import time
+ from collections import Counter
+
+ import Cleaning
+
+
+ def current_milli_time():
+     return round(time.time() * 1000)
+
+
+ # Firebase initialization: this must run before any firestore.client() call,
+ # including the one executed when Classes is imported below
+ cred = credentials.Certificate(
+     "text-to-emotions-firebase-adminsdk-8isbn-dffbdf01e8.json")
+ firebase_admin.initialize_app(cred)
+
+ import Classes  # noqa: E402
+
+ db = firestore.client()
+
+ all_reviews = db.collection("complaints")
+ # Query the collection: today's complaints that have no summary yet
+ today_date = current_milli_time()
+ documents_to_summarize = all_reviews.where("summary", "==", None).where("date", "==", today_date).get()
+ feedbacks = db.collection("feedbacks").where("feedbacks", "==", False).get()
+
+
+ documents = []
+ # get all documents for today that have no summary
+ def get_all_document():
+     for doc in documents_to_summarize:
+         document = Classes.Shakwa.from_dict(source=doc.to_dict())
+         documents.append(document)
+     return documents
+
+
+ def get_num_of_words():
+     for doc in documents_to_summarize:
+         # snapshots have to be converted to dicts before field access
+         print(len(doc.to_dict()['complaintbody']))
+
+
+ def shakwa_common_words():
+     # count cleaned words across all complaint bodies
+     words = []
+     for doc in documents_to_summarize:
+         body = Cleaning.txt_preprocess(doc.to_dict()['complaintbody'])
+         words.extend(body.split())
+     return dict(Counter(words))
+
+
+ def feedback_common_words():
+     # count cleaned words across all feedback texts
+     words = []
+     for doc in feedbacks:
+         text = Cleaning.txt_preprocess(doc.to_dict()['feedback'])
+         words.extend(text.split())
+     return dict(Counter(words))
+
+
+ def get_most_common_places():
+     # count complaints per governorate
+     dic_place_count = {}
+     for doc in all_reviews.get():
+         gov = doc.to_dict().get('governorate')
+         dic_place_count[gov] = dic_place_count.get(gov, 0) + 1
+     return dic_place_count
+
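A hedged driver for these helpers (requires the service-account JSON referenced above; the collection and field names are the ones used in this file):

    import Database

    for shakwa in Database.get_all_document():
        print(shakwa)
    print(Database.shakwa_common_words())
    print(Database.get_most_common_places())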
main.py ADDED
@@ -0,0 +1,47 @@
+ from fastapi import FastAPI, status, HTTPException
+ import schemas
+ import Database  # imported for its side effect: Firebase initialization
+ import models
+ import Cleaning
+
+
+ app = FastAPI()
+
+
+ @app.get('/')
+ def index():
+     return "This is the default page"
+
+
+ @app.get('/reviews', status_code=status.HTTP_200_OK)
+ def get_reviews():
+     return "Reviews"
+
+
+ # Processing a Shakwa (complaint)
+ @app.post('/predict_summary')
+ async def predict_summary(request: schemas.shakwa):
+     if request.complaintbody:
+         return models.modelsummary(request.complaintbody)
+     else:
+         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing complaintbody value")
+
+
+ # Processing feedback
+ @app.post('/predict_sentiment')
+ def predict_sentiment(request: schemas.feedback):
+     if request.text:
+         return models.modelpredict(request.text)
+     else:
+         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Missing text value")
+
+
+ # Analysis
+ @app.get('/CommonWords')
+ def common_words():
+     data = {"Most Common Words": {"انهردا": 11, "بكرا": 12}}
+     return data
+
+
+ @app.get('/CommonPlaces')
+ def common_places():
+     data = {"Most Common Places": {"مدينة نصر": 11, "الدقي": 12}}
+     return data
+
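A sketch of calling the summary endpoint once the app is running (field values are hypothetical; schemas.shakwa requires every field shown):

    import requests

    payload = {
        'address': 'شارع التحرير', 'complaintbody': 'نص الشكوى الكامل هنا',
        'date': 1672531200000, 'governorate': 'القاهرة', 'doc_id': 'abc123',
        'organization': 'الحي', 'summary': '', 'title': 'عنوان', 'user_id': 'user-1',
    }
    r = requests.post('http://localhost:8000/predict_summary', json=payload)
    print(r.json())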
models.py ADDED
@@ -0,0 +1,43 @@
+ import pandas as pd
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+ from huggingface_hub import from_pretrained_keras
+
+ from Cleaning import *
+
+ # Summarization model
+ model_name = "abdalrahmanshahrour/auto-arabic-summarization"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ summarization_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ pipeline1 = pipeline("text2text-generation", model=summarization_model, tokenizer=tokenizer)
+
+ # Sentiment (emotion) model
+ model_sentiment = from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-76jP')
+
+
+ # summarize a complaint body
+ def modelsummary(data):
+     result = pipeline1(data,
+                        pad_token_id=tokenizer.eos_token_id,
+                        num_beams=4,
+                        repetition_penalty=3.0,
+                        max_length=600,
+                        length_penalty=0.50,
+                        no_repeat_ngram_size=3)[0]['generated_text']
+     result = remove_punctuations(result)
+     return {'summary': result}
+
+
+ # predict emotion scores for a feedback text
+ def modelpredict(data):
+     data = txt_preprocess(data)
+     pred = model_sentiment.predict(pd.Series([data]))
+     return {'anger': float(pred[0][0]), 'sadness': float(pred[0][1]), 'joy': float(pred[0][2]), 'surprise': float(pred[0][3]),
+             'love': float(pred[0][4]), 'sympathy': float(pred[0][5]), 'fear': float(pred[0][6])}
+
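Called directly, the two entry points look like this (a sketch; the first run downloads both checkpoints from the Hugging Face Hub):

    import models

    print(models.modelsummary('نص عربي طويل يحتاج إلى تلخيص ...'))
    print(models.modelpredict('أنا سعيد جدا اليوم'))
    # modelpredict returns one float per emotion class:
    # anger, sadness, joy, surprise, love, sympathy, fear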
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ keras
+ tensorflow
+ farasapy
+ Arabic-Stopwords
+ swifter
+ pyarabic
+ lime
+ huggingface_hub
+ nltk
+ transformers
+ arabert
+ torch==1.13.1
+ fastapi==0.74.*
+ requests==2.27.*
+ sentencepiece==0.1.*
+ firebase-admin
+ uvicorn[standard]==0.17.*
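Presumably installed with pip install -r requirements.txt; torch, fastapi, requests, sentencepiece and uvicorn are pinned, the remaining packages float to their latest versions. (The original listed requests twice, once unpinned; only the pinned entry is kept.)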
schemas.py ADDED
@@ -0,0 +1,19 @@
+ from pydantic import BaseModel
+
+
+ class shakwa(BaseModel):
+     address: str
+     complaintbody: str
+     date: int
+     governorate: str
+     doc_id: str
+     organization: str
+     summary: str
+     title: str
+     user_id: str
+
+
+ class feedback(BaseModel):
+     Date: str
+     Uid: int
+     text: str
test.py ADDED
@@ -0,0 +1,9 @@
+ import requests
+ import schemas
+
+ feedback = schemas.feedback(Date="Mahmoud", Uid=12, text="Nasser")
+
+ print(feedback)
+
+ # POST the feedback as JSON to the sentiment endpoint defined in main.py
+ # (the original targeted /req, which no route defines)
+ response = requests.post(url="http://localhost:8000/predict_sentiment", json=feedback.dict())
+ print(response.json())
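Note that the server has to be running first, for example via uvicorn main:app --reload (uvicorn is pinned in requirements.txt), for this request to succeed.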