|
import threading
from collections import Counter

import pandas as pd
from arabert.preprocess import ArabertPreprocessor
from huggingface_hub import from_pretrained_keras
from keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.utils import to_categorical
from transformers import BertTokenizer, AutoModelForSeq2SeqLM, pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM

from .Cleaning import *
|
|
|
|
|
# Models are loaded once, at import time, and shared by the functions below.

# Hugging Face identifier of the summarization checkpoint.
model_name = "abdalrahmanshahrour/auto-arabic-summarization"

# NOTE(review): constructed with an empty model name and not referenced by the
# functions visible in this module — confirm whether other code relies on it.
preprocessor = ArabertPreprocessor(model_name="")

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Kept under its own name: the `modelsummary()` function defined later in this
# module rebinds `modelsummary`, so the model must not share that identifier.
_summary_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Text-to-text generation pipeline wrapping the summarization model.
pipeline1 = pipeline(
    "text2text-generation",
    model=_summary_model,
    tokenizer=tokenizer,
)

# Keras emotion model fetched from the Hugging Face Hub.
model_sentiment = from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-76jP')
|
|
|
|
|
|
|
def modelsummary(data):
    """Summarize *data* with the module-level ``pipeline1``.

    The input is passed to ``pipeline1`` together with fixed generation
    settings; the ``generated_text`` of the first result is run through
    ``remove_punctuations`` and returned.

    Args:
        data: Input handed unchanged to ``pipeline1``.

    Returns:
        A dict of the form ``{'summary': <cleaned generated text>}``.
    """
    generation_options = dict(
        pad_token_id=tokenizer.eos_token_id,
        num_beams=4,
        repetition_penalty=3.0,
        max_length=600,
        length_penalty=.50,
        no_repeat_ngram_size=3,
    )
    outputs = pipeline1(data, **generation_options)
    first_output = outputs[0]
    summary_text = remove_punctuations(first_output['generated_text'])
    return {'summary': summary_text}
|
|
|
|
|
|
|
def modelpredict(data):
    """Return per-emotion scores for *data* from ``model_sentiment``.

    The input is cleaned with ``txt_preprocess``, wrapped in a one-element
    ``pandas.Series`` and passed to ``model_sentiment.predict``. The first
    seven values of the first prediction row are mapped, in order, onto the
    emotion labels.

    Args:
        data: Raw input handed to ``txt_preprocess``.

    Returns:
        A dict mapping each of ``'anger'``, ``'sadness'``, ``'joy'``,
        ``'surprise'``, ``'love'``, ``'sympathy'`` and ``'fear'`` to a
        ``float`` score.

    Raises:
        IndexError: If the prediction row has fewer than seven entries.
    """
    # Order is significant: position i is paired with output column i.
    # NOTE(review): assumes this matches the model's class order — confirm.
    emotion_labels = (
        'anger', 'sadness', 'joy', 'surprise', 'love', 'sympathy', 'fear',
    )

    cleaned = txt_preprocess(data)
    pred = model_sentiment.predict(pd.Series([cleaned]))
    scores = pred[0]

    # Explicit indexing (rather than zip) keeps the IndexError on a short row.
    return {label: float(scores[i]) for i, label in enumerate(emotion_labels)}
|
|
|
|