# Module-level imports and model loading.
#
# Everything below runs at import time: the summarization tokenizer/model and
# the Keras emotion model are fetched via their ``from_pretrained*`` loaders
# as soon as this module is imported.

from keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, AutoModelForSeq2SeqLM, pipeline
from arabert.preprocess import ArabertPreprocessor
from huggingface_hub import from_pretrained_keras
from collections import Counter
from transformers import AutoTokenizer, AutoModelForCausalLM
import Cleaning
import threading
import pandas as pd
import numpy as np


# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_name = "abdalrahmanshahrour/auto-arabic-summarization"

# NOTE(review): built with an empty model name and not referenced anywhere in
# the visible code -- confirm whether it is still needed and which model name
# it should be configured with.
preprocessor = ArabertPreprocessor(model_name="")

tokenizer = AutoTokenizer.from_pretrained(model_name)

# The seq2seq model gets its own name: the original bound it to
# ``modelsummary``, which is also the name of the summarization function
# defined later in this file, so the function silently overwrote the model.
summary_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Text-to-text generation pipeline used by ``modelsummary``.
pipeline1 = pipeline(
    "text2text-generation",
    model=summary_model,
    tokenizer=tokenizer,
)

# Keras model used by ``modelpredict`` to score emotion classes.
model_sentiment = from_pretrained_keras('MahmoudNasser/GRU-MODEL-EMOTION-AR-TEXT-76jP')
def modelsummary(data):
    """Generate a summary of ``data`` and return it without punctuation.

    The input is run through the module-level ``pipeline1`` with beam search
    and repetition controls; the first generated candidate is then passed
    through ``remove_punctuations``.

    Args:
        data: Input forwarded unchanged to ``pipeline1`` (presumably the text
            to summarize -- confirm against callers).

    Returns:
        dict: ``{'summary': <text>}`` holding the post-processed output.
    """
    generated = pipeline1(
        data,
        # Pad with the tokenizer's end-of-sequence token id.
        pad_token_id=tokenizer.eos_token_id,
        num_beams=4,
        repetition_penalty=3.0,
        max_length=600,
        length_penalty=.50,
        no_repeat_ngram_size=3,
    )

    # Only the first returned candidate is used.
    result = generated[0]['generated_text']

    # NOTE(review): ``remove_punctuations`` is neither defined nor imported in
    # the visible part of this file; if it lives in ``Cleaning`` this call
    # needs to be qualified -- confirm where it comes from.
    result = remove_punctuations(result)

    return {'summary': result}
def modelpredict(data):
    """Predict the emotion label for ``data``.

    The input is cleaned with ``Cleaning.txt_preprocess``, wrapped in a
    one-element ``pandas.Series`` and scored by the module-level
    ``model_sentiment``; the highest-scoring class index is mapped to a name.

    Args:
        data: Raw input forwarded to ``Cleaning.txt_preprocess`` (presumably a
            single text string -- confirm against callers).

    Returns:
        dict: ``{"label": <emotion name>}``.

    Raises:
        KeyError: If the predicted class index is not one of 0-6.
    """
    # Class index -> label name. Named ``labels`` rather than ``map`` so the
    # builtin is not shadowed.
    # NOTE(review): the index order is assumed to match the model's training
    # label encoding -- confirm.
    labels = {
        0: 'anger',
        1: 'sadness',
        2: 'joy',
        3: 'surprise',
        4: 'love',
        5: 'sympathy',
        6: 'fear',
    }

    cleaned = Cleaning.txt_preprocess(data)
    pred = model_sentiment.predict(pd.Series([cleaned]))

    # Argmax over the last axis; ``[0]`` selects the single input row.
    class_index = np.argmax(pred, axis=-1)[0]
    return {"label": labels[class_index]}