# Spaces:
# Runtime error
# Runtime error
import streamlit as st
from transformers import GPT2TokenizerFast, AutoModelForCausalLM
from arabert.preprocess import ArabertPreprocessor

# Checkpoint used for generation; the tokenizer is loaded from a separate repo.
MODEL_NAME = "malmarjeh/gpt2"
TOKENIZER_NAME = "aubmindlab/aragpt2-base"

# The prompt is right-padded up to this many tokens before generation.
PROMPT_PAD_LENGTH = 150
# Cap on prompt tokens plus generated tokens handed to generate().
MAX_TOTAL_LENGTH = 600

# Streamlit re-executes this script on every widget interaction, so the heavy
# objects are cached across reruns. On Streamlit releases that lack
# st.cache_resource this falls back to an uncached loader.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def load_resources():
    """Load the tokenizer, the language model and the text preprocessor.

    Returns:
        A ``(tokenizer, model, preprocessor)`` tuple. The tokenizer has the
        ``<pad>`` special token registered so it can pad prompts.
    """
    tokenizer = GPT2TokenizerFast.from_pretrained(TOKENIZER_NAME)
    # Registered once here instead of on every button click.
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    # NOTE(review): confirm ArabertPreprocessor recognizes this model name.
    preprocessor = ArabertPreprocessor(model_name=MODEL_NAME)
    return tokenizer, model, preprocessor


def summarize(text, tokenizer, model, preprocessor):
    """Generate a summary for ``text``.

    Args:
        text: Raw Arabic text entered by the user.
        tokenizer: Tokenizer returned by ``load_resources``.
        model: Causal language model returned by ``load_resources``.
        preprocessor: Preprocessor returned by ``load_resources``.

    Returns:
        The decoded summary string, or ``None`` when the tokenized prompt
        already reaches ``MAX_TOTAL_LENGTH`` and nothing could be generated.
    """
    processed_text = preprocessor.preprocess(text)
    formatted_text = '\n النص: ' + processed_text + ' \n الملخص: \n '

    # No truncation is requested, so a long prompt keeps all of its tokens and
    # the encoded length can exceed PROMPT_PAD_LENGTH.
    tokens = tokenizer(
        [formatted_text],
        return_tensors='pt',
        padding='max_length',
        max_length=PROMPT_PAD_LENGTH,
    )
    input_ids = tokens['input_ids']
    prompt_length = input_ids.shape[1]

    # max_length counts the prompt too; at or above it there is no budget
    # left for new tokens.
    if prompt_length >= MAX_TOTAL_LENGTH:
        return None

    output = model.generate(
        input_ids=input_ids,
        repetition_penalty=2.0,
        num_beams=5,
        max_length=MAX_TOTAL_LENGTH,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        bos_token_id=tokenizer.bos_token_id,
    )

    # Drop the prompt by its real length rather than a fixed 150, so prompts
    # longer than the pad length do not leak into the summary.
    generated = output[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()


def main():
    """Render the Streamlit UI and summarize the pasted text on request."""
    tokenizer, model, preprocessor = load_resources()

    st.title('Arabic Text Summarizer | By M.Araby')
    text = st.text_area("Paste your Arabic text here:")

    if not st.button('Summarize'):
        return

    # Whitespace-only input is treated the same as empty input.
    if not text.strip():
        st.warning("Please enter Arabic text to summarize.")
        return

    result = summarize(text, tokenizer, model, preprocessor)
    if result is None:
        st.error("The text is too long to summarize. Please shorten it and try again.")
        return

    st.subheader("Original Text Input")
    st.write(text)
    st.subheader("Summarized Text Idea")
    st.write(result)


# Streamlit runs the script top to bottom on each rerun, so the app is started
# unconditionally, matching the original module-level behavior.
main()