Vijayendra
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -64,3 +64,66 @@ summary_ids = model.generate(inputs, max_length=150, min_length=100, length_pena
|
|
64 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
65 |
print("Summary:")
|
66 |
print(summary)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
65 |
print("Summary:")
|
66 |
print(summary)
|
67 |
+
|
# Compare with some other models
from transformers import (
    BartForConditionalGeneration,
    BartTokenizer,
    PegasusForConditionalGeneration,
    PegasusTokenizer,
    T5ForConditionalGeneration,
    T5Tokenizer,
)
|
71 |
+
|
72 |
+
# Function to summarize with any model
def summarize_article(article, model, tokenizer, *, prefix="summarize: "):
    """Summarize *article* with a seq2seq *model* and its paired *tokenizer*.

    Parameters
    ----------
    article : str
        Source text to summarize.
    model
        A conditional-generation model exposing ``generate``.
    tokenizer
        The tokenizer paired with *model* (``encode``/``decode``).
    prefix : str, keyword-only
        Task prefix prepended to the input. Defaults to ``"summarize: "``,
        which T5-style models expect; pass ``""`` for models such as
        Pegasus or BART that take the raw article. The default preserves
        the behavior existing callers rely on.

    Returns
    -------
    str
        The decoded summary with special tokens stripped.
    """
    # Truncate to 512 tokens so over-long articles don't exceed the model's input window.
    inputs = tokenizer.encode(prefix + article, return_tensors="pt", max_length=512, truncation=True)
    # Beam search (4 beams) with a length penalty, targeting ~100-150 token summaries.
    summary_ids = model.generate(inputs, max_length=150, min_length=100, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary
|
78 |
+
|
79 |
+
# --- Load every summarizer under comparison, each with its matching tokenizer ---

# Custom fine-tuned T5 published on the Hub (the model this README documents).
t5_tokenizer_custom = T5Tokenizer.from_pretrained("Vijayendra/T5-Base-Sum")
t5_model_custom = T5ForConditionalGeneration.from_pretrained("Vijayendra/T5-Base-Sum")

# Off-the-shelf multilingual T5 fine-tuned for news summarization (XL-Sum).
t5_tokenizer_pretrained = T5Tokenizer.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")
t5_model_pretrained = T5ForConditionalGeneration.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")

# Pegasus fine-tuned on XSum.
pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

# BART fine-tuned on CNN/DailyMail.
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
|
94 |
+
|
95 |
+
# Shared input: a single news article fed to every model for comparison.
article = """
Videos that say approved vaccines are dangerous and cause autism, cancer or infertility are among those that will be taken down, the company
said. The policy includes the termination of accounts of anti-vaccine influencers. Tech giants have been criticised for not doing more to
counter false health information on their sites. In July, US PresidentJoe Biden said social media platforms were largely responsible for
people's scepticism in getting vaccinated by spreading misinformation, and appealed for them to address the issue. YouTube, which is owned
by Google, said 130,000 videos were removed from its platform since last year, when it implemented a ban on content spreading misinformation
about Covid vaccines. In a blog post, the company said it had seen false claims about Covid jabs "spill over into misinformation about
vaccines in general". The new policy covers long-approved vaccines, such as those against measles or hepatitis B."We're expanding our medical
misinformation policies on YouTube with new guidelines on currently administered vaccines that are approved and confirmed to be safe and
effective by local health authorities and the WHO," the post said, referring to the World Health Organization.
"""
|
107 |
+
|
108 |
+
# Run the same article through each of the four summarizers.
t5_summary_custom = summarize_article(article, t5_model_custom, t5_tokenizer_custom)  # fine-tuned T5
t5_summary_pretrained = summarize_article(article, t5_model_pretrained, t5_tokenizer_pretrained)  # mT5 XL-Sum
pegasus_summary = summarize_article(article, pegasus_model, pegasus_tokenizer)  # Pegasus XSum
bart_summary = summarize_article(article, bart_model, bart_tokenizer)  # BART CNN/DailyMail
|
119 |
+
|
120 |
+
# Show each model's output side by side for a qualitative comparison.
print("T5 base with Cyclic Attention Summary:", t5_summary_custom, sep="\n")
print("\nPretrained mT5_multilingual_XLSum Summary:", t5_summary_pretrained, sep="\n")
print("\nPegasus Xsum Summary:", pegasus_summary, sep="\n")
print("\nBART Large CNN Summary:", bart_summary, sep="\n")
|
129 |
+
|