Vijayendra committed (verified)
Commit 9330a0d · 1 Parent(s): 3e5c877

Update README.md

Files changed (1)
  1. README.md +63 -0
README.md CHANGED
@@ -64,3 +64,66 @@ summary_ids = model.generate(inputs, max_length=150, min_length=100, length_pena
 summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 print("Summary:")
 print(summary)
+
+# Compare with some other models
+
+from transformers import T5ForConditionalGeneration, T5Tokenizer, PegasusTokenizer, PegasusForConditionalGeneration, BartForConditionalGeneration, BartTokenizer
+
+# Function to summarize with any model
+def summarize_article(article, model, tokenizer):
+    inputs = tokenizer.encode("summarize: " + article, return_tensors="pt", max_length=512, truncation=True)
+    summary_ids = model.generate(inputs, max_length=150, min_length=100, length_penalty=2.0, num_beams=4, early_stopping=True)
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+    return summary
+
+# Load the fine-tuned T5 model and tokenizer from this repo
+t5_model_custom = T5ForConditionalGeneration.from_pretrained("Vijayendra/T5-Base-Sum")
+t5_tokenizer_custom = T5Tokenizer.from_pretrained("Vijayendra/T5-Base-Sum")
+
+# Load a different pretrained summarization model (mT5 fine-tuned on the multilingual XL-Sum dataset)
+t5_model_pretrained = T5ForConditionalGeneration.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")
+t5_tokenizer_pretrained = T5Tokenizer.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")
+
+# Load Pegasus model and tokenizer
+pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
+pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
+
+# Load BART model and tokenizer
+bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
+
+# Example article for summarization
+article = """
+Videos that say approved vaccines are dangerous and cause autism, cancer or infertility are among those that will be taken down, the company
+said. The policy includes the termination of accounts of anti-vaccine influencers. Tech giants have been criticised for not doing more to
+counter false health information on their sites. In July, US President Joe Biden said social media platforms were largely responsible for
+people's scepticism in getting vaccinated by spreading misinformation, and appealed for them to address the issue. YouTube, which is owned
+by Google, said 130,000 videos were removed from its platform since last year, when it implemented a ban on content spreading misinformation
+about Covid vaccines. In a blog post, the company said it had seen false claims about Covid jabs "spill over into misinformation about
+vaccines in general". The new policy covers long-approved vaccines, such as those against measles or hepatitis B. "We're expanding our medical
+misinformation policies on YouTube with new guidelines on currently administered vaccines that are approved and confirmed to be safe and
+effective by local health authorities and the WHO," the post said, referring to the World Health Organization.
+"""
+
+# Summarize with the fine-tuned T5 model
+t5_summary_custom = summarize_article(article, t5_model_custom, t5_tokenizer_custom)
+
+# Summarize with the pretrained mT5 model
+t5_summary_pretrained = summarize_article(article, t5_model_pretrained, t5_tokenizer_pretrained)
+
+# Summarize with the Pegasus model
+pegasus_summary = summarize_article(article, pegasus_model, pegasus_tokenizer)
+
+# Summarize with the BART model
+bart_summary = summarize_article(article, bart_model, bart_tokenizer)
+
+# Print summaries for comparison
+print("T5 base with Cyclic Attention Summary:")
+print(t5_summary_custom)
+print("\nPretrained mT5_multilingual_XLSum Summary:")
+print(t5_summary_pretrained)
+print("\nPegasus Xsum Summary:")
+print(pegasus_summary)
+print("\nBART Large CNN Summary:")
+print(bart_summary)
+
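
One caveat about the comparison script above, noted here rather than in the commit itself: the "summarize: " task prefix is a T5-style convention, and the Pegasus and BART checkpoints used here were generally not trained with it, so they receive slightly unusual input. Also, the T5/mT5/Pegasus tokenizers require the sentencepiece package to be installed. A minimal prefix-aware sketch is below; the name summarize_article_v2 and the use_t5_prefix flag are illustrative additions, not part of the README.

from transformers import T5ForConditionalGeneration

def summarize_article_v2(article, model, tokenizer, use_t5_prefix=None):
    # Heuristic default: only T5-family models get the "summarize: " task prefix.
    if use_t5_prefix is None:
        use_t5_prefix = isinstance(model, T5ForConditionalGeneration)
    text = "summarize: " + article if use_t5_prefix else article
    inputs = tokenizer.encode(text, return_tensors="pt", max_length=512, truncation=True)
    summary_ids = model.generate(
        inputs, max_length=150, min_length=100,
        length_penalty=2.0, num_beams=4, early_stopping=True
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

With this variant, the Pegasus and BART calls would pass the article unchanged, while the two T5-family models still receive the prefix, which may make the comparison slightly fairer.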