Spaces:
Sleeping
Sleeping
File size: 2,474 Bytes
37430cf b2c3642 6ee5d7e 37430cf 6ee5d7e 8c60672 37430cf 6ee5d7e 37430cf 8c60672 37430cf 8c60672 37430cf 6ee5d7e 8c60672 37430cf 8c60672 6ee5d7e 37430cf 8c60672 6ee5d7e 8c60672 6ee5d7e 8c60672 6ee5d7e 8c60672 6ee5d7e 8c60672 6ee5d7e 37430cf 8c60672 37430cf 6ee5d7e 37430cf 6ee5d7e 8c60672 37430cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
from collections import Counter
from transformers import pipeline
from keybert import KeyBERT
from src.utils import extract_keywords_keybert, analyze_sentiment
from src.summarization import summarize_overall_sentiment
def comparison_analysis(articles, *, min_articles=10, common_threshold=3):
    """
    Compare articles by sentiment and topic and build a final sentiment summary.

    Args:
        articles (list[dict]): Articles to compare, each containing a "summary" key.
        min_articles (int): Minimum number of articles required for a comparison.
            When fewer are supplied (or ``articles`` is ``None``) the error
            dictionary is returned. Defaults to 10.
        common_threshold (int): Number of articles a keyword must appear in to be
            reported as a common topic. Defaults to 3.

    Returns:
        dict: Either ``{"error": ...}`` when there are too few articles, or a
        dictionary with the keys "Sentiment Analysis", "Topic Overlap" and
        "Final Sentiment Analysis".

    Raises:
        KeyError: If an article has no "summary" key.
    """
    if articles is None or len(articles) < min_articles:
        return {"error": "Not enough articles for a full comparison."}

    summaries = [article["summary"] for article in articles]

    # Keep each keyword once per article (first-seen order) so the counter below
    # really measures "number of articles mentioning the keyword" rather than
    # the raw number of occurrences.
    article_keywords = [
        list(dict.fromkeys(extract_keywords_keybert(summary)))
        for summary in summaries
    ]
    keyword_counts = Counter(
        keyword for keywords in article_keywords for keyword in keywords
    )

    # A topic is common when enough articles share it; everything else an
    # article mentions is reported as unique to that article.
    common_topics = [
        keyword
        for keyword, count in keyword_counts.items()
        if count >= common_threshold
    ]
    common_lookup = set(common_topics)  # built once for O(1) membership tests
    unique_topics_per_article = [
        {
            "Article": position,
            # Ordered filter instead of a set difference: the result no longer
            # depends on string hash randomisation.
            "Unique Topics": [kw for kw in keywords if kw not in common_lookup],
        }
        for position, keywords in enumerate(article_keywords, start=1)
    ]

    # Normalise label casing *before* counting so that labels differing only in
    # case are merged instead of overwriting each other.
    sentiment_counts = Counter(
        analyze_sentiment(summary).capitalize() for summary in summaries
    )
    formatted_counts = dict(sentiment_counts)

    # Most frequent label wins; ties go to the label seen first. The default is
    # only reachable when min_articles allows an empty list.
    overall_sentiment = max(
        formatted_counts, key=formatted_counts.get, default="Neutral"
    )
    # NOTE(review): the sentence only reports Negative and Positive counts, even
    # when another label (e.g. Neutral) is the overall result. Kept as-is
    # because callers may rely on the exact text.
    sentiment_summary = (
        f"Overall sentiment is {overall_sentiment} "
        f"({formatted_counts.get('Negative', 0)} Negative, {formatted_counts.get('Positive', 0)} Positive)."
    )

    # Summary produced by summarize_overall_sentiment over the raw articles.
    overall_summary = summarize_overall_sentiment(articles)

    return {
        "Sentiment Analysis": {
            "Sentiment Distribution": formatted_counts,
            "Final Sentiment Summary": sentiment_summary,
        },
        "Topic Overlap": {
            "Common Topics": common_topics,
            "Unique Topics Per Article": unique_topics_per_article,
        },
        "Final Sentiment Analysis": overall_summary,
    }