File size: 2,474 Bytes
37430cf
 
 
b2c3642
6ee5d7e
37430cf
6ee5d7e
8c60672
 
 
 
 
 
 
 
 
37430cf
6ee5d7e
 
37430cf
8c60672
37430cf
 
8c60672
37430cf
 
6ee5d7e
8c60672
 
37430cf
8c60672
6ee5d7e
37430cf
 
8c60672
6ee5d7e
 
8c60672
 
 
6ee5d7e
8c60672
6ee5d7e
8c60672
 
 
 
6ee5d7e
8c60672
6ee5d7e
37430cf
8c60672
37430cf
6ee5d7e
 
 
 
37430cf
 
 
6ee5d7e
8c60672
37430cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from collections import Counter
from transformers import pipeline
from keybert import KeyBERT
from src.utils import extract_keywords_keybert, analyze_sentiment
from src.summarization import summarize_overall_sentiment

def comparison_analysis(articles, *, min_articles=10, common_threshold=3):
    """
    Compare articles by sentiment and topic, and build a final sentiment summary.

    Args:
        articles (list[dict]): A list of articles, each containing a "summary" key.
            The summary is passed to the keyword-extraction and sentiment helpers.
        min_articles (int): Minimum number of articles required to run the
            comparison. Defaults to 10.
        common_threshold (int): Number of distinct articles a keyword must
            appear in to be reported as a common topic. Defaults to 3.

    Returns:
        dict: ``{"error": ...}`` when fewer than ``min_articles`` articles are
        given. Otherwise a dictionary with three keys:
        "Sentiment Analysis" (label distribution and a one-line summary),
        "Topic Overlap" (common topics and per-article unique topics), and
        "Final Sentiment Analysis" (the value returned by
        ``summarize_overall_sentiment``).

    Raises:
        KeyError: If an article has no "summary" key.
    """
    if len(articles) < min_articles:
        return {"error": "Not enough articles for a full comparison."}

    summaries = [article["summary"] for article in articles]

    # De-duplicate keywords within each article (keeping first-seen order) so
    # the counter below measures "number of articles mentioning the keyword"
    # rather than raw occurrences; a keyword repeated inside a single article
    # must not be promoted to a common topic.
    # NOTE(review): assumes extract_keywords_keybert returns an iterable of
    # hashable keywords (e.g. strings) -- confirm against src.utils.
    article_keywords = [
        list(dict.fromkeys(extract_keywords_keybert(summary)))
        for summary in summaries
    ]

    # Number of articles each keyword appears in.
    keyword_counts = Counter(
        keyword for keywords in article_keywords for keyword in keywords
    )

    # Common topics are listed in the order they were first seen.
    common_topics = [
        keyword
        for keyword, count in keyword_counts.items()
        if count >= common_threshold
    ]
    common_lookup = set(common_topics)  # built once, not once per article
    unique_topics_per_article = [
        {
            "Article": position,
            "Unique Topics": [kw for kw in keywords if kw not in common_lookup],
        }
        for position, keywords in enumerate(article_keywords, start=1)
    ]

    # Normalise the label *before* counting so that labels differing only in
    # case (e.g. "POSITIVE" / "positive") are summed instead of overwriting
    # each other in the distribution.
    sentiment_counts = Counter(
        analyze_sentiment(summary).capitalize() for summary in summaries
    )
    formatted_counts = dict(sentiment_counts)

    # Most frequent label wins; ties go to the label seen first. "Neutral" is
    # only used when there are no sentiments at all.
    overall_sentiment = max(
        sentiment_counts, key=sentiment_counts.get, default="Neutral"
    )
    sentiment_summary = (
        f"Overall sentiment is {overall_sentiment} "
        f"({formatted_counts.get('Negative', 0)} Negative, {formatted_counts.get('Positive', 0)} Positive)."
    )

    # Generate LLM-based sentiment summary
    overall_summary = summarize_overall_sentiment(articles)

    # Return the final comparative analysis
    return {
        "Sentiment Analysis": {
            "Sentiment Distribution": formatted_counts,
            "Final Sentiment Summary": sentiment_summary,
        },
        "Topic Overlap": {
            "Common Topics": common_topics,
            "Unique Topics Per Article": unique_topics_per_article,
        },
        "Final Sentiment Analysis": overall_summary,  # LLM-generated summary
    }