carlosdimare committed on
Commit
31c2313
verified
1 Parent(s): bf9e8b0

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +169 -0
  2. gitattributes +35 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import feedparser
4
+ from datetime import datetime, timedelta
5
+ import pytz
6
+ from bs4 import BeautifulSoup
7
+ import hashlib
8
+ import threading
9
+ import pandas as pd
10
+
11
# Global settings

# Maps the label shown in the model selector to the model identifier that is
# handed to the summarization pipeline.
SUMMARIZER_MODELS = {
    "Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "Free Model (distilbart-cnn-6-6)": "sshleifer/distilbart-cnn-6-6"
}
# Capacity passed to NewsCache when the shared cache is created.
CACHE_SIZE = 500
# fetch_rss_news only keeps entries published within this window before "now".
RSS_FETCH_INTERVAL = timedelta(hours=8)
# Maximum number of articles fetch_rss_news returns.
ARTICLE_LIMIT = 5

# category name -> {source name: RSS feed URL}
NEWS_SOURCES = {
    "Movilizaciones Sindicales": {

        "Pagina12": "https://www.pagina12.com.ar/rss/edicion-impresa",

    }
}
27
+
28
class NewsCache:
    """Size-bounded key/value cache guarded by a lock.

    Entries are evicted in insertion order (oldest first) once the cache holds
    ``size`` items. Overwriting an existing key updates its value in place and
    never evicts another entry.
    """

    def __init__(self, size):
        """Create an empty cache.

        Args:
            size: Maximum number of entries to keep. A value of zero or less
                disables storage entirely (every ``set`` is a no-op).
        """
        self.cache = {}
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entry if full."""
        with self.lock:
            if self.size <= 0:
                # Nothing may be stored; also avoids next() on an empty dict.
                return
            # Only make room when a genuinely new key would exceed capacity;
            # replacing an existing key does not grow the cache.
            if key not in self.cache and len(self.cache) >= self.size:
                oldest_key = next(iter(self.cache))
                del self.cache[oldest_key]
            self.cache[key] = value
44
+
45
# Module-wide cache instance used by summarize_text to store generated summaries.
cache = NewsCache(CACHE_SIZE)
46
+
47
def fetch_rss_news(categories):
    """Collect recent articles from the feeds of the given categories.

    Every feed registered in ``NEWS_SOURCES`` under a requested category is
    parsed, and entries whose publication time falls within
    ``RSS_FETCH_INTERVAL`` of the current time are kept.

    Args:
        categories: Iterable of category names (keys of ``NEWS_SOURCES``).
            Names that are not registered are ignored.

    Returns:
        A list of at most ``ARTICLE_LIMIT`` dicts, newest first, each with the
        keys ``title``, ``description``, ``link``, ``category``, ``source``
        and ``published`` (a timezone-aware datetime tagged as UTC).
    """
    import logging  # local import: the module header does not import logging

    logger = logging.getLogger(__name__)
    articles = []
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
    for category in categories:
        for source, url in NEWS_SOURCES.get(category, {}).items():
            try:
                feed = feedparser.parse(url)
            except Exception:
                # Best effort: one broken feed must not block the others,
                # but the failure should be visible in the logs.
                logger.exception("Could not parse feed %r (%s)", source, url)
                continue
            for entry in feed.entries:
                # Entries without a parsed date cannot be compared with the
                # cutoff; skip just that entry rather than the whole feed.
                stamp = getattr(entry, "published_parsed", None)
                if not stamp:
                    continue
                published = datetime(*stamp[:6], tzinfo=pytz.UTC)
                if published <= cutoff_time:
                    continue
                raw_description = getattr(entry, "description", "") or ""
                articles.append({
                    "title": getattr(entry, "title", ""),
                    # Feed descriptions may contain HTML; keep only the text.
                    "description": BeautifulSoup(raw_description, "html.parser").get_text(),
                    "link": getattr(entry, "link", ""),
                    "category": category,
                    "source": source,
                    "published": published,
                })
    articles.sort(key=lambda item: item["published"], reverse=True)
    return articles[:ARTICLE_LIMIT]
69
+
70
# Summarization pipelines already built, keyed by model name, so each model is
# loaded once instead of on every call.
_SUMMARIZER_PIPELINES = {}


def _get_summarizer(model_name):
    """Return the summarization pipeline for ``model_name``, building it on first use."""
    summarizer = _SUMMARIZER_PIPELINES.get(model_name)
    if summarizer is None:
        summarizer = pipeline("summarization", model=model_name, device=-1)
        _SUMMARIZER_PIPELINES[model_name] = summarizer
    return summarizer


def summarize_text(text, model_name):
    """Summarize ``text`` with the given model, using the shared cache.

    Args:
        text: The text to summarize.
        model_name: Model identifier passed to the summarization pipeline.

    Returns:
        The generated summary, or the string ``"Summary unavailable."`` when
        the text is empty or the model cannot be loaded or run.
    """
    if not text or not text.strip():
        # Nothing to summarize; avoid loading a model for empty input.
        return "Summary unavailable."

    # The key covers the model as well as the text, so switching models does
    # not return a summary produced by a different model. MD5 is used purely
    # as a compact cache key, not for security.
    content_hash = hashlib.md5(f"{model_name}\n{text}".encode()).hexdigest()
    cached_summary = cache.get(content_hash)
    if cached_summary is not None:
        return cached_summary

    try:
        summarizer = _get_summarizer(model_name)
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        summary = result[0]['summary_text']
    except Exception:
        # Deliberate best effort: the UI shows a placeholder instead of failing.
        return "Summary unavailable."

    cache.set(content_hash, summary)
    return summary
83
+
84
def summarize_articles(articles, model_name):
    """Render a text report with one summarized section per article.

    Args:
        articles: Iterable of article dicts providing the keys ``title``,
            ``category``, ``source``, ``link`` and ``description``.
        model_name: Model identifier forwarded to ``summarize_text``.

    Returns:
        The sections joined by newlines; an empty string when ``articles``
        is empty.
    """
    summaries = []
    for article in articles:
        summary = summarize_text(article["description"], model_name)
        # The emoji labels were mis-encoded (UTF-8 bytes decoded as GBK);
        # they are restored to the intended characters here.
        summaries.append(
            "\n"
            f"📰 {article['title']}\n"
            f"- 📁 Category: {article['category']}\n"
            f"- 💡 Source: {article['source']}\n"
            f"- 🔗 Read More: {article['link']}\n"
            f"📃 Summary: {summary}\n"
        )
    return "\n".join(summaries)
97
+
98
def generate_summary(selected_categories, model_name):
    """Fetch recent news for the chosen categories and summarize it.

    Args:
        selected_categories: Category names picked by the user.
        model_name: Model identifier used for summarization.

    Returns:
        The summary report, or a short message when no category was selected
        or no recent article was found.
    """
    if selected_categories:
        recent_articles = fetch_rss_news(selected_categories)
        if recent_articles:
            return summarize_articles(recent_articles, model_name)
        return "No recent news found in the selected categories."
    return "Please select at least one category."
105
+
106
def fetch_union_mobilizations():
    """Collect articles from the last day whose title mentions union mobilizations.

    Reads every feed registered under ``NEWS_SOURCES["Movilizaciones Sindicales"]``
    and keeps entries published within the last 24 hours whose lower-cased
    title contains "movilización" or "sindical".

    Returns:
        A list of dicts with the keys ``title``, ``description``, ``link``,
        ``source`` and ``published`` (a timezone-aware datetime tagged as
        UTC), in feed order.
    """
    import logging  # local import: the module header does not import logging

    logger = logging.getLogger(__name__)
    # The accented keyword was mis-encoded ("movilizaci贸n") and could never
    # match a real title; it is restored to the intended spelling.
    keywords = ("movilización", "sindical")
    articles = []
    cutoff_time = datetime.now(pytz.UTC) - timedelta(days=1)
    for source, url in NEWS_SOURCES["Movilizaciones Sindicales"].items():
        try:
            feed = feedparser.parse(url)
        except Exception:
            # Best effort: a failing feed is logged and skipped.
            logger.exception("Could not parse feed %r (%s)", source, url)
            continue
        for entry in feed.entries:
            # Skip only the entry that lacks a parsed date, not the whole feed.
            stamp = getattr(entry, "published_parsed", None)
            if not stamp:
                continue
            published = datetime(*stamp[:6], tzinfo=pytz.UTC)
            if published <= cutoff_time:
                continue
            title = getattr(entry, "title", "") or ""
            lowered_title = title.lower()
            # Keep only articles about union mobilizations.
            if not any(keyword in lowered_title for keyword in keywords):
                continue
            raw_description = getattr(entry, "description", "") or ""
            articles.append({
                "title": title,
                # Feed descriptions may contain HTML; keep only the text.
                "description": BeautifulSoup(raw_description, "html.parser").get_text(),
                "link": getattr(entry, "link", ""),
                "source": source,
                "published": published,
            })
    return articles
127
+
128
def create_mobilization_table():
    """Return the recent union-mobilization articles as a plain-text table.

    Returns:
        The articles rendered with pandas (without the index column), or a
        Spanish "nothing found" message when there are no articles.
    """
    rows = fetch_union_mobilizations()
    if rows:
        # Build a pandas table from the article dicts and render it as text.
        table = pd.DataFrame(rows)
        return table.to_string(index=False)
    return "No se encontraron movilizaciones sindicales recientes."
136
+
137
# Gradio Interface
# NOTE(review): the nesting below was reconstructed from a view that had lost
# its indentation; confirm the Row contents match the intended layout.
demo = gr.Blocks()

with demo:
    # The heading emoji was mis-encoded (UTF-8 bytes decoded as GBK) and is
    # restored to the intended character.
    gr.Markdown("# 📰 AI News Summarizer")
    with gr.Row():
        categories = gr.CheckboxGroup(
            choices=list(NEWS_SOURCES.keys()),
            label="Select News Categories"
        )
        model_selector = gr.Radio(
            choices=list(SUMMARIZER_MODELS.keys()),
            label="Choose Summarization Model",
            value="Default (facebook/bart-large-cnn)"
        )
    summarize_button = gr.Button("Get News Summary")
    summary_output = gr.Textbox(label="News Summary", lines=20)

    def get_summary(selected_categories, selected_model):
        """Translate the selected model label to its identifier and build the summary."""
        model_name = SUMMARIZER_MODELS[selected_model]
        return generate_summary(selected_categories, model_name)

    # Event listeners are registered while the Blocks context is still open.
    summarize_button.click(get_summary, inputs=[categories, model_selector], outputs=summary_output)

if __name__ == "__main__":
    demo.launch()
163
+
164
+
165
+
166
+
167
+
168
+
169
+
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ transformers>=4.30.0
2
+ gradio>=3.0.0
3
+ feedparser>=6.0.0
4
+ torch>=1.8.0
5
+ beautifulsoup4>=4.9.0
6
+ pytz>=2023.3
7
+ requests>=2.31.0