import json
import logging
import subprocess
import sys
from pathlib import Path

import gradio as gr
import pandas as pd
import spacy

from seo_analyzer import SEOSpaceAnalyzer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def setup_spacy_model():
    """Load the required spaCy model, downloading it first if it is missing."""
    try:
        spacy.load("es_core_news_lg")
        logger.info("spaCy model 'es_core_news_lg' loaded successfully.")
    except OSError:
        logger.info("Downloading spaCy model 'es_core_news_lg'...")
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "es_core_news_lg"],
            check=True,
        )

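# The helpers below assume that SEOSpaceAnalyzer writes its artifacts (reports,
# cached content, etc.) under ./content_storage; the "Archivos" tab lists those
# files and serves the selected one for download.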
def list_content_storage_files() -> list:
    """Return the list of files found under the content_storage folder."""
    base_dir = Path("content_storage")
    if not base_dir.exists():
        return []
    return [str(file.relative_to(base_dir)) for file in base_dir.glob("**/*") if file.is_file()]

def download_storage_file(selected_file: str) -> str:
    """Given a file name (relative to content_storage), return its path for download."""
    if not selected_file:
        return ""
    file_path = Path("content_storage") / selected_file
    return str(file_path) if file_path.exists() else ""

def refresh_file_list():
    """Refresh the choices of the content_storage file dropdown."""
    # Return a component update so the Dropdown's *choices* are replaced;
    # returning a plain list would only try to set its selected value.
    return gr.update(choices=list_content_storage_files())

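# Builds the Gradio UI: a sitemap input row, tabbed result panels, export buttons
# and a file-management tab, all backed by a single SEOSpaceAnalyzer instance.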
def create_interface() -> gr.Blocks:
    analyzer = SEOSpaceAnalyzer()

    def analyze_with_callbacks(sitemap_url: str):
        status_msgs = []

        def status_callback(msg: str):
            status_msgs.append(msg)
            logger.info(msg)

        def progress_callback(current: int, total: int):
            logger.info(f"Batch {current} of {total} processed.")

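        # analyze_sitemap is expected to return one value per result panel
        # (stats, recommendations, content, links, details, similarity, seo_tags),
        # in the same order as the outputs wired to analyze_btn.click below;
        # the collected status messages are appended as the final output.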
        results = analyzer.analyze_sitemap(
            sitemap_url,
            progress_callback=progress_callback,
            status_callback=status_callback,
        )
        final_status = "\n".join(status_msgs) if status_msgs else "Análisis completado."
        return (*results, final_status)

    with gr.Blocks(title="SEO Analyzer Pro", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🧠 SEO Analyzer Pro

        Este espacio analiza contenido web orientado a normativa bancaria y genera:

        - Temas inferidos automáticamente
        - Títulos y meta descripciones SEO
        - Alertas por lenguaje de riesgo
        """)
        with gr.Row():
            sitemap_input = gr.Textbox(label="📍 URL del Sitemap", placeholder="https://ejemplo.com/sitemap.xml")
            analyze_btn = gr.Button("🔍 Analizar")
            clear_btn = gr.Button("🧹 Limpiar")
            download_json_btn = gr.Button("📥 Descargar JSON")
            download_csv_btn = gr.Button("📤 Descargar CSV")
        status_output = gr.Textbox(label="Estado del análisis", interactive=False)
        with gr.Tabs():
            with gr.Tab("📊 Resumen"):
                stats_output = gr.JSON(label="Estadísticas")
                recommendations_output = gr.JSON(label="Recomendaciones SEO")
            with gr.Tab("📝 Contenido"):
                content_output = gr.JSON(label="Análisis de contenido")
            with gr.Tab("🔗 Enlaces"):
                links_output = gr.JSON(label="Análisis de enlaces")
                links_plot = gr.Plot(label="Visualización de enlaces internos")
            with gr.Tab("📄 Detalles"):
                details_output = gr.JSON(label="Detalles por página")
            with gr.Tab("🧠 SEO y Temas"):
                seo_tags_output = gr.JSON(label="Metadatos SEO generados")
                topics_output = gr.JSON(label="Temas inferidos")
                flags_output = gr.JSON(label="Términos prohibidos detectados")
            with gr.Tab("🔗 Similitud"):
                similarity_output = gr.JSON(label="Similitud entre URLs")
            with gr.Tab("📁 Archivos"):
                file_dropdown = gr.Dropdown(label="Archivos en content_storage", choices=list_content_storage_files())
                refresh_btn = gr.Button("Actualizar lista")
                download_file_btn = gr.Button("Descargar Archivo Seleccionado", variant="secondary")
                file_download = gr.File(label="Archivo Seleccionado")

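        # Export helpers: serialize analyzer.current_analysis into content_storage
        # and return the written path (shown in the status box, since the download
        # buttons are wired to status_output), or an empty string if there is no
        # analysis yet.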
        def export_json() -> str:
            if not analyzer.current_analysis:
                return ""
            path = Path("content_storage/seo_report.json")
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, "w", encoding="utf-8") as f:
                json.dump(analyzer.current_analysis, f, indent=2, ensure_ascii=False)
            return str(path)

        def export_csv() -> str:
            if not analyzer.current_analysis:
                return ""
            path = Path("content_storage/seo_summary.csv")
            path.parent.mkdir(parents=True, exist_ok=True)
            data = []
            for url, seo in analyzer.current_analysis.get("seo_tags", {}).items():
                data.append({
                    "url": url,
                    "title": seo.get("title", ""),
                    "meta_description": seo.get("meta_description", ""),
                    "flags": ", ".join(seo.get("flags", [])),
                    "topics": ", ".join(analyzer.current_analysis.get("topics", {}).get(url, [])),
                    "summary": analyzer.current_analysis.get("summaries", {}).get(url, ""),
                })
            pd.DataFrame(data).to_csv(path, index=False)
            return str(path)

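        # Event wiring: the analyze button fills every result panel plus the status
        # box; clearing resets the same outputs; topics and flags are re-read from
        # analyzer.current_analysis whenever the SEO tags panel changes.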
        analyze_btn.click(
            fn=analyze_with_callbacks,
            inputs=sitemap_input,
            outputs=[
                stats_output, recommendations_output, content_output,
                links_output, details_output, similarity_output,
                seo_tags_output, status_output,
            ],
            show_progress=True,
        )
        clear_btn.click(fn=lambda: [None] * 8, outputs=[
            stats_output, recommendations_output, content_output,
            links_output, details_output, similarity_output,
            seo_tags_output, status_output,
        ])
        download_json_btn.click(fn=export_json, outputs=status_output)
        download_csv_btn.click(fn=export_csv, outputs=status_output)
        links_output.change(fn=analyzer.plot_internal_links, inputs=links_output, outputs=links_plot)
        seo_tags_output.change(fn=lambda: analyzer.current_analysis.get("topics", {}), outputs=topics_output)
        seo_tags_output.change(fn=lambda: analyzer.current_analysis.get("flags", {}), outputs=flags_output)
        refresh_btn.click(fn=refresh_file_list, outputs=file_dropdown)
        download_file_btn.click(fn=download_storage_file, inputs=file_dropdown, outputs=file_download)

    return demo

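# Entry point: make sure the spaCy model is available, then launch the UI.
# Binding to 0.0.0.0 on port 7860 matches Gradio's default port and what
# Hugging Face Spaces / containerized deployments expect.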
if __name__ == "__main__":
    setup_spacy_model()
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860)