Spaces:
Sleeping
Sleeping
Update modules/text_analysis/semantic_analysis.py
Browse files
modules/text_analysis/semantic_analysis.py
CHANGED
|
@@ -14,13 +14,22 @@ import matplotlib.pyplot as plt
|
|
| 14 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 15 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# 4. Importaciones locales
|
| 25 |
from .stopwords import (
|
| 26 |
process_text,
|
|
@@ -101,12 +110,24 @@ def perform_semantic_analysis(text, nlp, lang_code):
|
|
| 101 |
"""
|
| 102 |
Realiza el análisis semántico completo del texto.
|
| 103 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
try:
|
| 105 |
logger.info(f"Starting semantic analysis for language: {lang_code}")
|
| 106 |
|
| 107 |
# Procesar texto y remover stopwords
|
| 108 |
doc = nlp(text)
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
# Identificar conceptos clave
|
| 112 |
logger.info("Identificando conceptos clave...")
|
|
@@ -121,9 +142,16 @@ def perform_semantic_analysis(text, nlp, lang_code):
|
|
| 121 |
}
|
| 122 |
|
| 123 |
# Crear grafo de conceptos
|
| 124 |
-
logger.info("Creando grafo de conceptos...")
|
| 125 |
concept_graph = create_concept_graph(doc, key_concepts)
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
# Visualizar grafo
|
| 128 |
logger.info("Visualizando grafo...")
|
| 129 |
plt.clf() # Limpiar figura actual
|
|
@@ -144,13 +172,15 @@ def perform_semantic_analysis(text, nlp, lang_code):
|
|
| 144 |
plt.close(concept_graph_fig)
|
| 145 |
plt.close('all')
|
| 146 |
|
| 147 |
-
|
| 148 |
-
return {
|
| 149 |
'success': True,
|
| 150 |
'key_concepts': key_concepts,
|
| 151 |
'concept_graph': graph_bytes
|
| 152 |
}
|
| 153 |
|
|
|
|
|
|
|
|
|
|
| 154 |
except Exception as e:
|
| 155 |
logger.error(f"Error in perform_semantic_analysis: {str(e)}")
|
| 156 |
plt.close('all') # Asegurarse de limpiar recursos
|
|
@@ -158,6 +188,8 @@ def perform_semantic_analysis(text, nlp, lang_code):
|
|
| 158 |
'success': False,
|
| 159 |
'error': str(e)
|
| 160 |
}
|
|
|
|
|
|
|
| 161 |
|
| 162 |
############################################################
|
| 163 |
|
|
@@ -367,7 +399,7 @@ __all__ = [
|
|
| 367 |
'identify_key_concepts',
|
| 368 |
'create_concept_graph',
|
| 369 |
'visualize_concept_graph',
|
| 370 |
-
'fig_to_bytes'
|
| 371 |
'ENTITY_LABELS',
|
| 372 |
'POS_COLORS',
|
| 373 |
'POS_TRANSLATIONS'
|
|
|
|
| 14 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 15 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 16 |
|
| 17 |
+
# Solo configurar si no hay handlers ya configurados
|
| 18 |
+
if not logger.handlers:
|
| 19 |
+
# Configurar handler de consola
|
| 20 |
+
console_handler = logging.StreamHandler()
|
| 21 |
+
console_handler.setLevel(logging.INFO)
|
| 22 |
+
|
| 23 |
+
# Configurar formato
|
| 24 |
+
formatter = logging.Formatter(
|
| 25 |
+
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 26 |
+
)
|
| 27 |
+
console_handler.setFormatter(formatter)
|
| 28 |
+
|
| 29 |
+
# Agregar handler al logger
|
| 30 |
+
logger.addHandler(console_handler)
|
| 31 |
+
logger.setLevel(logging.INFO)
|
| 32 |
+
|
| 33 |
# 4. Importaciones locales
|
| 34 |
from .stopwords import (
|
| 35 |
process_text,
|
|
|
|
| 110 |
"""
|
| 111 |
Realiza el análisis semántico completo del texto.
|
| 112 |
"""
|
| 113 |
+
if not text or not nlp or not lang_code:
|
| 114 |
+
logger.error("Parámetros inválidos para el análisis semántico")
|
| 115 |
+
return {
|
| 116 |
+
'success': False,
|
| 117 |
+
'error': 'Parámetros inválidos'
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
try:
|
| 121 |
logger.info(f"Starting semantic analysis for language: {lang_code}")
|
| 122 |
|
| 123 |
# Procesar texto y remover stopwords
|
| 124 |
doc = nlp(text)
|
| 125 |
+
if not doc:
|
| 126 |
+
logger.error("Error al procesar el texto con spaCy")
|
| 127 |
+
return {
|
| 128 |
+
'success': False,
|
| 129 |
+
'error': 'Error al procesar el texto'
|
| 130 |
+
}
|
| 131 |
|
| 132 |
# Identificar conceptos clave
|
| 133 |
logger.info("Identificando conceptos clave...")
|
|
|
|
| 142 |
}
|
| 143 |
|
| 144 |
# Crear grafo de conceptos
|
| 145 |
+
logger.info(f"Creando grafo de conceptos con {len(key_concepts)} conceptos...")
|
| 146 |
concept_graph = create_concept_graph(doc, key_concepts)
|
| 147 |
|
| 148 |
+
if not concept_graph.nodes():
|
| 149 |
+
logger.warning("Se creó un grafo vacío")
|
| 150 |
+
return {
|
| 151 |
+
'success': False,
|
| 152 |
+
'error': 'No se pudo crear el grafo de conceptos'
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
# Visualizar grafo
|
| 156 |
logger.info("Visualizando grafo...")
|
| 157 |
plt.clf() # Limpiar figura actual
|
|
|
|
| 172 |
plt.close(concept_graph_fig)
|
| 173 |
plt.close('all')
|
| 174 |
|
| 175 |
+
result = {
|
|
|
|
| 176 |
'success': True,
|
| 177 |
'key_concepts': key_concepts,
|
| 178 |
'concept_graph': graph_bytes
|
| 179 |
}
|
| 180 |
|
| 181 |
+
logger.info("Análisis semántico completado exitosamente")
|
| 182 |
+
return result
|
| 183 |
+
|
| 184 |
except Exception as e:
|
| 185 |
logger.error(f"Error in perform_semantic_analysis: {str(e)}")
|
| 186 |
plt.close('all') # Asegurarse de limpiar recursos
|
|
|
|
| 188 |
'success': False,
|
| 189 |
'error': str(e)
|
| 190 |
}
|
| 191 |
+
finally:
|
| 192 |
+
plt.close('all') # Asegurar limpieza incluso si hay error
|
| 193 |
|
| 194 |
############################################################
|
| 195 |
|
|
|
|
| 399 |
'identify_key_concepts',
|
| 400 |
'create_concept_graph',
|
| 401 |
'visualize_concept_graph',
|
| 402 |
+
'fig_to_bytes', # Faltaba esta coma
|
| 403 |
'ENTITY_LABELS',
|
| 404 |
'POS_COLORS',
|
| 405 |
'POS_TRANSLATIONS'
|