|
import json |
|
import gradio as gr |
|
from transformers import pipeline |
|
|
|
# Hugging Face model identifier used for both the model and its tokenizer.
MODEL_NAME = "gpt2"

# Load the text-generation pipeline once at import time. If loading fails for
# any reason, the error is printed and ``model`` is set to None so that
# classify_document can report the problem instead of crashing.
try:
    # trust_remote_code is deliberately left at its default (disabled): the
    # configured model is natively supported by transformers, and enabling the
    # flag would allow Python code shipped in a model repository to execute.
    model = pipeline("text-generation", model=MODEL_NAME, tokenizer=MODEL_NAME)
except Exception as e:
    print(f"Error al cargar el modelo: {e}")
    model = None
|
|
|
|
|
# Sample document records offered in the UI. Each record carries the fields
# read by predict ("title", "type", "sizeBytes", "metadata") plus bookkeeping
# fields that are not read anywhere else in this file.
# NOTE(review): "localUpdateTime" holds the literal string "null", not None —
# confirm this is intended.
# NOTE(review): "label" looks like the expected classification of each
# document — confirm.
data = [
    {
        "hashName": "a953bc44-2ff0-47c3-b91b-01c5670097f1",
        "path": "/documents/contrato/contrato_juan_perez.pdf",
        "title": "Contrato Juan Pérez",
        "type": "pdf",
        "sizeBytes": 38930412,
        "userId": 8,
        "metadata": {
            "company": "Empresa ABC",
            "contractDate": "2025-01-15",
            "contractValue": "5000 USD",
        },
        "localCreateTime": "2025-02-06T12:18:23.973",
        "localUpdateTime": "null",
        "label": "contrato",
    },
    {
        "hashName": "b197e6be-4b9a-45c8-b497-b0e8a6877b29",
        "path": "/documents/factura/factura_12345.pdf",
        "title": "Factura 12345",
        "type": "pdf",
        "sizeBytes": 20830451,
        "userId": 9,
        "metadata": {
            "customer": "Juan Pérez",
            "amount": "1000 USD",
            "invoiceDate": "2025-02-01",
        },
        "localCreateTime": "2025-02-06T12:30:14.200",
        "localUpdateTime": "null",
        "label": "factura",
    },
    {
        "hashName": "d27ab21f-b07f-4e5e-9b19-cf5f86cd0741",
        "path": "/documents/cv/cv_luis_gomez.pdf",
        "title": "CV Luis Gómez",
        "type": "pdf",
        "sizeBytes": 14920482,
        "userId": 10,
        "metadata": {
            "skills": ["Python", "JavaScript", "Django"],
            "experienceYears": 5,
            "education": "Ingeniería en Sistemas",
        },
        "localCreateTime": "2025-02-06T14:00:45.120",
        "localUpdateTime": "null",
        "label": "cv",
    },
]
|
|
|
|
|
def classify_document(title, doc_type, size, metadata):
    """Classify a document as contrato, factura, cv or boleta.

    Builds a Spanish prompt from the document fields, asks the module-level
    ``model`` text-generation pipeline to continue it, and returns the first
    known label that appears in the generated continuation.

    Args:
        title: Document title shown in the prompt.
        doc_type: File type shown in the prompt.
        size: File size in bytes shown in the prompt.
        metadata: JSON-serializable metadata embedded in the prompt.

    Returns:
        One of ``"contrato"``, ``"factura"``, ``"cv"`` or ``"boleta"``;
        ``"desconocido"`` when the continuation contains none of them; or a
        Spanish error message when the model is unavailable or generation
        fails.
    """
    labels = ("contrato", "factura", "cv", "boleta")

    # ensure_ascii=False keeps accented characters readable in the prompt
    # instead of turning them into \uXXXX escapes.
    metadata_json = json.dumps(metadata, indent=2, ensure_ascii=False)
    prompt = "\n".join(
        [
            "Dado el siguiente documento:",
            f"- **Título:** {title}",
            f"- **Tipo de archivo:** {doc_type}",
            f"- **Tamaño:** {size} bytes",
            f"- **Metadatos:** {metadata_json}",
            "",
            "¿Qué tipo de documento es? Responde con una sola palabra: "
            "contrato, factura, cv o boleta.",
        ]
    )

    if model is None:
        return "Error al cargar el modelo."

    try:
        # max_new_tokens bounds only the continuation (max_length would also
        # count the prompt tokens); return_full_text=False makes the pipeline
        # return just the continuation rather than prompt + continuation.
        result = model(prompt, max_new_tokens=10, return_full_text=False)
        generated = result[0]["generated_text"]
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error al clasificar el documento: {e}"

    # The model may wrap the answer in extra words or punctuation, so look
    # for the first word that is a known label instead of an exact match.
    for word in generated.lower().split():
        candidate = word.strip(".,;:!?¡¿\"'*()[]")
        if candidate in labels:
            return candidate
    return "desconocido"
|
|
|
|
|
def predict(index):
    """Classify the sample document at ``index`` and format the result.

    Args:
        index: Position of the document in the module-level ``data`` list, as
            supplied by the dropdown. ``None`` (nothing selected) and values
            that are not usable positions are reported as text instead of
            raising.

    Returns:
        A one-line Spanish summary with the document title and the predicted
        label, or a Spanish message describing why no document was classified.
    """
    # The dropdown passes None when the user submits without choosing.
    if index is None:
        return "Selecciona un documento."

    try:
        position = int(index)
    except (TypeError, ValueError):
        return f"Índice de documento no válido: {index}"

    try:
        doc = data[position]
    except IndexError:
        return f"Índice de documento no válido: {index}"

    prediction = classify_document(
        doc["title"], doc["type"], doc["sizeBytes"], doc["metadata"]
    )
    return f"Título: {doc['title']} → Predicción: {prediction}"
|
|
|
|
|
# Gradio UI: a dropdown of document positions in ``data`` feeds predict, whose
# text result is displayed as the output.
interface = gr.Interface(
    fn=predict,
    inputs=gr.Dropdown(
        choices=list(range(len(data))),
        label="Selecciona un documento",
    ),
    outputs="text",
    title="Clasificación de Documentos",
    # Name the model that is actually loaded (MODEL_NAME) so the description
    # cannot drift from the configured model.
    description=f"Selecciona un documento y el modelo {MODEL_NAME} lo clasificará.",
)
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script, so that
# importing the module does not start a server as a side effect.
if __name__ == "__main__":
    interface.launch()
|
|