maslionok
committed on
Commit
·
66d1427
1
Parent(s):
21f4048
reverted
Browse files
app.py
CHANGED
@@ -30,46 +30,16 @@ def normalize(text, lang_choice):
|
|
30 |
print("❌ Pipeline error:", e)
|
31 |
return f"Error: {e}"
|
32 |
|
33 |
-
# Define example inputs for different languages
|
34 |
-
examples = [
|
35 |
-
["The quick brown fox jumps over the lazy dog. This is a sample text for testing.", "en"],
|
36 |
-
["Der schnelle braune Fuchs springt über den faulen Hund. Dies ist ein Beispieltext zum Testen.", "de"],
|
37 |
-
["Le renard brun rapide saute par-dessus le chien paresseux. Ceci est un texte d'exemple pour les tests.", "fr"],
|
38 |
-
["El zorro marrón rápido salta sobre el perro perezoso. Este es un texto de ejemplo para pruebas.", "es"],
|
39 |
-
["La volpe marrone veloce salta sopra il cane pigro. Questo è un testo di esempio per i test.", "it"],
|
40 |
-
["Auto-detect language: Mixed content with English and Français words together!", "Auto-detect"]
|
41 |
-
]
|
42 |
-
|
43 |
demo = gr.Interface(
|
44 |
fn=normalize,
|
45 |
inputs=[
|
46 |
-
gr.Textbox(
|
47 |
-
|
48 |
-
placeholder="Type your text here or try one of the examples below...",
|
49 |
-
lines=3
|
50 |
-
),
|
51 |
-
gr.Dropdown(choices=["Auto-detect"] + LANGUAGES, value="Auto-detect", label="Language")
|
52 |
],
|
53 |
-
outputs=gr.Textbox(label="Normalized Output"
|
54 |
-
|
55 |
-
|
56 |
-
description="""
|
57 |
-
<div style="text-align: center; margin-bottom: 20px;">
|
58 |
-
<img src="file/logo.jpeg" alt="Logo" style="max-width: 200px; height: auto; border-radius: 8px;">
|
59 |
-
</div>
|
60 |
-
|
61 |
-
**Solr normalization is intended to give an idea of what kind of normalization is happening behind Impresso.**
|
62 |
-
|
63 |
-
This demo replicates Solr's text analysis functionality, showing how text is processed through various normalization steps including tokenization, stopword removal, and language-specific analysis.
|
64 |
-
|
65 |
-
Try the examples below or enter your own text to see how different languages are processed!
|
66 |
-
""",
|
67 |
-
article="""
|
68 |
-
### About
|
69 |
-
This tool demonstrates the text normalization pipeline used in the Impresso project, which mirrors Apache Solr's text analysis capabilities.
|
70 |
-
""",
|
71 |
-
theme=gr.themes.Soft(),
|
72 |
allow_flagging="never"
|
73 |
)
|
74 |
|
75 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
30 |
print("❌ Pipeline error:", e)
|
31 |
return f"Error: {e}"
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
demo = gr.Interface(
|
34 |
fn=normalize,
|
35 |
inputs=[
|
36 |
+
gr.Textbox(label="Enter Text"),
|
37 |
+
gr.Dropdown(choices=LANGUAGES, value="Auto-detect", label="Language")
|
|
|
|
|
|
|
|
|
38 |
],
|
39 |
+
outputs=gr.Textbox(label="Normalized Output"),
|
40 |
+
title="Solr Normalization Pipeline",
|
41 |
+
description="Text normalization replicating Solr functionality.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
allow_flagging="never"
|
43 |
)
|
44 |
|
45 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|