maslionok
committed on
Commit
·
442b8bb
1
Parent(s):
38629ba
fixed about the pipeline part
Browse files
app.py
CHANGED
@@ -81,8 +81,13 @@ with gr.Blocks(title="Solr Normalization Demo") as demo:
|
|
81 |
gr.Markdown(
|
82 |
"""
|
83 |
- **Tokenization**: Splits text into individual tokens
|
84 |
-
- **
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
86 |
"""
|
87 |
)
|
88 |
|
|
|
81 |
gr.Markdown(
|
82 |
"""
|
83 |
- **Tokenization**: Splits text into individual tokens
|
84 |
+
- **Tokenfilter**: Applies a sequence of transformations to each token:
|
85 |
+
- elision: removes elided articles and their apostrophes (e.g. l'avion → avion)
|
86 |
+
- lowercase: converts to lowercase
|
87 |
+
- asciifolding: converts accented characters to ASCII
|
88 |
+
- stop: removes common stopwords
|
89 |
+
- stemmer: reduces words to their root form
|
90 |
+
- normalization: applies language-specific normalization
|
91 |
"""
|
92 |
)
|
93 |
|