(づ˶•༝•˶)づ
Browse files
- app.py +48 -0
- data.parquet +3 -0
- requirements.txt +3 -0
- txt2parq.py +19 -0
app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Relative path of the Parquet table that the search function reads.
PARQUET_FILE = 'data.parquet'

# Example tag strings. They populate the results table before anything has
# been typed, and they are OR-ed together as the query when the search box
# is empty.
preset_values = [
    "1girl, kamado nezuko, kimetsu no yaiba",
    "1girl, kanroji mitsuri, kimetsu no yaiba",
    "1girl, fern (sousou no frieren), sousou no frieren",
    "1girl, elaina (majo no tabitabi), majo no tabitabi",
]
|
12 |
+
|
13 |
+
def search_parquet(search_term):
    """Return the rows of the Parquet table whose text contains *search_term*.

    The match is a case-insensitive, literal substring test. When
    *search_term* is empty, whitespace-only or ``None``, rows matching any of
    ``preset_values`` are returned instead.

    Args:
        search_term: Text typed by the user.

    Returns:
        A DataFrame holding the matching rows, limited to the first 12
        columns. On failure, a one-row DataFrame with a single ``'Error'``
        column describing the problem is returned instead of raising.
    """
    import re  # local import: only needed to escape the preset patterns

    try:
        df = pd.read_parquet(PARQUET_FILE)

        # The search column is 'teks'; fall back to 'text', which is the
        # column name the txt-to-parquet converter writes.
        text_column = 'teks' if 'teks' in df.columns else 'text'
        haystack = df[text_column].str.lower()

        if search_term and search_term.strip():
            # regex=False: tags routinely contain '(' and ')', which would
            # otherwise be parsed as regex syntax (and an unbalanced '('
            # typed mid-word would raise).
            mask = haystack.str.contains(
                search_term.lower(), regex=False, na=False
            )
        else:
            # Escape each preset so its parentheses match literally; only
            # the '|' separators keep their regex meaning.
            pattern = "|".join(
                re.escape(preset.lower()) for preset in preset_values
            )
            mask = haystack.str.contains(pattern, regex=True, na=False)

        # Slicing past the last column is harmless, so no width check needed.
        return df[mask].iloc[:, :12]
    except FileNotFoundError:
        return pd.DataFrame(
            {'Error': ['Parquet file not found. Please check the file path.']}
        )
    except Exception as e:
        # UI boundary: surface the problem in the results table rather than
        # letting the event handler crash.
        return pd.DataFrame({'Error': [f'An error occurred: {e}']})
|
27 |
+
|
28 |
+
if __name__ == "__main__":
    # Build the search UI: one textbox feeding one results table.
    with gr.Blocks() as app:
        gr.Markdown("## 🔍 Text Search for Animagine tag characters")
        with gr.Column():
            search_input = gr.Textbox(
                label="Search for characters or series:",
                placeholder="sousou no frieren",
            )
            # Show the preset tags until the first search result replaces them.
            search_output = gr.Dataframe(
                label="Search Results",
                value=pd.DataFrame({'Characters tag': preset_values}),
                headers="auto",
            )

        # Re-run the search whenever the textbox value changes.
        search_input.change(
            fn=search_parquet,
            inputs=search_input,
            outputs=search_output,
        )

    app.launch()
|
data.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f502782f0d567da57db6b24f46db80899b61767abd09b4c25c8839fff6cb680
|
3 |
+
size 102173
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.16.0
|
2 |
+
pandas==2.1.0
|
3 |
+
pyarrow==14.0.1
|
txt2parq.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def txt_to_parquet(file):
    """Convert a UTF-8 text file into a one-column Parquet file.

    Each line of the input becomes one row of a ``"text"`` column, with
    leading and trailing whitespace stripped.

    Args:
        file: The uploaded file, given either as a path string or as an
            object exposing the path through a ``name`` attribute. ``None``
            (nothing uploaded) is accepted.

    Returns:
        The path of the written Parquet file (``"output.parquet"``), or
        ``None`` when no file was supplied.
    """
    if file is None:
        # Button pressed without an upload: nothing to convert.
        return None

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    source_path = getattr(file, "name", file)
    with open(source_path, "r", encoding="utf-8") as f:
        rows = [line.strip() for line in f]

    # NOTE(review): the column is named "text" here, while app.py looks for
    # 'teks' — confirm which name data.parquet is meant to use.
    df = pd.DataFrame({"text": rows})
    output_file = "output.parquet"
    df.to_parquet(output_file, engine="pyarrow", index=False)
    return output_file
|
11 |
+
|
12 |
+
# Build the converter UI: upload a .txt file, press the button, download the
# resulting .parquet file.
with gr.Blocks() as app:
    gr.Markdown("## 📝 Convert TXT File to Parquet")
    txt_file = gr.File(label="Upload .txt file", file_types=[".txt"])
    convert_button = gr.Button("Convert to Parquet")
    # Output-only slot: users download from it, they do not upload into it.
    parquet_file = gr.File(label="Download .parquet file", interactive=False)
    convert_button.click(txt_to_parquet, inputs=txt_file, outputs=parquet_file)

# Only start the server when run as a script, so importing this module
# (e.g. to reuse txt_to_parquet) does not launch a web app.
if __name__ == "__main__":
    app.launch()
|