Spaces:
Runtime error
Runtime error
dinhquangson
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,11 @@ from itertools import islice
|
|
12 |
|
13 |
FILEPATH_PATTERN = "structured_data_doc.parquet"
|
14 |
NUM_PROC = os.cpu_count()
|
|
|
15 |
|
|
|
|
|
|
|
16 |
|
17 |
# Determine device based on GPU availability
|
18 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -69,6 +73,7 @@ async def create_upload_file(file: UploadFile = File(...)):
|
|
69 |
full_dataset = load_dataset("parquet",
|
70 |
data_files=FILEPATH_PATTERN,
|
71 |
split="train",
|
|
|
72 |
keep_in_memory=True,
|
73 |
num_proc=NUM_PROC*2)
|
74 |
# Generate and append embeddings to the train split
|
|
|
12 |
|
13 |
FILEPATH_PATTERN = "structured_data_doc.parquet"
|
14 |
NUM_PROC = os.cpu_count()
|
15 |
+
parent_path = dirname(getcwd())
|
16 |
|
17 |
+
cache_path = join(parent_path,'.cache')
|
18 |
+
if not exists(cache_path):
|
19 |
+
makedirs(cache_path)
|
20 |
|
21 |
# Determine device based on GPU availability
|
22 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
73 |
full_dataset = load_dataset("parquet",
|
74 |
data_files=FILEPATH_PATTERN,
|
75 |
split="train",
|
76 |
+
cache_path=cache_path,
|
77 |
keep_in_memory=True,
|
78 |
num_proc=NUM_PROC*2)
|
79 |
# Generate and append embeddings to the train split
|