dinhquangson committed on
Commit
eae13a2
·
verified ·
1 Parent(s): a7a8d61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -12,7 +12,11 @@ from itertools import islice
12
 
13
  FILEPATH_PATTERN = "structured_data_doc.parquet"
14
  NUM_PROC = os.cpu_count()
 
15
 
 
 
 
16
 
17
  # Determine device based on GPU availability
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -69,6 +73,7 @@ async def create_upload_file(file: UploadFile = File(...)):
69
  full_dataset = load_dataset("parquet",
70
  data_files=FILEPATH_PATTERN,
71
  split="train",
 
72
  keep_in_memory=True,
73
  num_proc=NUM_PROC*2)
74
  # Generate and append embeddings to the train split
 
12
 
13
  FILEPATH_PATTERN = "structured_data_doc.parquet"
14
  NUM_PROC = os.cpu_count()
15
+ parent_path = dirname(getcwd())
16
 
17
+ cache_path = join(parent_path,'.cache')
18
+ if not exists(cache_path):
19
+ makedirs(cache_path)
20
 
21
  # Determine device based on GPU availability
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
73
  full_dataset = load_dataset("parquet",
74
  data_files=FILEPATH_PATTERN,
75
  split="train",
76
+ cache_path=cache_path,
77
  keep_in_memory=True,
78
  num_proc=NUM_PROC*2)
79
  # Generate and append embeddings to the train split