Spaces:
Running
Running
Update app.py
Browse files — on upload dans un dataset huggingface
app.py
CHANGED
@@ -419,14 +419,68 @@ def ComputeStatistics(df):
|
|
419 |
|
420 |
|
421 |
|
422 |
-
|
423 |
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
|
429 |
-
files = [f.name for f in DATASET_DIR.iterdir() if f.is_file()]
|
430 |
-
return {"files": files}
|
431 |
|
432 |
|
|
|
419 |
|
420 |
|
421 |
|
422 |
+
import os

from huggingface_hub import HfApi

# Temporary directory where uploaded chunks are staged before reassembly.
UPLOAD_DIR = Path("/app/uploads")
# SECURITY: never hard-code a real token in source. Read it from the
# environment (set HF_TOKEN in the Space secrets / deployment config).
# The placeholder default keeps behavior identical when the env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_your_api_token")
DATASET_REPO = "your-username/your-dataset"  # 🔥 Replace with your dataset repo id

# Shared Hugging Face Hub client used by the upload endpoint below.
api = HfApi()
|
429 |
+
|
430 |
+
@app.post("/upload-dataset/")
async def upload_file(
    file: UploadFile = File(...),
    chunkIndex: int = Form(...),
    totalChunks: int = Form(...),
    fileName: str = Form(...),
    directory: str = Form(...),
):
    """Receive one chunk of a chunked upload; reassemble and push on the last.

    Each chunk is stored as ``<fileName>.part<chunkIndex>`` under
    ``UPLOAD_DIR/directory``. When the final chunk arrives
    (``chunkIndex + 1 == totalChunks``) all parts are concatenated in order,
    the reassembled file is uploaded to the ``DATASET_REPO`` dataset on the
    Hugging Face Hub, and the local parts/file are deleted.

    Raises:
        HTTPException 400: client-supplied ``fileName``/``directory`` contain
            path components that would escape ``UPLOAD_DIR``.
        HTTPException 500: any I/O or Hub upload failure.
    """
    # SECURITY: fileName and directory come straight from the client and are
    # joined into filesystem paths and the repo path. Reject traversal attempts
    # BEFORE the try block so the 400 is not rewrapped as a 500 below.
    if not fileName or Path(fileName).name != fileName:
        raise HTTPException(status_code=400, detail="Invalid fileName.")
    if Path(directory).is_absolute() or ".." in Path(directory).parts:
        raise HTTPException(status_code=400, detail="Invalid directory.")

    try:
        print(f"Received: chunkIndex={chunkIndex}, totalChunks={totalChunks}, fileName={fileName}, directory={directory}")

        # Create the staging directory if needed.
        target_dir = UPLOAD_DIR / directory
        target_dir.mkdir(parents=True, exist_ok=True)

        # Persist this chunk under an index-suffixed name.
        chunk_path = target_dir / f"{fileName}.part{chunkIndex}"
        with open(chunk_path, "wb") as f:
            f.write(await file.read())

        # Reassemble once the last chunk has been received.
        if chunkIndex + 1 == totalChunks:
            final_file_path = target_dir / fileName
            with open(final_file_path, "wb") as final_file:
                for i in range(totalChunks):
                    part_path = target_dir / f"{fileName}.part{i}"
                    with open(part_path, "rb") as part_file:
                        final_file.write(part_file.read())
                    os.remove(part_path)  # delete each chunk after merging

            print(f"Final file created: {final_file_path}")

            # 🔥 Push the reassembled file to the Hugging Face dataset repo.
            api.upload_file(
                path_or_fileobj=str(final_file_path),
                path_in_repo=f"{directory}/{fileName}",  # store in a sub-folder of the dataset
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
            )

            # Remove the local copy once it is safely uploaded.
            os.remove(final_file_path)

            return {
                "status": "success",
                "message": "File uploaded successfully to Hugging Face.",
                "hf_url": f"https://huggingface.co/datasets/{DATASET_REPO}/blob/main/{directory}/{fileName}"
            }

        return {"status": "success", "message": "Chunk uploaded successfully."}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
|
484 |
|
|
|
|
|
485 |
|
486 |
|