import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time

# Import your model and anything else you need.
# You can even install other packages included in your repo.
# However, during the evaluation the container will not have access to the internet,
# so you must include everything you need in your model repo. Common Python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt.
# For testing, this is the docker image that will be used: https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
from models import Model
from preprocess import preprocess

# Load the dataset. The dataset will be automatically downloaded to /tmp/data during evaluation.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)

# This should fail since there is no network access during the model run.
try:
    import requests
    r = requests.get("https://stresearch.github.io/SAFE/logo.jpg")
    print(r.text)
except Exception as e:
    print("download test failed as expected")
    print(e)

# Load your model.
device = "cuda:0"
model = Model().to(device)

# Iterate over the dataset.
out = []
for el in tqdm.tqdm(dataset_remote):
    start_time = time.time()
    # Each element is a dict:
    #   el["id"] is the id of the example, and el["audio"] contains the audio file.
    #   el["audio"]["bytes"] contains the bytes from reading the raw audio.
    #   el["audio"]["path"] contains the filename. This is just for reference; you can't actually load from it.
    # If you are using libraries that expect a file, you can use a BytesIO object.
    try:
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)
        with torch.no_grad():
            # Soft decision (such as a log-likelihood score):
            #   a positive score corresponds to a synthetic prediction,
            #   a negative score corresponds to a pristine prediction.
            score = model(tensor.to(device)).cpu().item()

        # We require a hard decision to be submitted, so you need to pick a threshold.
        pred = "generated" if score > model.threshold else "pristine"

        # Append your prediction. "id" and "pred" are required. "score" will not be used
        # in scoring, but we encourage you to include it; we'll use it for analysis of the results.
        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        print(e)
        print("failed", el["id"])
        out.append(dict(id=el["id"], pred="none", score=None))

# Save the final result, and that's it.
pd.DataFrame(out).to_csv("submission.csv", index=False)
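
# ---------------------------------------------------------------------------
# For reference: a minimal, hypothetical sketch of the interface the loop
# above assumes from your own repo. Everything below is illustrative, not the
# official template: the module split (preprocess.py / models.py), the use of
# torchaudio, the 16 kHz sample rate, and the 0.0 threshold are all
# assumptions. The only real contract is that preprocess(file_like) returns a
# tensor and that Model is a torch.nn.Module exposing a .threshold attribute
# and producing one score per input.

# Contents of a hypothetical preprocess.py: decode the in-memory audio into a
# model-ready tensor. torchaudio.load accepts file-like objects such as BytesIO.
import torchaudio

TARGET_SR = 16_000  # assumed sample rate; use whatever your model expects

def preprocess(file_like):
    waveform, sr = torchaudio.load(file_like)
    if sr != TARGET_SR:
        waveform = torchaudio.functional.resample(waveform, sr, TARGET_SR)
    return waveform

# Contents of a hypothetical models.py: any torch.nn.Module works, as long as
# its forward pass maps the preprocessed tensor to a single scalar score.
class Model(torch.nn.Module):
    threshold = 0.0  # assumed decision boundary for the "generated"/"pristine" call

    def forward(self, x):
        # Placeholder scoring so the sketch runs end to end; replace with your
        # detector. Positive output means "generated", negative means "pristine".
        return x.mean()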