import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time
# Import your model and anything else you want.
# You can even install other packages included in your repo.
# However, during the evaluation the container will not have access to the internet,
# so you must include everything you need in your model repo. Common Python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt.
# For testing, this is the Docker image that will be used: https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
from models import Model
from preprocess import preprocess
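# A minimal sketch of the interface this script assumes from models.py and
# preprocess.py (illustrative only; your actual implementations can differ):
#
#   class Model(torch.nn.Module):
#       threshold: float                            # decision threshold used below
#       def forward(self, x): ...                   # returns a one-element score tensor
#
#   def preprocess(file_like) -> torch.Tensor: ...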
# load the dataset. The dataset will be automatically downloaded to /tmp/data during evaluation.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
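# To sanity-check the stream without iterating everything, you can peek at the
# first example (commented out; field names match the loop below):
# first = next(iter(dataset_remote))
# print(first["id"], first["audio"].keys())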
# this should fail since there is no network access during the model run
try:
    import requests
    r = requests.get("https://stresearch.github.io/SAFE/logo.jpg")
    print(r.text)
except Exception as e:
    print("download test failed as expected")
    print(e)
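# Anything else your model needs (weights, config files, etc.) must ship inside
# your repo and be loaded from a local path, e.g. (illustrative path):
# weights_path = os.path.join(os.path.dirname(__file__), "weights.pt")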
# load your model
device = "cuda:0"
model = Model().to(device)
model.eval()  # disable dropout / batch-norm updates for inference
# iterate over the dataset
out = []
for el in tqdm.tqdm(dataset_remote):
    start_time = time.time()
    # each element is a dict:
    # el["id"] is the id of the example and el["audio"] contains the audio file
    # el["audio"]["bytes"] contains the bytes from reading the raw audio
    # el["audio"]["path"] contains the filename; this is just for reference and you can't actually load it
    # if you are using libraries that expect a file, you can use a BytesIO object
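    # for example, soundfile accepts a file-like object directly (illustrative;
    # assumes soundfile is listed in your requirements):
    # import soundfile as sf
    # audio, sample_rate = sf.read(io.BytesIO(el["audio"]["bytes"]))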
    try:
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)
        with torch.no_grad():
            # soft decision (such as a log-likelihood score)
            # a positive score corresponds to a synthetic prediction
            # a negative score corresponds to a pristine prediction
            score = model(tensor.to(device)).cpu().item()
        # we require a hard decision to be submitted, so you need to pick a threshold
        pred = "generated" if score > model.threshold else "pristine"
        # append your prediction
        # "id" and "pred" are required; "score" will not be used in scoring,
        # but we encourage you to include it, and we'll use it for analysis of the results
        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        print(e)
        print("failed", el["id"])
        # still emit a row so every example id appears in the submission
        out.append(dict(id=el["id"], pred="none", score=None, time=time.time() - start_time))
# save the final result and that's it
pd.DataFrame(out).to_csv("submission.csv", index=False)
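# The resulting submission.csv has one row per example (values illustrative):
# id,pred,score,time
# 0,generated,1.32,0.041
# 1,pristine,-0.87,0.038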