import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time
# Import your model and anything else you need.
# You can even install other packages bundled in your repo.
# However, during evaluation the container will not have access to the internet,
# so you must include everything you need in your model repo. Common Python
# libraries will be installed. Feel free to contact us to add dependencies to
# the requirements.txt.
# For testing, this is the Docker image that will be used:
#   https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
from models import Model
from preprocess import preprocess
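# For reference, a minimal sketch of the interfaces this script assumes.
# The names come from the imports above; the exact signatures are an
# assumption -- your implementations just need to support the calls below:
#
#   class Model(torch.nn.Module):
#       threshold: float              # decision threshold used for the hard call
#       def forward(self, x):         # x: preprocessed audio tensor on `device`
#           ...                       # returns a scalar score (>0 => synthetic)
#
#   def preprocess(file_like):        # file_like: io.BytesIO over the raw bytes
#       ...                           # returns a torch.Tensor ready for the model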
# Load the dataset. It will be downloaded automatically to /tmp/data during evaluation.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
# Load your model.
device = "cuda:0"
model = Model().to(device)
model.eval()  # put the model in inference mode (disables dropout/batchnorm updates)
# Iterate over the dataset.
out = []
for el in tqdm.tqdm(dataset_remote):
    start_time = time.time()
    # Each element is a dict:
    #   el["id"] is the id of the example and el["audio"] contains the audio file.
    #   el["audio"]["bytes"] contains the bytes of the raw audio file.
    #   el["audio"]["path"] contains the filename. This is just for reference;
    #   you can't actually load from it.
    # If you are using libraries that expect a file, wrap the bytes in an
    # io.BytesIO object, as below.
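    # For example, a hypothetical decode step (assuming the soundfile package,
    # which accepts file-like objects, is available in the evaluation image):
    #   import soundfile as sf
    #   waveform, sample_rate = sf.read(io.BytesIO(el["audio"]["bytes"]))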
    try:
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)
        with torch.no_grad():
            # Soft decision (such as a log-likelihood score):
            # a positive score corresponds to a synthetic prediction,
            # a negative score corresponds to a pristine prediction.
            score = model(tensor.to(device)).cpu().item()
        # A hard decision must be submitted, so you need to pick a threshold.
        pred = "generated" if score > model.threshold else "pristine"
        # Append your prediction. "id" and "pred" are required. "score" will not
        # be used in scoring, but we encourage you to include it; we'll use it
        # for analysis of the results.
        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        print(e)
        print("failed", el["id"])
        out.append(dict(id=el["id"], pred="none", score=None))
# Save the final result and that's it.
pd.DataFrame(out).to_csv("submission.csv", index=False)
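# The resulting submission.csv has one row per example, with columns
# id, pred ("generated"/"pristine"/"none"), score and time; only "id" and
# "pred" are required for scoring.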