import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time
# Import your model and anything else you want.
# You can even install other packages included in your repo.
# However, during the evaluation the container will not have access to the internet,
# so you must include everything you need in your model repo. Common Python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt.
# For testing, this is the Docker image that will be used: https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
from models import Model
from preprocess import preprocess
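# A minimal sketch of the interface this script assumes from models.py and
# preprocess.py (illustrative only; your actual implementations can differ):
#
#   class Model(torch.nn.Module):
#       threshold: float                            # decision threshold used below
#       def forward(self, x): ...                   # returns a one-element score tensor
#
#   def preprocess(file_like) -> torch.Tensor: ...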
# load the dataset. The dataset will be automatically downloaded to /tmp/data during evaluation.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
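# To sanity-check the stream without iterating everything, you can peek at the
# first example (commented out; field names match the loop below):
# first = next(iter(dataset_remote))
# print(first["id"], first["audio"].keys())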
# this should fail since there is no network access during the model run
try:
    import requests
    r = requests.get("https://stresearch.github.io/SAFE/logo.jpg")
    print(r.text)
except Exception as e:
    print("download test failed as expected")
    print(e)
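# Anything else your model needs (weights, config files, etc.) must ship inside
# your repo and be loaded from a local path, e.g. (illustrative path):
# weights_path = os.path.join(os.path.dirname(__file__), "weights.pt")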
# load your model
device = "cuda:0"
model = Model().to(device)
model.eval()  # disable dropout / batch-norm updates for inference
# iterate over the dataset
out = []
for el in tqdm.tqdm(dataset_remote):
    start_time = time.time()
    # each element is a dict:
    # el["id"] is the id of the example and el["audio"] contains the audio file
    # el["audio"]["bytes"] contains the bytes from reading the raw audio
    # el["audio"]["path"] contains the filename; this is just for reference and you can't actually load it
    # if you are using libraries that expect a file, you can use a BytesIO object
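    # for example, soundfile accepts a file-like object directly (illustrative;
    # assumes soundfile is listed in your requirements):
    # import soundfile as sf
    # audio, sample_rate = sf.read(io.BytesIO(el["audio"]["bytes"]))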
    try:
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)
        with torch.no_grad():
            # soft decision (such as a log-likelihood score)
            # a positive score corresponds to a synthetic prediction
            # a negative score corresponds to a pristine prediction
            score = model(tensor.to(device)).cpu().item()
        # we require a hard decision to be submitted, so you need to pick a threshold
        pred = "generated" if score > model.threshold else "pristine"
        # append your prediction
        # "id" and "pred" are required; "score" will not be used in scoring,
        # but we encourage you to include it, and we'll use it for analysis of the results
        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        print(e)
        print("failed", el["id"])
        # still emit a row so every example id appears in the submission
        out.append(dict(id=el["id"], pred="none", score=None, time=time.time() - start_time))
# save the final result and that's it
pd.DataFrame(out).to_csv("submission.csv", index=False)
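# The resulting submission.csv has one row per example (values illustrative):
# id,pred,score,time
# 0,generated,1.32,0.041
# 1,pristine,-0.87,0.038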