Spaces:

mskov
/

test

Runtime error

App Files Files Community

test / app.py

mskov

Update app.py

6b7e790 over 1 year ago

raw

history blame

2.18 kB

	# Standard-library modules; `os` drives the runtime `pip install` calls below.
	# NOTE(review): `sys` is not referenced anywhere in the visible source.
	import os
	import sys
	# Dependencies are installed through the shell at start-up, each one placed
	# before the import that needs it. The return codes of os.system are not
	# checked, so a failed install is not detected at this point.
	os.system("pip install transformers==4.27.0")
	os.system("pip install numpy==1.23")
	from transformers import pipeline, WhisperModel, WhisperTokenizer, WhisperFeatureExtractor, AutoFeatureExtractor, AutoProcessor, WhisperConfig, WhisperProcessor, WhisperForConditionalGeneration
	os.system("pip install jiwer")
	# NOTE(review): the name `wer` is rebound later by `wer = load("wer")`, so
	# this jiwer function is never called in the visible source.
	from jiwer import wer
	os.system("pip install datasets[audio]")
	# NOTE(review): `evaluate` has no matching install call in this file —
	# presumably it is provided by the environment; confirm.
	from evaluate import evaluator, load
	# `pipeline` is imported a second time here (already imported above).
	from transformers import AutoModelForSequenceClassification, pipeline, BertTokenizer, AutoTokenizer, GPT2Model
	from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
	import gradio as gr
	import torch
	# The two imports below repeat names that are already imported above.
	from datasets import load_dataset
	from transformers import WhisperForConditionalGeneration, WhisperProcessor


	# Load the processor and the model weights for the same checkpoint.
	processor = WhisperProcessor.from_pretrained("mskov/whisper-small-esc50")
	model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc50")
	# Use the GPU when one is present and fall back to the CPU otherwise; an
	# unconditional .to("cuda") fails on hosts without CUDA.
	model = model.to("cuda" if torch.cuda.is_available() else "cpu")

	def map_to_pred(batch):
	    """Add a normalized reference and prediction to one dataset example.

	    Reads ``batch["audio"]["array"]`` and ``batch["category"]``, runs the
	    module-level ``model`` on the audio features, and stores the normalized
	    label under ``batch["reference"]`` and the normalized decoded output
	    under ``batch["prediction"]``. The prediction is also printed.

	    Args:
	        batch: Mapping with an ``"audio"`` entry (itself holding an
	            ``"array"``) and a ``"category"`` entry.

	    Returns:
	        The same ``batch`` object, with the two keys above added.
	    """
	    audio = batch["audio"]
	    # NOTE(review): the sampling rate is fixed at 16000 instead of being read
	    # from the audio entry — confirm the dataset really is 16 kHz.
	    input_features = processor(
	        audio["array"], sampling_rate=16000, return_tensors="pt"
	    ).input_features
	    batch["reference"] = processor.tokenizer._normalize(batch["category"])

	    with torch.no_grad():
	        # Follow the model's own device rather than hard-coding "cuda", so
	        # the inputs always land where the weights are.
	        predicted_ids = model.generate(input_features.to(model.device))[0]
	    transcription = processor.decode(predicted_ids)
	    batch["prediction"] = processor.tokenizer._normalize(transcription)
	    print(batch["prediction"])
	    return batch

	# Apply map_to_pred through the dataset's .map to collect references and
	# predictions.
	# NOTE(review): `librispeech_test_clean` is not defined anywhere in the
	# visible source — confirm where it is created; otherwise this line raises
	# NameError before the app starts.
	result = librispeech_test_clean.map(map_to_pred)

	# Load the "wer" metric. This rebinds the name `wer` that was imported from
	# jiwer at the top of the file.
	wer = load("wer")
	# Print the computed metric scaled by 100 (presumably a percentage — confirm).
	print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))



	# Speech-recognition pipeline built from the already-loaded model and
	# processor; `transcribe` needs a callable named `pipe` and nothing else in
	# the file creates one. The pipeline is pinned to the model's device so the
	# inputs and the weights stay together.
	pipe = pipeline(
	    "automatic-speech-recognition",
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    device=model.device,
	)


	def transcribe(audio):
	    """Return the transcription of one recording.

	    Args:
	        audio: Value passed in by the Gradio audio input; the interface
	            below configures it with ``type="filepath"``.

	    Returns:
	        The ``"text"`` field of the pipeline output. A single value is
	        returned because the interface declares a single ``"text"`` output.
	    """
	    return pipe(audio)["text"]

	# Gradio UI: a microphone recording is handed to `transcribe` as a file path
	# and the function's result is displayed in a single text output.
	# NOTE(review): the `source=` keyword of gr.Audio is version-dependent —
	# confirm it against the Gradio release installed for this app.
	iface = gr.Interface(
	fn=transcribe,
	inputs=gr.Audio(source="microphone", type="filepath"),
	outputs="text",
	title="Whisper Small ESC50 Test",
	)

	# Start serving the interface.
	iface.launch()


	# Disabled scratch code, kept inside a bare string literal so it never runs.
	# NOTE(review): it refers to `inputs`, which is not defined in the visible
	# source; candidate for removal.
	'''
	print("check check")
	print(inputs)
	input_features = inputs.input_features
	decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
	last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
	list(last_hidden_state.shape)
	print(list(last_hidden_state.shape))
	'''