sentis-audio-frequency-to-16khz / AudioResample.cs

Paul Bird

Upload 3 files

125ecbf verified 12 months ago

3 kB

	using System.Collections;
	using System.Collections.Generic;
	using UnityEngine;
	using Unity.Sentis;

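	/*
	* Resamples 44.1 kHz and 22.05 kHz audio clips to 16 kHz using Sentis models
	* ===========================================================================
	*
	* Assign the audio clip to the inputAudio field and press play.
	* Progress messages appear in the console; the converted clip is stored in
	* outputAudio and is played back when playFinalAudio is enabled.
	* Press the space bar to run the conversion again.
	*/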

	/// <summary>
	/// Converts <see cref="inputAudio"/> (44100 Hz or 22050 Hz) to a 16000 Hz clip by running
	/// one of two Sentis models loaded from the StreamingAssets folder. The conversion runs once
	/// in <c>Start</c> and again every time the space bar is pressed.
	/// </summary>
	public class AudioResample : MonoBehaviour
	{
		//Place the audio clip to resample here
		public AudioClip inputAudio;

		// Receives the converted 16000 Hz clip after each successful conversion.
		public AudioClip outputAudio;

		// When true, the converted clip is played through the AudioSource on this GameObject.
		public bool playFinalAudio = true;

		// Worker for the most recent conversion. It is kept in a field so it can be released
		// before the next conversion and when the component is destroyed.
		IWorker engine;

		BackendType backend = BackendType.GPUCompute;

		// Tensor operations (used for the Slice in the 44100 Hz path) and their allocator.
		Ops ops;
		ITensorAllocator allocator;

		/// <summary>Creates the tensor ops helper and runs the first conversion.</summary>
		void Start()
		{
			allocator = new TensorCachingAllocator();
			ops = WorkerFactory.CreateOps(backend, allocator);

			ConvertAudio();
		}

		/// <summary>Re-runs the conversion whenever the space bar is pressed.</summary>
		void Update()
		{
			if (Input.GetKeyDown(KeyCode.Space))
			{
				ConvertAudio();
			}
		}

		/// <summary>
		/// Resamples <see cref="inputAudio"/> to 16000 Hz, stores the result in
		/// <see cref="outputAudio"/> and optionally plays it. Logs and returns early when no clip
		/// is assigned, the clip's frequency is unsupported, or its samples cannot be read.
		/// </summary>
		void ConvertAudio()
		{
			// Without a clip there is nothing to convert; report it instead of throwing.
			if (inputAudio == null)
			{
				Debug.LogError("No input audio clip has been assigned.");
				return;
			}

			int frequency = inputAudio.frequency;
			int channels = inputAudio.channels;
			Debug.Log($"The frequency of the input audio clip is {frequency} Hz with {channels} channels.");

			// Pick the model file that matches the clip's sample rate.
			string modelFile;
			if (frequency == 44100)
			{
				modelFile = "/audio_resample_44100_16000.sentis";
			}
			else if (frequency == 22050)
			{
				modelFile = "/audio_resample_22050_16000.sentis";
			}
			else
			{
				Debug.Log("Only frequencies of 44kHz and 22kHz are compatible");
				return;
			}

			// Read the samples before allocating any backend resources so that a failed read
			// costs nothing. GetData fills the array with interleaved channel data.
			int size = inputAudio.samples * channels;
			float[] data = new float[size];
			if (!inputAudio.GetData(data, 0))
			{
				Debug.LogError("Could not read sample data from the input audio clip.");
				return;
			}

			Model model = ModelLoader.Load(Application.streamingAssetsPath + modelFile);

			// Release the worker from the previous conversion; otherwise every call would leak
			// the backend resources held by the old worker.
			engine?.Dispose();
			engine = WorkerFactory.CreateWorker(backend, model);

			// The whole clip is passed to the model as a single row of `size` values.
			using var input = new TensorFloat(new TensorShape(1, size), data);

			engine.Execute(input);

			// PeekOutput does not transfer ownership: the tensor stays owned by the worker,
			// so it is not disposed here.
			var output = engine.PeekOutput() as TensorFloat;
			if (output == null)
			{
				Debug.LogError("The model did not produce a float tensor output.");
				return;
			}

			float[] outData;
			if (frequency == 44100)
			{
				// View the output as (N/2, 2) and keep only column 0, i.e. every second value.
				// NOTE(review): presumably the 44100 Hz model emits twice the required number
				// of values; this also assumes output.shape[1] is even - confirm against the model.
				using var A = output.ShallowReshape(new TensorShape(output.shape[1] / 2, 2)) as TensorFloat;
				using var B = ops.Slice(A, new[] { 0 }, new[] { 1 }, new[] { 1 }, new[] { 1 });
				B.MakeReadable();
				outData = B.ToReadOnlyArray();
			}
			else
			{
				output.MakeReadable();
				outData = output.ToReadOnlyArray();
			}

			// AudioClip.Create expects the sample count per channel.
			int samplesOut = outData.Length / channels;

			outputAudio = AudioClip.Create("outputAudio", samplesOut, channels, 16000, false);
			outputAudio.SetData(outData, 0);

			Debug.Log($"The audio has been converted to 16Khz with {channels} channels.");

			if (playFinalAudio)
			{
				var source = GetComponent<AudioSource>();
				if (source != null)
				{
					source.PlayOneShot(outputAudio);
				}
				else
				{
					Debug.LogWarning("playFinalAudio is enabled but no AudioSource component was found on this GameObject.");
				}
			}
		}

		/// <summary>Releases the worker, the tensor ops helper and the allocator.</summary>
		private void OnDestroy()
		{
			engine?.Dispose();
			ops?.Dispose();
			allocator?.Dispose();
		}
	}