<?xml version="1.0" encoding="UTF-8"?>
<!--
  Metadata record for the Quran Speech Recognition Dataset.

  Layout:
    * general information (title, description, creator, contributors,
      license, languages, download links)
    * train_set / test_set : per-split statistics and audio format
    * processing_notes     : preprocessing applied to text and audio

  Values keep their human-readable form (e.g. "16000 Hz", "16-bit",
  "Mono (1)"); consumers that need numbers must strip the unit text.
-->
<dataset_metadata>
  <title>Quran Speech Recognition Dataset</title>
  <description>This dataset is designed for training and evaluating Quranic speech recognition models, with a focus on syllable-based transcription. It includes audio recordings and corresponding syllabized transcriptions from professional reciters and regular users, ensuring coverage of real-world scenarios.</description>
  <creator>Research Group for Quranic Speech Recognition</creator>

  <!-- One <contributor> element per person. -->
  <contributors>
    <contributor>Tarek ELDEEB</contributor>
    <contributor>Dr.Moustafa Elshafi - Zewailcity</contributor>
  </contributors>

  <license>Creative Commons Attribution 4.0 International (CC BY 4.0)</license>
  <languages>Arabic (ar)</languages>
  <dataset_link>https://archive.org/details/quran-speech-dataset</dataset_link>

  <!-- The value is a web share URL whose fragment carries a percent-encoded magnet link. -->
  <torrent_file>
    <complete_dataset_torrent>https://utweb.rainberrytv.com/gui/share.html#link=magnet%3A%3Fxt%3Durn%3Abtih%3A34e50d0fd9afb7f308883b14e5e60f6532e30141%26dn%3Dquran-speech-dataset%26ws%3Dhttp%253a%252f%252fia601500.us.archive.org%252f23%252fitems%252f%26tr%3Dhttp%253a%252f%252fbt1.archive.org%253a6969%252fannounce%26tr%3Dhttp%253a%252f%252fbt2.archive.org%253a6969%252fannounce</complete_dataset_torrent>
  </torrent_file>

  <!--
    Training split.
    total_duration_seconds and total_duration_hours state the same total in
    two units (63691.96 s is about 17.69 h).
    NOTE(review): maximum_audio_duration has no unit in the data; presumably
    seconds. Confirm with the dataset authors.
  -->
  <train_set>
    <number_of_samples>11004</number_of_samples>
    <total_duration_seconds>63691.96</total_duration_seconds>
    <total_duration_hours>17.69</total_duration_hours>
    <maximum_audio_duration>24.71</maximum_audio_duration>
    <sampling_rate>16000 Hz</sampling_rate>
    <bit_depth>16-bit</bit_depth>
    <channels>Mono (1)</channels>
    <notes>Audio recordings from professional Quranic reciters, syllabized using automated Tajweed rule-based software. Preprocessing steps included text normalization and resampling to match the required format.</notes>
  </train_set>

  <!--
    Test split. Same fields as train_set, plus <composition>.
    total_duration_seconds and total_duration_hours state the same total in
    two units (21161.93 s is about 5.88 h).
    NOTE(review): maximum_audio_duration has no unit in the data; presumably
    seconds. Confirm with the dataset authors.
  -->
  <test_set>
    <number_of_samples>2823</number_of_samples>
    <total_duration_seconds>21161.93</total_duration_seconds>
    <total_duration_hours>5.88</total_duration_hours>
    <maximum_audio_duration>24.83</maximum_audio_duration>
    <sampling_rate>16000 Hz</sampling_rate>
    <bit_depth>16-bit</bit_depth>
    <channels>Mono (1)</channels>

    <!-- Sample counts per speaker group; they add up to number_of_samples (1169 + 1654 = 2823). -->
    <composition>
      <professional_reciters>1169</professional_reciters>
      <regular_users>1654</regular_users>
    </composition>

    <notes>Test set includes a mix of professional reciters and regular users to simulate real-world usage scenarios. Transcriptions were syllabized using automated software based on Tajweed rules.</notes>
  </test_set>

  <!-- Free-text description of the preprocessing applied to transcriptions and audio. -->
  <processing_notes>
    <text_normalization>Removed unnecessary characters and ensured syllable alignment with Quranic Tajweed rules.</text_normalization>
    <audio_resampling>Resampled all audio files to 16 kHz to match the Wav2Vec 2.0 pre-trained model requirements.</audio_resampling>
  </processing_notes>
</dataset_metadata>