phonalign / vowel_length.py
cati
.
c71076c
raw
history blame
4.04 kB
import os
import numpy as np
from collections import defaultdict
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# return phones from the start to end time of one word
def getwps(start,end,phones):
    """Return the phones that fall entirely inside one word's time span.

    Args:
        start: start time of the word.
        end: end time of the word.
        phones: iterable of ``(phone, phone_start, phone_end)`` triples.

    Returns:
        A list of the ``(phone, phone_start, phone_end)`` triples whose
        interval satisfies ``phone_start >= start`` and ``phone_end <= end``,
        in their original order.
    """
    inside = []
    for label, p_start, p_end in phones:
        # Both boundaries are inclusive.
        if start <= p_start and p_end <= end:
            inside.append((label, p_start, p_end))
    return inside
# read align data from mfa file
def read_mfa(apath):
    """Read word and phone alignments from one alignment CSV file.

    Each row is read positionally as ``start, end, label, tier, ...``; rows
    whose tier is ``'words'`` are words and rows whose tier is ``'phones'``
    are phones. Any other row (for example a header line) is ignored.
    Presumably this is the CSV export of the Montreal Forced Aligner -
    confirm against the files actually used.

    Args:
        apath: path of the alignment CSV file.

    Returns:
        A list of ``(word, start, end, phones)`` tuples, one per word row,
        where ``phones`` is the list of ``(phone, start, end)`` triples that
        lie inside the word's time span (see ``getwps``). Times are floats.

    Raises:
        ValueError: if a non-blank row has fewer than four fields, or a
            word/phone row has a start or end that is not a number.
    """
    # Local import so this function does not depend on the file's import block.
    import csv

    wlines = []
    plines = []
    # The csv module handles quoted labels that contain commas; the encoding
    # is explicit so non-ASCII labels decode the same way on every platform.
    with open(apath, 'r', encoding='utf-8', newline='') as handle:
        for row in csv.reader(handle):
            if not row:
                continue  # blank line
            s, e, label, tier = row[:4]
            # Only convert times for rows we keep, so a textual header row
            # never reaches float().
            if tier == 'words':
                wlines.append((label, float(s), float(e)))
            elif tier == 'phones':
                plines.append((label, float(s), float(e)))
    return [(w, s, e, getwps(s, e, plines)) for w, s, e in wlines]
# key specific to MFA pronunciation dictionary -
# which phones are relevant per word
def read_ph_key(fpath):
    """Load the key that says which phones are relevant for each word.

    The file is tab-separated and its first line (a header) is skipped.
    Each remaining row is read by column position:

    * 0 - word
    * 1 - alignment type, ``'w2v2'`` or ``'mfa'``
    * 2 - pronunciation spelling
    * 3, 4 - comma-separated phone indices; ``get_vc_dur`` in this file uses
      the first group for the vowel and the second for the consonant.

    An index field may contain ``'X'`` instead of a number; it is kept as the
    string ``'X'`` (``runan`` treats such words as excluded).

    Args:
        fpath: path of the tab-separated key file.

    Returns:
        A plain nested dict
        ``{alignment_type: {word: {pronunciation: (idx3, idx4)}}}`` where
        ``idx3`` and ``idx4`` are tuples of ints (or ``'X'``). Both
        ``'w2v2'`` and ``'mfa'`` are always present as top-level keys.

    Raises:
        KeyError: if a row names an alignment type other than the two above.
        IndexError: if a non-blank row has fewer than five columns.
        ValueError: if an index is neither an integer nor ``'X'``.
    """
    def _indices(field):
        # 'X' is passed through untouched; everything else must be an int.
        return tuple(i if i == 'X' else int(i) for i in field.split(','))

    # Explicit encoding: the words are dictionary keys, so they must decode
    # identically regardless of the platform's default encoding.
    with open(fpath, 'r', encoding='utf-8') as handle:
        rows = handle.read().splitlines()[1:]

    vcdict = {'w2v2': {}, 'mfa': {}}
    for row in rows:
        if not row.strip():
            continue  # tolerate blank lines
        cols = row.split('\t')
        info = (_indices(cols[3]), _indices(cols[4]))
        vcdict[cols[1]].setdefault(cols[0], {})[cols[2]] = info
    return vcdict
def get_vc_dur(kwd,atype,dat,vcd,adir):
    """Collect (vowel duration, consonant duration) pairs for one keyword.

    Args:
        kwd: the keyword to measure.
        atype: alignment type; ``'w2v2'`` and ``'mfa'`` are the values
            handled here.
        dat: metadata rows. The last column must support ``kwd in ...``;
            columns 2 and 3 are used to build the alignment file path.
        vcd: mapping from pronunciation spelling to a pair of phone-index
            sequences; the first is used for the vowel, the second for the
            consonant.
        adir: prefix of the alignment directory (concatenated as-is, so it
            needs its own trailing separator).

    Returns:
        A list of ``(vowel_duration, consonant_duration)`` tuples, one per
        occurrence of ``kwd`` in the alignment files that exist on disk.
    """
    if atype == 'w2v2':  # only one pronunciation-spelling for ctc
        pspel = kwd

    matching = [row for row in dat if kwd in row[-1]]
    durations = []
    for row in matching:
        apath = f'{adir}{row[2]}/{row[3].split(".")[0]}.csv'
        # Recordings without an alignment file are silently skipped.
        if not os.path.exists(apath):
            continue
        for al in read_mfa(apath):
            if al[0] != kwd:
                continue
            phones = al[3]
            if atype == 'mfa':  # get this recording's phone spelling
                pspel = ' '.join(ph for ph, _s, _e in phones)
            # Each span runs from the start of its first indexed phone to
            # the end of its last indexed phone.
            vstart = phones[vcd[pspel][0][0]][1]
            vend = phones[vcd[pspel][0][-1]][2]
            cstart = phones[vcd[pspel][1][0]][1]
            cend = phones[vcd[pspel][1][-1]][2]
            durations.append((vend - vstart, cend - cstart))
    return durations
# TODO:
# pass word, lang, aln info for fig title;
# pass r/l info for point colour
def displ(prinfo):
    """Scatter-plot vowel against consonant durations and report their ratio.

    Args:
        prinfo: sequence of ``(vowel_duration, consonant_duration)`` pairs.
            Values are multiplied by 1000 and labelled as milliseconds, so
            they are presumably in seconds - confirm with the caller.

    Returns:
        A ``(ratio, figure)`` tuple: the mean of the per-pair
        vowel/consonant ratios, and the matplotlib figure.
    """
    rto = np.mean([vdur / cdur for vdur, cdur in prinfo])
    vowel_ms = [1000 * vdur for vdur, _ in prinfo]
    cons_ms = [1000 * cdur for _, cdur in prinfo]

    fig, ax = plt.subplots(figsize=(6, 5))
    # Upper axis limits follow the data but are clamped to 500..1000.
    x_upper = max(500, min(max(vowel_ms), 1000))
    ax.set_xlim((0.0, x_upper))
    y_upper = max(500, min(max(cons_ms), 1000))
    ax.set_ylim((0.0, y_upper))
    ax.scatter(vowel_ms, cons_ms)
    # Reference diagonal: points on it have equal vowel and consonant length.
    ax.axline((0, 0), slope=1, color="darkgray")
    ax.set_xlabel("Vowel length (ms)")
    ax.set_ylabel("Consonant length (ms)")
    ax.set_title(f'TITLE\nRatio: {round(rto,2)}')
    return rto, fig
#run analysis
# for keyword, speaker-background, align-source,
# vowel/consonant index key, and dataset
def runan(kwd,spl,aln,vck,dat,sources):
    """Run the vowel/consonant length analysis for one keyword.

    Args:
        kwd: keyword to analyse (case-insensitive).
        spl: speaker background, ``'l1'`` or ``'l2'`` (case-insensitive).
            ``'l1'`` keeps rows whose column 8 is ``'icelandic'``;
            ``'l2'`` keeps all other rows.
        aln: alignment source (case-insensitive); used as a key into both
            ``vck`` and ``sources``.
        vck: vowel/consonant index key,
            ``{alignment: {word: {pronunciation: (idx, idx)}}}``.
        dat: metadata rows.
        sources: mapping from alignment source to its alignment directory.

    Returns:
        * the string ``"EXCLUDED WORD FOR THIS ALIGNMENT TYPE"`` when any
          pronunciation of the word has an ``('X',)`` index group;
        * the matplotlib figure when more than 5 measurements were found;
        * ``0`` otherwise.

    Raises:
        ValueError: if ``spl`` is neither ``'l1'`` nor ``'l2'``.
        KeyError: if ``aln`` or ``kwd`` is missing from ``vck``.
    """
    kwd = kwd.lower()
    spl = spl.lower()
    aln = aln.lower()
    print(kwd,aln,spl)
    vcd = vck[aln][kwd]
    # ('X',) marks an index group that is unusable for this alignment type.
    if any(('X',) in el for el in vcd.values()):
        return "EXCLUDED WORD FOR THIS ALIGNMENT TYPE"

    if spl == 'l1':
        d = [l for l in dat if l[8].lower() == 'icelandic']
    elif spl == 'l2':
        d = [l for l in dat if l[8].lower() != 'icelandic']
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `d`.
        raise ValueError(f"spl must be 'l1' or 'l2', got {spl!r}")

    prinfo = get_vc_dur(kwd,aln,d,vcd,sources[aln])
    if len(prinfo) > 5:
        _, fig = displ(prinfo)
        return fig
    # TODO THIS CASE
    return 0
def setup(meta,phkey):
    """Load the metadata table, the phone key and the list of keywords.

    The metadata file is tab-separated with one header line (skipped). The
    last column of each row is a space-separated list of keywords and is
    converted to a tuple; all other columns are kept as strings.

    Args:
        meta: path of the tab-separated metadata file.
        phkey: path of the phone-key file, passed to ``read_ph_key``.

    Returns:
        A ``(dat, key, kws)`` tuple: the list of metadata rows, the result of
        ``read_ph_key(phkey)``, and the sorted list of distinct keywords
        found in the metadata.
    """
    # Explicit encoding so keywords decode the same way on every platform.
    with open(meta, 'r', encoding='utf-8') as handle:
        rows = handle.read().splitlines()[1:]

    dat = []
    keywords = set()
    for row in rows:
        # A blank line would otherwise become a one-column row with an empty
        # keyword, which later breaks column-based filtering.
        if not row.strip():
            continue
        cols = row.split('\t')
        words = tuple(cols[-1].split(' '))
        dat.append(cols[:-1] + [words])
        keywords.update(words)
    return dat, read_ph_key(phkey), sorted(keywords)