import os, pickle
import numpy as np
from collections import defaultdict
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


def getwps(start,end,phones):
    """Return the phones that lie entirely inside one word's time span.

    Args:
        start: Start time of the word.
        end: End time of the word.
        phones: Iterable of ``(phone, phone_start, phone_end)`` tuples.

    Returns:
        A list of the ``(phone, phone_start, phone_end)`` tuples whose start
        is not before ``start`` and whose end is not after ``end``, in their
        original order. Both boundaries are inclusive.
    """
    # Logical `and` (not bitwise `&`) is the right operator for combining
    # two comparisons; it also short-circuits.
    return [(p, s, e) for p, s, e in phones if start <= s and e <= end]

def read_mfa(afile):
    """Parse the text of one alignment CSV into word-level alignments.

    Args:
        afile: The CSV *contents* as a single string (not a path). Every
            non-blank row must split on ``','`` into exactly five fields,
            read in order as start time, end time, label, tier type, and a
            fifth field that is ignored.

    Returns:
        A list of ``(word, start, end, phones)`` tuples, one per row whose
        tier type is ``'words'``, in file order. ``phones`` is the list of
        ``(phone, start, end)`` tuples (taken from rows of type ``'phones'``)
        that ``getwps`` places inside the word's time span.

    Raises:
        ValueError: If a non-blank row does not have exactly five fields, or
            if a time on a ``'words'``/``'phones'`` row is not a valid float.
    """
    # Blank lines (e.g. stray empty lines in the text) carry no data; skip
    # them instead of failing on tuple unpacking.
    rows = [line.split(',') for line in afile.splitlines() if line.strip()]

    # Rows of any other tier type (such as a header row) are dropped by the
    # filters below before their time fields are converted.
    words = [(label, float(start), float(end))
             for start, end, label, tier, _ in rows if tier == 'words']
    phones = [(label, float(start), float(end))
              for start, end, label, tier, _ in rows if tier == 'phones']

    return [(word, start, end, getwps(start, end, phones))
            for word, start, end in words]

def read_ph_key(fpath):
    """Load the per-word phone index key from a tab-separated file.

    The first line of the file is skipped as a header. Each remaining line
    is split on tabs and read as:

        column 0: word
        column 1: alignment source, which must be ``'ctc'`` or ``'mfa'``
        column 2: pronunciation spelling
        column 3: comma-separated phone indices (first index group)
        column 4: comma-separated phone indices (second index group)

    In columns 3 and 4 every item is converted to ``int`` except the literal
    ``'X'``, which is kept as a string.

    Args:
        fpath: Path of the key file.

    Returns:
        A plain nested dict
        ``{source: {word: {pronunciation: (group3, group4)}}}`` where both
        groups are tuples. The ``'ctc'`` and ``'mfa'`` entries are always
        present, even when empty.

    Raises:
        KeyError: If column 1 is neither ``'ctc'`` nor ``'mfa'``.
        IndexError: If a line has fewer than five columns.
        ValueError: If an index item is neither ``'X'`` nor an integer.
    """
    def _parse_indices(field):
        # 'X' is kept verbatim as a marker; everything else must be an int.
        return tuple(tok if tok == 'X' else int(tok)
                     for tok in field.split(','))

    with open(fpath,'r') as handle:
        lines = handle.read().splitlines()

    key = {'ctc': {}, 'mfa': {}}
    for line in lines[1:]:
        cols = line.split('\t')
        # Parse the index columns before touching the result dict so a bad
        # row never leaves a half-built entry behind.
        groups = (_parse_indices(cols[3]), _parse_indices(cols[4]))
        per_word = key[cols[1]].setdefault(cols[0], {})
        per_word[cols[2]] = groups

    return key


def get_vc_dur(kwd,atype,dat,vcd,csvdict):
    """Collect vowel and consonant durations for every token of a keyword.

    Args:
        kwd: The keyword to look for.
        atype: Alignment source, ``'ctc'`` or ``'mfa'``. It selects the
            ``f'{atype}_csv/...'`` key prefix and how the pronunciation
            spelling is chosen.
        dat: Metadata rows. Field 3 of a row is a file name whose part
            before the first ``'.'`` names the alignment CSV; the last field
            is a container of the words in that recording.
        vcd: Mapping from pronunciation spelling to a pair of index
            sequences into the word's phone list. The first and last index
            of each sequence delimit, respectively, the vowel span and the
            consonant span.
        csvdict: Mapping from alignment key to CSV text.

    Returns:
        A list of ``(vowel_duration, consonant_duration)`` tuples, one per
        aligned occurrence of ``kwd``. Rows whose alignment key is missing
        from ``csvdict`` are skipped.
    """
    if atype=='ctc':
        # CTC has a single pronunciation spelling: the keyword itself.
        pspel = kwd

    durations = []
    for row in dat:
        if kwd not in row[-1]:
            continue

        stem = row[3].split(".")[0]
        akey = f'{atype}_csv/{stem}.csv'
        if akey not in csvdict:
            continue

        for word, _wstart, _wend, phones in read_mfa(csvdict[akey]):
            if word != kwd:
                continue

            if atype == 'mfa':
                # MFA: the spelling is whatever phones this token was
                # actually aligned to.
                pspel = ' '.join([label for label, _s, _e in phones])

            # Span start = start time of the first indexed phone;
            # span end = end time of the last indexed phone.
            vstart = phones[vcd[pspel][0][0]][1]
            vend = phones[vcd[pspel][0][-1]][2]
            cstart = phones[vcd[pspel][1][0]][1]
            cend = phones[vcd[pspel][1][-1]][2]
            durations.append((vend - vstart, cend - cstart))

    return durations


# TODO:
# pass word, lang, aln info for fig title;
# pass r/l info for point colour
def displ(prinfo,kwd):
    """Scatter-plot vowel against consonant durations for one keyword.

    Args:
        prinfo: Non-empty list of ``(vowel_duration, consonant_duration)``
            pairs. Values are multiplied by 1000 for the millisecond axes,
            so they are presumably in seconds -- confirm against the
            alignment data.
        kwd: Keyword, shown upper-cased in the figure title.

    Returns:
        ``(ratio, fig)``: the mean of the per-pair vowel/consonant ratios,
        and the matplotlib figure.
    """
    ratio = np.mean([vdur / cdur for vdur, cdur in prinfo])
    vowel_ms = [1000 * vdur for vdur, _cdur in prinfo]
    cons_ms = [1000 * cdur for _vdur, cdur in prinfo]

    # Each axis reaches at least 500 ms and at most 1000 ms.
    x_top = max(500, min(max(vowel_ms), 1000))
    y_top = max(500, min(max(cons_ms), 1000))

    # NOTE(review): the figure is created through pyplot and handed to the
    # caller unclosed -- confirm the caller releases it when done.
    fig, ax = plt.subplots(figsize=(6,5))
    ax.set_xlim([0.0, x_top])
    ax.set_ylim([0.0, y_top])
    ax.scatter(vowel_ms, cons_ms)
    # Reference diagonal: points on it have equal vowel and consonant length.
    ax.axline((0,0), slope=1, color="darkgray")

    ax.set_xlabel("Vowel length (ms)")
    ax.set_ylabel("Consonant length (ms)")
    ax.set_title(f'{kwd.upper()}\nV/C duration ratio: {round(ratio,2)}')
    return (ratio, fig)


def runan(kwd,spl,aln,vck,dat,csvs):
    """Run the vowel/consonant duration analysis for one keyword.

    Args:
        kwd: Keyword (lower-cased before use).
        spl: Speaker-background selector (lower-cased before use).
            ``'l1'`` keeps rows whose field 8 equals ``'icelandic'``
            case-insensitively, ``'l2'`` keeps the rows where it does not,
            and any other value keeps every row.
        aln: Alignment source (lower-cased before use); first-level key
            into ``vck``.
        vck: Vowel/consonant index key, indexed as ``vck[aln][kwd]``.
        dat: Metadata rows of the dataset.
        csvs: Mapping from alignment key to CSV text.

    Returns:
        * the string ``"EXCLUDED WORD FOR THIS ALIGNMENT TYPE"`` when any
          pronunciation entry of the word has ``('X',)`` as one of its
          index groups;
        * the figure from ``displ`` when more than five duration pairs were
          found;
        * ``0`` otherwise.
    """
    kwd, spl, aln = kwd.lower(), spl.lower(), aln.lower()
    print(kwd,aln,spl)
    vcd = vck[aln][kwd]

    excluded_marker = ('X',)
    for groups in vcd.values():
        if excluded_marker in groups:
            return "EXCLUDED WORD FOR THIS ALIGNMENT TYPE"

    if spl == 'l1':
        rows = [row for row in dat if row[8].lower() == 'icelandic']
    elif spl == 'l2':
        rows = [row for row in dat if row[8].lower() != 'icelandic']
    else:
        rows = dat

    prinfo = get_vc_dur(kwd,aln,rows,vcd,csvs)
    if len(prinfo) <= 5:
        # TODO: this case (five or fewer data points) is still to be handled.
        return 0

    _ratio, fig = displ(prinfo,kwd)
    return fig
    

def setup(metadatas,phkey,align_csvs):
    """Load everything the analysis needs from disk.

    Args:
        metadatas: Iterable of paths to tab-separated metadata files. The
            first line of each file is skipped as a header.
        phkey: Path of the phone index key file, read with ``read_ph_key``.
        align_csvs: Path of a pickle file holding the alignment CSV data.

    Returns:
        ``(dat, vck, kws, csvs)`` where

        * ``dat`` is the concatenation of all metadata rows, each a list of
          its tab-separated fields with the last field replaced by a tuple
          of its space-separated items;
        * ``vck`` is the phone index key;
        * ``kws`` is the sorted list of keys of ``vck['ctc']``;
        * ``csvs`` is the unpickled object from ``align_csvs``.
    """
    def _read_rows(path):
        # One row per non-header line; the last column becomes a tuple.
        with open(path,'r') as handle:
            lines = handle.read().splitlines()
        rows = []
        for line in lines[1:]:
            fields = line.split('\t')
            rows.append(fields[:-1] + [tuple(fields[-1].split(' '))])
        return rows

    dat = []
    for tsv in metadatas:
        dat.extend(_read_rows(tsv))

    vck = read_ph_key(phkey)
    kws = sorted(vck['ctc'])

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at alignment pickles from a trusted source.
    with open(align_csvs, 'rb') as handle:
        csvs = pickle.load(handle)

    return dat, vck, kws, csvs