import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn as sb
torch.manual_seed(1337)

<torch._C.Generator at 0x120d01cd0>

csv_path = 'ailn.csv'
df = pd.read_csv(csv_path)
df

# to make plotting easier
color_map = {
    'a': 'purple',
    'i': 'blue',
    'l': 'green',
    'n': 'red',
}
df['color']=df['phone'].map(color_map)

# try swapping `x` and `y` for other features!
sb.scatterplot(data=df, x='f1', y='f2', hue='phone', palette=color_map)
plt.show()

from sklearn.manifold import TSNE

# define features, then shrink with TSNE
feat_cols = ['f1', 'f2', 'f3', 'amp']
X_4d = df[feat_cols].to_numpy()
X = TSNE().fit_transform(X_4d)
X = torch.tensor(X)

# store feature for each phone separately
a_mask = df['phone']=='a'
i_mask = df['phone']=='i'
l_mask = df['phone']=='l'
n_mask = df['phone']=='n'

a_feats = X[a_mask]
i_feats = X[i_mask]
l_feats = X[l_mask]
n_feats = X[n_mask]

sb.scatterplot(x=X[:,0], y=X[:,1], hue=df['phone'], palette=color_map)
plt.show()

# define labels
phones='ailn'
phone_labels = df['phone'].to_numpy()
Y=df['phone'].apply(phones.index).to_numpy()
np.array([*zip(phone_labels,Y)])

array([['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['l', '2'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['i', '1'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['n', '3'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0'],
       ['a', '0']], dtype='<U21')

from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, v_measure_score

# fit model
kmeans = KMeans(n_clusters=4)
y_hat = kmeans.fit_predict(X)
v_measure_score(y_hat, Y)

/Users/markjos/projects/forced_align_writeup/.venv/lib/python3.12/site-packages/threadpoolctl.py:1226: RuntimeWarning: 
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

  warnings.warn(msg, RuntimeWarning)

0.7217863187793011

sb.scatterplot(
    x=X[:,0],
    y=X[:,1],
    hue=df['phone'],
    style=y_hat,
)
plt.show()

# define centroids
sample_row = lambda m: m[torch.randint(0,len(m), (1,))]
sample_a = sample_row(a_feats)
sample_i = sample_row(i_feats)
sample_l = sample_row(l_feats)
sample_n = sample_row(n_feats)
start_centroids = torch.concat([sample_a, sample_i, sample_l, sample_n])

# fit model
kmeans_seeded = KMeans(init=start_centroids, n_clusters=4)
y_hat = kmeans_seeded.fit_predict(X)
accuracy_score(y_hat, Y)

0.6371428571428571

sb.scatterplot(
    x=X[:,0],
    y=X[:,1],
    hue=df['phone'],
    style=y_hat,
)
plt.show()

from pomegranate.gmm import GeneralMixtureModel
from pomegranate.distributions import *
import matplotlib.pyplot as plt


X_2d = X
phone_tuples = []
for phone, features, color in [
    ('a', a_feats, 'purple'),
    ('i', i_feats, 'blue'),
    ('l', l_feats, 'green'),
    ('n', n_feats, 'red')
]:
    bi_model = GeneralMixtureModel([Normal(), Normal()]).fit(features)
    uni_model = Normal().fit(features)
    phone_tuples.append((phone, features, color, bi_model, uni_model))

fig,axes=plt.subplots(2, sharex=True, sharey=True)

x_min = X_2d[:,0].min()
x_max = X_2d[:,0].max()
x = np.linspace(x_min, x_max, num=100)
y_min = X_2d[:,1].min()
y_max = X_2d[:,1].max()
y = np.linspace(y_min, y_max, num=100)

assert len(x)==100
assert len(y)==100

xx, yy = np.meshgrid(x, y)
x_ = np.array(list(zip(xx.flatten(), yy.flatten())))


for phone, features, color, bi_model, uni_model in phone_tuples:

    p1 = uni_model.probability(x_).reshape(len(x), len(y))
    p2 = bi_model.probability(x_).reshape(len(x), len(y))

    for prob, ax in zip([p1,p2], axes):
        # only show probability above 90th quantile, to minimize overlap
        quantile20 = prob.quantile(0.90)
        prob[prob<quantile20]=float('-inf')

        # ax.title("Single Gaussian", fontsize=12)
        ax.contourf(xx, yy, prob, cmap=color.capitalize()+'s', alpha=0.5)
        ax.scatter(features[:,0], features[:,1], s=10, color=color, alpha=0.2, label=phone)
plt.legend()
plt.show()

from pomegranate.hmm import DenseHMM

hmm = DenseHMM()

phones = 'ailn'
states = []

for features in [a_feats, i_feats, l_feats, n_feats]:
    states.append(GeneralMixtureModel([Normal(), Normal(), Normal()]).fit(features))
hmm.add_distributions(states)

def plot_gaussians(states, X=X, Y=Y, phones='ailn', colors=['purple', 'blue', 'green', 'red']):
    x_min = X_2d[:,0].min()
    x_max = X_2d[:,0].max()
    x = np.linspace(x_min, x_max, num=100)
    y_min = X_2d[:,1].min()
    y_max = X_2d[:,1].max()
    y = np.linspace(y_min, y_max, num=100)

    assert len(x)==100
    assert len(y)==100

    xx, yy = np.meshgrid(x, y)
    x_ = np.array(list(zip(xx.flatten(), yy.flatten())))

    assert len(x)==100
    assert len(y)==100
    for state, phone, color in zip(states, phones, colors):
        prob = state.probability(x_).reshape(len(x), len(y))
        phone_X = X[Y==phones.index(phone)]

        # only show probability above 90th quantile, to minimize overlap
        quantile90 = prob.quantile(0.90)
        prob[prob<quantile90]=float('-inf')

        plt.contourf(xx, yy, prob, cmap=color.capitalize()+'s', alpha=0.5)
        plt.scatter(phone_X[:,0], phone_X[:,1], s=10, color=color, alpha=0.2, label=phone)
    plt.legend()
    plt.show()
plot_gaussians(hmm.distributions)

for state1 in states:
    for state2 in states:
        hmm.add_edge(state1, state2, 0.25)
hmm.edges

tensor([[-1.3863, -1.3863, -1.3863, -1.3863],
        [-1.3863, -1.3863, -1.3863, -1.3863],
        [-1.3863, -1.3863, -1.3863, -1.3863],
        [-1.3863, -1.3863, -1.3863, -1.3863]])

lawn_sequence = df.loc[df['word']=='lawn', 'phone']
lawn_str = ''.join(lawn_sequence)
lawn_str

'lllllllllllllaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnnnnnnnnnnnnnnnn'

words = df['word'].unique() # ['lawn', 'lean', 'kneel', 'knee', 'gnaw']
df['last_phone']=''
last_state_rows = []
for word in words:
    word_mask = df['word']==word
    phone_seq = df.loc[word_mask, 'phone']
    shifted_seq = np.insert(phone_seq, 0, '^')[:-1] # prepend '^' and cut off last state
    df.loc[word_mask, 'last_phone']=shifted_seq
    last_state_row = {'phone': '$', 'last_phone': phone_seq.iloc[-1], 'word': word}
    last_state_rows.append(last_state_row)
df.head()

last_state_df = pd.DataFrame(last_state_rows)
last_state_df.head()

transition_counts = torch.zeros((6,6))
state_names = '^ailn'
# count all non-final states
for i, state1 in enumerate(state_names):
    state1_mask = df['last_phone']==state1
    for j, state2 in enumerate(state_names): 
        state2_mask = df['phone']==state2
        transition_counts[i,j]=len(df[state1_mask&state2_mask])
# count final state
for i, state in enumerate(state_names):
    state_mask = last_state_df['last_phone']==state
    transition_counts[i,-1]=len(last_state_df[state_mask])
transition_counts

tensor([[  0.,   0.,   0.,   2.,   3.,   0.],
        [  0.,  75.,   0.,   0.,   1.,   1.],
        [  0.,   0., 103.,   1.,   1.,   1.],
        [  0.,   1.,   1.,  59.,   0.,   1.],
        [  0.,   1.,   2.,   0., 100.,   2.],
        [  0.,   0.,   0.,   0.,   0.,   0.]])

transitions_out = transition_counts.sum(axis=1)
# reshape to column vector so we divide by rows
transitions_out=transitions_out.reshape((6,1))
transitions_out

tensor([[  5.],
        [ 77.],
        [106.],
        [ 62.],
        [105.],
        [  0.]])

transitions_out[-1]=1
transition_mat=transition_counts/transitions_out
print(transition_mat)

tensor([[0.0000, 0.0000, 0.0000, 0.4000, 0.6000, 0.0000],
        [0.0000, 0.9740, 0.0000, 0.0000, 0.0130, 0.0130],
        [0.0000, 0.0000, 0.9717, 0.0094, 0.0094, 0.0094],
        [0.0000, 0.0161, 0.0161, 0.9516, 0.0000, 0.0161],
        [0.0000, 0.0095, 0.0190, 0.0000, 0.9524, 0.0190],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])

transition_mat.sum(axis=1)

tensor([1., 1., 1., 1., 1., 0.])

def add_hmm_edges(hmm, transition_mat, states):
    for i, state1 in enumerate([hmm.start,]+states):
        for j, state2 in enumerate(states+[hmm.end,], start=1):
            weight = transition_mat[i,j]
            if weight==0:
                continue
            if i==0 and j==len(states)+1:
                continue
            hmm.add_edge(state1, state2, weight)

add_hmm_edges(hmm, transition_mat, states)
hmm.edges, hmm.starts

(tensor([[-0.0263, -1.3863, -1.3863, -4.3438],
         [-1.3863, -0.0287, -4.6634, -4.6634],
         [-4.1271, -4.1271, -0.0496, -1.3863],
         [-4.6540, -3.9608, -1.3863, -0.0488]]),
 tensor([   -inf,    -inf, -0.9163, -0.5108]))

a_gmm = hmm.distributions[0]

# re-use samples generated earlier as k-means centroids
a_gmm.probability(sample_a).item(), a_gmm.probability(sample_n).item()

(0.00053664471488446, 0.00010689154441934079)

a_gmm.log_probability(sample_a).item(), a_gmm.log_probability(sample_n).item()

(-7.530174255371094, -9.143695831298828)

n_gmm = hmm.distributions[3]
n_gmm.log_probability(sample_n).item(), n_gmm.log_probability(sample_a).item()

(-4.46790075302124, -93.80657958984375)

lawn_mask = df['word']=='lawn'
word_observations = X[lawn_mask]
word_states = df.loc[lawn_mask, 'phone'].apply(state_names.index)
word_prev_states = df.loc[lawn_mask, 'last_phone'].apply(state_names.index)

# turn into log probs
transition_mat_log = (transition_mat+1e-10).log() # avoid taking log of 0

word_likelihood = 0
for observation, state_i, prev_state_i in zip(word_observations, word_states, word_prev_states):
    state_distribution = hmm.distributions[state_i-1] # shift left since there isn't any distribution for start
    emission_logprob = state_distribution.log_probability(observation.reshape(1,2))
    transition_logprob = transition_mat_log[prev_state_i, state_i]
    word_likelihood+=emission_logprob+transition_logprob
word_likelihood.item()

-327.7899169921875

word_states_rand = np.random.choice(4, size=word_states.shape)

def joint_state_observation_prob(states, observations):
    log_likelihood = 0
    prev_states = np.insert(states, 0, 0)[:-1]
    for observation, state_i, prev_state_i in zip(observations, states, prev_states):
        state_distribution = hmm.distributions[state_i-1] # since there isn't any distribution for start
        emission_logprob = state_distribution.log_probability(observation.unsqueeze(0))
        transition_logprob = transition_mat_log[prev_state_i, state_i]
        log_likelihood+=emission_logprob+transition_logprob
    return log_likelihood.item()
word_likelihood_rand = joint_state_observation_prob(word_states_rand, word_observations)
word_likelihood_rand

-5469.267578125

(word_likelihood-word_likelihood_rand).exp()

tensor([inf])

from tqdm import tqdm

word = 'lawn'
word_features = df.loc[df['word']=='lawn', feat_cols].to_numpy()


def get_state_permutations(features):
    state_permutations = [[0]]
    for _ in tqdm(range(len(features))):
        timestep_paths = []
        for state_i in range(1, len(state_names)):
            for path in state_permutations:
                timestep_paths.append(path+[state_i,])
        state_permutations.extend(timestep_paths)
    return state_permutations
permutations = get_state_permutations(word_features[:10]) # try more iterations if you dare
len(permutations)

100%|██████████| 10/10 [00:09<00:00,  1.08it/s]

9765625

# set matrices to float128 to prevent underflow
# (using numpy as pytorrhc doesnt support float128)
forward_mat = np.zeros((6,len(word_observations)), dtype=np.float128)
transition_mat_128 = transition_mat.numpy().astype(np.float128)

# initial timestep
initial_observation = word_observations[0]
for j, state in enumerate(states, start=1):
    transition_prob=transition_mat_128[0,j] # where 0 indicates the initial state
    emission_prob=state.probability(initial_observation.unsqueeze(0)).item()
    forward_mat[j,0]=transition_prob*emission_prob

# remaining timesteps
for t, observation in enumerate(word_observations[1:], start=1):
    for j, curr_state in enumerate(states, start=1):
        emission_prob = curr_state.probability(observation.unsqueeze(0)).item()
        for i, _ in enumerate(states, start=1):
            transition_prob = transition_mat_128[i,j]
            prev_forward = forward_mat[i,t-1]
            forward_mat[j,t]+=prev_forward*emission_prob*transition_prob
        # print(forward_mat[:,t]) # uncomment to see how quickly numbers underflow
# transitions to end state
for i, state in enumerate(states, start=1):
    transition_prob=transition_mat_128[i,-1] # where -1 indicates the final state
    prev_forward=forward_mat[i,-1]
    forward_mat[-1,-1]+=prev_forward*transition_prob
word_prob=forward_mat[-1,-1]
word_prob

3.1844271695947884166e-143

def add_logprobs(log_probs: np.ndarray) -> float:
    if hasattr(log_probs, 'detach'):
        log_probs = log_probs.detach()
    # need quadruple precision to prevent underflow
    if log_probs.dtype is not np.float128:
        log_probs=np.array(log_probs, dtype=np.float128)
    probs=np.exp(log_probs)
    probs_sum=probs.sum()
    logprob_sum=np.log(probs_sum)
    return logprob_sum.astype(float)

def forward(observations, transition_mat_log, states):
    forward_mat = torch.full((len(states)+2,len(observations)), -torch.inf)
    # initial timestep
    forward_mat[0,0]=0 # always start in initial state
    initial_observation = observations[0]
    for j, state in enumerate(states, start=1):
        transition_logprob=transition_mat_log[0,j]
        emission_logprob=state.log_probability(initial_observation.reshape([1,2]))
        forward_mat[j,0]=transition_logprob+emission_logprob
    # remaining timesteps
    for t, observation in enumerate(observations[1:], start=1):
        for j, curr_state in enumerate(states, start=1):
            emission_logprob = curr_state.log_probability(observation.reshape([1,2])).item()
            logprobs = torch.zeros(len(states))
            for i, _ in enumerate(states, start=1):
                transition_logprob = transition_mat_log[i,j]
                prev_forward = forward_mat[i,t-1]
                logprobs[i-1]=prev_forward+transition_logprob
            logprob=add_logprobs(logprobs)
            logprob+=emission_logprob
            forward_mat[j,t]=logprob
    # transitions to end state
    end_logprobs = torch.zeros(len(states))
    for i, state in enumerate(states, start=1):
        transition_logprob=transition_mat_log[i,-1]
        prev_forward = forward_mat[i,-1]
        end_logprobs[i-1]=transition_logprob+prev_forward
    logprob=add_logprobs(end_logprobs)
    forward_mat[-1,-1]=logprob
    return logprob, forward_mat
logprob, forward_mat = forward(word_observations, transition_mat_log, states)
logprob

-328.11163330078125

hmm.log_probability(word_observations.unsqueeze(0)).item()

-327.5914001464844

logprob, forward_mat = forward(word_observations.flip(0), transition_mat_log, states)
logprob

-343.62776004407567

reversed_enum = lambda a, start=0: reversed(list(enumerate(a, start=start)))

def viterbi(observations, transition_mat_log, states):
    viterbi_mat = torch.full((len(states)+2,len(observations)), fill_value=-np.inf)
    backtrace = torch.zeros_like(viterbi_mat, dtype=int)

    # initial timestep - same as before
    initial_observation = observations[0]
    for j, state in enumerate(states, start=1):
        transition_logprob=transition_mat_log[0,j]
        emission_logprob=state.log_probability(initial_observation.reshape([1,2]))
        viterbi_mat[j,0]=transition_logprob+emission_logprob

    # remaining timesteps
    for t, observation in enumerate(observations[1:], start=1):
        for j, curr_state in enumerate(states, start=1):
            emission_logprob = curr_state.log_probability(observation.reshape([1,2]))
            prev_viterbi_vec = viterbi_mat[:,t-1]
            transition_vec = transition_mat_log[:,j]
            path_likelihoods = prev_viterbi_vec+transition_vec+emission_logprob

            max_path_likelihood = path_likelihoods.max()
            likely_prev_state = path_likelihoods.argmax() # argmax returns the index of the max value

            viterbi_mat[j,t]=max_path_likelihood
            backtrace[j,t]=likely_prev_state

    # transitions to end state
    final_viterbi_vec = viterbi_mat[:,-1]
    final_transition_vec = transition_mat_log[:,-1]
    final_likelihoods = final_viterbi_vec + final_transition_vec
    max_final_likelihood = final_likelihoods.max()
    likely_prefinal_state = final_likelihoods.argmax()
    
    viterbi_mat[-1,-1]=max_final_likelihood
    backtrace[-1,-1]=likely_prefinal_state
    
    # decode path from backtrace
    prev_state = likely_prefinal_state
    path = torch.zeros(len(observations+2), dtype=int)
    path[-1]=-1
    # so we can iterate thru columns
    backtrace_iter = backtrace.transpose(0,1)
    for t, idcs in reversed_enum(backtrace_iter):
        path[t]=prev_state
        prev_state=idcs[prev_state]
    return path, viterbi_mat
path, viterbi_mat = viterbi(word_observations, transition_mat_log, states)
path

tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4])

''.join([state_names[i] for i in path])

'llllllllllaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnnnnnnnnnnnnnnnn'

for word in words:
    word_mask = df['word']==word
    word_X = X[word_mask]
    word_Y = df.loc[word_mask,'phone'].tolist()
    print(word)
    pomegranate_preds = hmm.viterbi(np.reshape(word_X, (1,-1,2))).squeeze()
    # add 1 since 0th state for pomegranate is [a], not start
    pomegranate_decoded = ''.join([state_names[i+1] for i in pomegranate_preds])
    print('Pomegranate viterbi:\t',pomegranate_decoded)

    viterbi_preds, _ = viterbi(word_X, transition_mat_log, states)
    viterbi_decoded = ''.join([state_names[i] for i in viterbi_preds])
    print('Our viterbi:\t\t', viterbi_decoded)

    print('Ground truth:\t\t', ''.join(word_Y))

lawn
Pomegranate viterbi:	 llllllllllaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnnnnnnnnnnnnnnnn
Our viterbi:		 llllllllllaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnnnnnnnnnnnnnnnn
Ground truth:		 lllllllllllllaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaannnnnnnnnnnnnnnnnnnnnnnnn
lean
Pomegranate viterbi:	 lllllllllllllllllllllliiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiinnnnnnnnnnnnnnnnnnnnnn
Our viterbi:		 lllllllllllllllllllllliiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiinnnnnnnnnnnnnnnnnnnnnn
Ground truth:		 llllllllllllllliiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiinnnnnnnnnnnnnnnnnn
kneel
Pomegranate viterbi:	 nnnnnnnnnnnnnnnnnnnnniiiiiiiiiiiiiiiiiiiiiiiiiiiiiilllllllllllllllllllllllllllllllll
Our viterbi:		 nnnnnnnnnnnnnnnnnnnnniiiiiiiiiiiiiiiiiiiiiiiiiiiiiilllllllllllllllllllllllllllllllll
Ground truth:		 nnnnnnnnnnnnnnnnnnnnniiiiiiiiiiiiiiiiiiiiiiiiiiiiillllllllllllllllllllllllllllllllll
knee
Pomegranate viterbi:	 nnnnnnnnnnnnnnnnnnnnnnnniiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
Our viterbi:		 nnnnnnnnnnnnnnnnnnnnnnnniiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
Ground truth:		 nnnnnnnnnnnnnnnnnnnnnnnnniiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
gnaw
Pomegranate viterbi:	 nnnnnnnnnnnnnnnllaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaall
Our viterbi:		 nnnnnnnnnnnnnnnaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Ground truth:		 nnnnnnnnnnnnnnnnaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

fig, axes = plt.subplots(nrows=3,ncols=2)
flat_axes=axes.flatten()
for i,word in enumerate(words):
    word_mask = df['word']==word
    word_X = X[word_mask]
    word_Y = df.loc[word_mask,'phone'].tolist()
    _, viterbi_mat = viterbi(word_X, transition_mat_log, states)
    plot_viterbi = viterbi_mat.transpose(0,1)[:,1:-1]
    flat_axes[i].plot(plot_viterbi, label=[*state_names[1:]])
    flat_axes[i].set_title(word)

handles, labels = flat_axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right')
plt.tight_layout(rect=[0, 0.05, 1, 1])
plt.show()

from scipy.special import softmax

def plot_viterbi(states=None, transition_mat=None, hmm_dict=None, transition_mat_dict=None):
    fig, axes = plt.subplots(nrows=3,ncols=2)
    flat_axes=axes.flatten()
    for i, word in enumerate(words):
        if transition_mat_dict:
            transition_mat = transition_mat_dict[word]

        if hmm_dict:
            states = hmm_dict[word].distributions
        word_mask = df['word']==word
        word_X = X[word_mask]
        path, viterbi_mat = viterbi(word_X, transition_mat, states)
        viterbi_mat_softmax = softmax(viterbi_mat, axis=0)
        viterbi_plot_data = viterbi_mat_softmax.transpose()[:,1:-1]
        flat_axes[i].plot(viterbi_plot_data, label=[*state_names[1:]])
        flat_axes[i].set_title(word)

    handles, labels = flat_axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper right')
    plt.tight_layout(rect=[0, 0.05, 1, 1])
    plt.show()
plot_viterbi(states, transition_mat=transition_mat_log)

fig, axes = plt.subplots(nrows=3,ncols=2)
flat_axes=axes.flatten()
for i, word in enumerate(words):
    word_mask = df['word']==word
    word_X = X[word_mask]
    word_Y = df.loc[word_mask,'phone'].tolist()
    gmm_likelihoods = torch.zeros((len(word_X),len(states)))
    for j,state in enumerate(states):
        state_prob = state.log_probability(word_X)
        gmm_likelihoods[:,j]=state_prob
    gmm_softmax = softmax(gmm_likelihoods, axis=1)
    flat_axes[i].plot(gmm_softmax, label=[*state_names[1:]])
    flat_axes[i].set_title(word)

handles, labels = flat_axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right')
plt.tight_layout(rect=[0, 0.05, 1, 1])
plt.show()

bw_states = [Normal().fit(X) for _ in range(4)]

# state_names = '^ailn'
# words = ['lawn', 'lean', 'kneel', 'knee', 'gnaw']
words_ipa = df['word_ipa'].unique()
word_transitions = {}

for word, word_ipa in zip(words, words_ipa):
    word_trans_mat = torch.zeros((6,6))
    # only one possible transition from initial state
    word_trans_mat[0,state_names.index(word_ipa[0])]=1
    for i, char in enumerate(word_ipa):
        char_i = state_names.index(char)
        if i < len(word_ipa)-1:
            next_char = word_ipa[i+1]
            next_char_i = state_names.index(next_char)
        else:
            next_char = '$'
            next_char_i = -1
        # equal likelihood transition to self or next char
        word_trans_mat[char_i, next_char_i] = 0.5
        word_trans_mat[char_i, char_i] = 0.5
    word_transitions[word]=word_trans_mat

word_transitions

{'lawn': tensor([[0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
         [0.0000, 0.5000, 0.0000, 0.0000, 0.5000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.5000, 0.0000, 0.5000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]),
 'lean': tensor([[0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.5000, 0.0000, 0.5000, 0.0000],
         [0.0000, 0.0000, 0.5000, 0.5000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]),
 'kneel': tensor([[0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.5000, 0.5000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.5000, 0.0000, 0.5000],
         [0.0000, 0.0000, 0.5000, 0.0000, 0.5000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]),
 'knee': tensor([[0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.5000, 0.0000, 0.0000, 0.5000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.5000, 0.0000, 0.5000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]),
 'gnaw': tensor([[0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000],
         [0.0000, 0.5000, 0.0000, 0.0000, 0.0000, 0.5000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.5000, 0.0000, 0.0000, 0.5000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])}

word_transitions_log = {k:v.log() for k,v in word_transitions.items()}
word_transitions_log

{'lawn': tensor([[   -inf,    -inf,    -inf,  0.0000,    -inf,    -inf],
         [   -inf, -0.6931,    -inf,    -inf, -0.6931,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf, -0.6931,    -inf, -0.6931,    -inf,    -inf],
         [   -inf,    -inf,    -inf,    -inf, -0.6931, -0.6931],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]]),
 'lean': tensor([[   -inf,    -inf,    -inf,  0.0000,    -inf,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf,    -inf, -0.6931,    -inf, -0.6931,    -inf],
         [   -inf,    -inf, -0.6931, -0.6931,    -inf,    -inf],
         [   -inf,    -inf,    -inf,    -inf, -0.6931, -0.6931],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]]),
 'kneel': tensor([[   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf,    -inf, -0.6931, -0.6931,    -inf,    -inf],
         [   -inf,    -inf,    -inf, -0.6931,    -inf, -0.6931],
         [   -inf,    -inf, -0.6931,    -inf, -0.6931,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]]),
 'knee': tensor([[   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf,    -inf, -0.6931,    -inf,    -inf, -0.6931],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf,    -inf, -0.6931,    -inf, -0.6931,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]]),
 'gnaw': tensor([[   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
         [   -inf, -0.6931,    -inf,    -inf,    -inf, -0.6931],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
         [   -inf, -0.6931,    -inf,    -inf, -0.6931,    -inf],
         [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]])}

word_hmms = {}

for word in words:
    word_hmm = DenseHMM()
    word_hmm.add_distributions(bw_states)
    add_hmm_edges(word_hmm, word_transitions[word], bw_states)
    word_hmms[word]=word_hmm
word_hmms['lawn'].edges, word_hmms['lawn'].starts

(tensor([[-0.6931,    -inf,    -inf, -0.6931],
         [   -inf,    -inf,    -inf,    -inf],
         [-0.6931,    -inf, -0.6931,    -inf],
         [   -inf,    -inf,    -inf, -0.6931]]),
 tensor([-inf, -inf, 0., -inf]))

def backward(observations, transition_mat_log, states):
    backward_mat = torch.full((len(states)+2,len(observations)), -torch.inf)
    # final timestep
    backward_mat[-1,-1]=0 # always end in final state
    final_observation = observations[-1]
    for i, state in enumerate(states, start=1):
        transition_logprob=transition_mat_log[i,-1]
        emission_logprob=state.log_probability(final_observation.reshape([1,2]))
        backward_mat[i,-1]=transition_logprob+emission_logprob
    # remaining timesteps
    for t, observation in reversed_enum(observations[:-1]):
        for i, curr_state in enumerate(states, start=1):
            emission_logprob = curr_state.log_probability(observation.reshape([1,2])).item()
            logprobs = torch.zeros(len(states))
            for j, _ in enumerate(states, start=1):
                transition_logprob = transition_mat_log[i,j]
                next_backward = backward_mat[j,t+1]
                logprobs[j-1]=next_backward+transition_logprob
            logprob=add_logprobs(logprobs)
            logprob+=emission_logprob
            backward_mat[i,t]=logprob
    # transitions to initial state
    init_logprobs = torch.zeros(len(states))
    for i, state in enumerate(states, start=1):
        transition_logprob=transition_mat_log[0,i]
        next_backward = backward_mat[i,0]
        init_logprobs[i-1]=transition_logprob+next_backward
    logprob=add_logprobs(init_logprobs)
    backward_mat[0,0]=logprob
    return logprob, backward_mat
back_logprob, backward_mat = backward(word_observations, transition_mat_log, states)
back_logprob

-328.11138916015625

forward_logprob, forward_mat = forward(word_observations, transition_mat_log, states)
forward_logprob

-328.11163330078125

def ksi(i, j, t, observations, forward, backward, transition_mat_log, states):
    """
    i and j in [start, *states, end]
    """
    forward_i = forward[i,t]
    backward_j = backward[j,t+1]
    transition = transition_mat_log[i,j]
    if (j==len(states)+1) or (j==-1):
        # can't transition to final state before final timestep
        if t<len(observations)-1:
            return float('-inf')
        # else emission probability is 1 (log(1)=0) when transitioning to final state at final timestep
        emission = 0
    else:
        emission = states[j-1].log_probability(observations[t+1].reshape([1,2])).item()
    
    seq_prob = forward[-1,-1]
    if seq_prob == float('-inf'):
        return float('-inf')

    ksi_val = forward_i + backward_j + transition + emission - seq_prob
    return ksi_val

ksi(
    3,
    2,
    len(word_observations)-15,
    word_observations,
    forward(word_observations, transition_mat_log, states)[1],
    backward(word_observations, transition_mat_log, states)[1],
    transition_mat_log=transition_mat_log,
    states=states,
)

tensor(-406.9579)

def gamma(i, t, forward, backward):
    """
    i in [start, *states, end]
    """
    forward_i = forward[i,t]
    backward_i = backward[i,t]
    seq_prob = forward[-1,-1]
    return forward_i + backward_i - seq_prob

gamma(
    3,
    len(word_observations)-15,
    forward(word_observations, transition_mat_log, states)[1],
    backward(word_observations, transition_mat_log, states)[1],
)

tensor(-664.5364)

def a_hat(i, observations, transition_mat_log, states):
    """
    i and j in [start, *states, end]
    """

    ksi_sums = torch.full((len(states),), float('-inf'))
    _, forward_mat = forward(observations, transition_mat_log, states)
    _, backward_mat = backward(observations, transition_mat_log, states)

    for j in range(1, len(states)+1):
        ksi_j = torch.full((len(observations),), float('-inf'))
        for t in range(len(observations)-1):
            ksi_log = ksi(i,j,t,observations,forward_mat,backward_mat,transition_mat_log,states)
            ksi_j[t]=ksi_log
        ksi_sums[j-1]=add_logprobs(ksi_j)

    total_ksi = add_logprobs(ksi_sums)
    if total_ksi == float('-inf'):
        return torch.full((len(states),), float('-inf'))
    a_hat_vec = ksi_sums - total_ksi
    return a_hat_vec

a_hat(1,word_observations,word_transitions_log['lawn'],states)

/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/1710079399.py:9: RuntimeWarning: divide by zero encountered in log
  logprob_sum=np.log(probs_sum)

tensor([-3.5322e-04,        -inf,        -inf, -7.9486e+00])

def mu_sigsq_hat(i, observations, transition_mat_log, states):
    """
    i in [start, *states, end]
    """
    _, forward_mat = forward(observations, transition_mat_log, states)
    _, backward_mat = backward(observations, transition_mat_log, states)
    
    # convert to numpy since we'll be using float128
    observations = observations.numpy()
    gamma_vec_log = np.array(
        [gamma(i,t,forward_mat,backward_mat) for t in range(len(observations))],
        dtype=np.float128,
    )
    gamma_vec = np.exp(gamma_vec_log)
    weighted_observations = observations*gamma_vec[:,None]
    mu_hat = weighted_observations.sum(axis=0)/gamma_vec.sum()

    observation_minus_mean = observations-mu_hat
    observation_minus_mean_dot = np.stack([column[:,None]@column[None,:] for column in observation_minus_mean])
    numerator = observation_minus_mean_dot * gamma_vec[:,None,None]
    sigma_hat = numerator.sum(axis=0)/gamma_vec.sum()
    
    sigma_hat = torch.tensor(sigma_hat.astype(np.float64))
    mu_hat = torch.tensor(mu_hat.astype(np.float64))

    return mu_hat, sigma_hat

mu_sigsq_hat(1, word_observations, transition_mat_log, states)

(tensor([-12.3937,   5.2621], dtype=torch.float64),
 tensor([[20.7811, -9.6813],
         [-9.6813,  6.1762]], dtype=torch.float64))

def em_step(df, X, hmm_dict, word_transitions_dict, phones):
    num_states = len(list(hmm_dict.values())[0].distributions)
    state_means = torch.zeros((num_states, 2))
    state_covs = torch.zeros((num_states, 2, 2))
    new_transitions={}
    for word in df['word'].unique():
        word_mask = df['word']==word
        word_ipa = df.loc[word_mask, 'word_ipa'].iloc[0]
        state_idcs = list(set(phones.index(c)+1 for c in word_ipa))
        word_feats = X[word_mask]
        word_hmm = hmm_dict[word]
        states = word_hmm.distributions
        word_trans_mat = word_transitions_dict[word]

        new_transition_mat = torch.full_like(word_trans_mat, -torch.inf)
        new_transition_mat[0] = word_trans_mat[0] # initial transition probabilities don't change
        for i in state_idcs:
            # expected transition probabilities
            a_hat_vec = a_hat(i, word_feats, word_trans_mat, states)
            print(word, i, a_hat_vec)
            # set transition probs for state i for given word
            new_transition_mat[i,1:-1]=a_hat_vec

            # collect emission probabilities
            mu_hat_vec, sigmasq_hat_mat = mu_sigsq_hat(i, word_feats, word_trans_mat, states)
            weight_for_avg = len(word_feats)/len(df)
            state_means[i-1]+=mu_hat_vec*weight_for_avg
            state_covs[i-1]+=sigmasq_hat_mat*weight_for_avg
        print(new_transition_mat)
        add_hmm_edges(word_hmm, torch.exp(new_transition_mat), states)
        new_transitions[word]=new_transition_mat
    for i in range(1,num_states):
        states[i-1].means=torch.nn.Parameter(torch.tensor(state_means[i-1]), requires_grad=False)
        states[i-1].covs=torch.nn.Parameter(torch.tensor(state_covs[i-1]), requires_grad=False)
    return new_transitions, states

plot_viterbi(hmm_dict=word_hmms, transition_mat_dict=word_transitions_log)

plot_gaussians(bw_states)

trained_transitions = word_transitions_log
for i in range(5):
    trained_transitions, trained_states=em_step(df, X, word_hmms, trained_transitions, phones)
plot_viterbi(hmm_dict=word_hmms, transition_mat_dict=trained_transitions)

/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/1710079399.py:9: RuntimeWarning: divide by zero encountered in log
  logprob_sum=np.log(probs_sum)

lawn 1 tensor([-0.0327,    -inf,    -inf, -3.4381])
lawn 3 tensor([-3.1452,    -inf, -0.0440,    -inf])
lawn 4 tensor([-inf, -inf, -inf, 0.])
tensor([[   -inf,    -inf,    -inf,  0.0000,    -inf,    -inf],
        [   -inf, -0.0327,    -inf,    -inf, -3.4381,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf, -3.1452,    -inf, -0.0440,    -inf,    -inf],
        [   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]])
lean 2 tensor([   -inf, -0.0362,    -inf, -3.3371])
lean 3 tensor([   -inf, -3.1638, -0.0432,    -inf])
lean 4 tensor([-inf, -inf, -inf, 0.])
tensor([[   -inf,    -inf,    -inf,  0.0000,    -inf,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf,    -inf, -0.0362,    -inf, -3.3371,    -inf],
        [   -inf,    -inf, -3.1638, -0.0432,    -inf,    -inf],
        [   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]])
kneel 2 tensor([   -inf, -0.0334, -3.4169,    -inf])
kneel 3 tensor([-inf, -inf, 0., -inf])
kneel 4 tensor([   -inf, -3.2642,    -inf, -0.0390])
tensor([[   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf,    -inf, -0.0334, -3.4169,    -inf,    -inf],
        [   -inf,    -inf,    -inf,  0.0000,    -inf,    -inf],
        [   -inf,    -inf, -3.2642,    -inf, -0.0390,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]])
knee 2 tensor([-inf, 0., -inf, -inf])
knee 4 tensor([   -inf, -3.6022,    -inf, -0.0276])
tensor([[   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf,    -inf,  0.0000,    -inf,    -inf,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf,    -inf, -3.6022,    -inf, -0.0276,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]])
gnaw 1 tensor([0., -inf, -inf, -inf])
gnaw 4 tensor([-3.3051,    -inf,    -inf, -0.0374])
tensor([[   -inf,    -inf,    -inf,    -inf,  0.0000,    -inf],
        [   -inf,  0.0000,    -inf,    -inf,    -inf,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf],
        [   -inf, -3.3051,    -inf,    -inf, -0.0374,    -inf],
        [   -inf,    -inf,    -inf,    -inf,    -inf,    -inf]])
lawn 1 tensor([-inf, -inf, -inf, -inf])

/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/4182966518.py:33: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  states[i-1].means=torch.nn.Parameter(torch.tensor(state_means[i-1]), requires_grad=False)
/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/4182966518.py:34: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  states[i-1].covs=torch.nn.Parameter(torch.tensor(state_covs[i-1]), requires_grad=False)

lawn 3 tensor([-inf, -inf, -inf, -inf])
lawn 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lean 2 tensor([-inf, -inf, -inf, -inf])
lean 3 tensor([-inf, -inf, -inf, -inf])
lean 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
kneel 2 tensor([-inf, -inf, -inf, -inf])
kneel 3 tensor([-inf, -inf, -inf, -inf])
kneel 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
knee 2 tensor([-inf, -inf, -inf, -inf])
knee 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
gnaw 1 tensor([-inf, -inf, -inf, -inf])
gnaw 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lawn 1 tensor([-inf, -inf, -inf, -inf])
lawn 3 tensor([-inf, -inf, -inf, -inf])
lawn 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lean 2 tensor([-inf, -inf, -inf, -inf])
lean 3 tensor([-inf, -inf, -inf, -inf])
lean 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
kneel 2 tensor([-inf, -inf, -inf, -inf])
kneel 3 tensor([-inf, -inf, -inf, -inf])
kneel 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
knee 2 tensor([-inf, -inf, -inf, -inf])
knee 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
gnaw 1 tensor([-inf, -inf, -inf, -inf])
gnaw 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lawn 1 tensor([-inf, -inf, -inf, -inf])
lawn 3 tensor([-inf, -inf, -inf, -inf])
lawn 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lean 2 tensor([-inf, -inf, -inf, -inf])
lean 3 tensor([-inf, -inf, -inf, -inf])
lean 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
kneel 2 tensor([-inf, -inf, -inf, -inf])
kneel 3 tensor([-inf, -inf, -inf, -inf])
kneel 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
knee 2 tensor([-inf, -inf, -inf, -inf])
knee 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
gnaw 1 tensor([-inf, -inf, -inf, -inf])
gnaw 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lawn 1 tensor([-inf, -inf, -inf, -inf])
lawn 3 tensor([-inf, -inf, -inf, -inf])
lawn 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
lean 2 tensor([-inf, -inf, -inf, -inf])
lean 3 tensor([-inf, -inf, -inf, -inf])
lean 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, 0., -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
kneel 2 tensor([-inf, -inf, -inf, -inf])
kneel 3 tensor([-inf, -inf, -inf, -inf])
kneel 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
knee 2 tensor([-inf, -inf, -inf, -inf])
knee 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])
gnaw 1 tensor([-inf, -inf, -inf, -inf])
gnaw 4 tensor([-inf, -inf, -inf, -inf])
tensor([[-inf, -inf, -inf, -inf, 0., -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf],
        [-inf, -inf, -inf, -inf, -inf, -inf]])

/Users/markjos/projects/forced_align_writeup/.venv/lib/python3.12/site-packages/scipy/special/_logsumexp.py:338: RuntimeWarning: invalid value encountered in subtract
  exp_x_shifted = np.exp(x - x_max)

plot_gaussians(trained_states)

cluster_means = torch.tensor(kmeans.cluster_centers_)
cluster_vars = torch.zeros(4,2)
seeded_states = []
for i, _ in enumerate(phones):
    cluster_mask = y_hat==i
    cluster_points = X[cluster_mask]
    var = X[cluster_mask].var(dim=0)
    cluster_vars[i]=var
    seeded_states.append(Normal(
        means=cluster_means[i],
        covs=var,
        covariance_type='diag'
    ))
seeded_states, cluster_means, cluster_vars

([Normal(), Normal(), Normal(), Normal()],
 tensor([[  2.7162, -13.0293],
         [-21.9970,   8.7712],
         [ 16.7898,   4.5764],
         [ -6.7494,   3.5071]], dtype=torch.float64),
 tensor([[ 9.9449,  3.3919],
         [34.1531,  7.5984],
         [22.8406,  5.9092],
         [12.9050, 10.3579]]))

plot_gaussians(seeded_states)

seeded_hmms = {}

for word in words:
    word_hmm = DenseHMM()
    word_hmm.add_distributions(seeded_states)
    add_hmm_edges(word_hmm, word_transitions[word], seeded_states)
    seeded_hmms[word]=word_hmm
seeded_hmms['lawn'].edges, seeded_hmms['lawn'].starts

(tensor([[-0.6931,    -inf,    -inf, -0.6931],
         [   -inf,    -inf,    -inf,    -inf],
         [-0.6931,    -inf, -0.6931,    -inf],
         [   -inf,    -inf,    -inf, -0.6931]]),
 tensor([-inf, -inf, 0., -inf]))

plot_viterbi(hmm_dict=seeded_hmms, transition_mat_dict=word_transitions_log)

trained_transitions = word_transitions_log
for i in range(1):
    trained_transitions,trained_states=em_step(df, X, seeded_hmms, trained_transitions, phones)
plot_viterbi(hmm_dict=seeded_hmms, transition_mat_dict=trained_transitions)

/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/1710079399.py:9: RuntimeWarning: divide by zero encountered in log
  logprob_sum=np.log(probs_sum)

lawn 1 tensor([-179.2359,      -inf,      -inf,    0.0000])
lawn 3 tensor([-50.1930,     -inf,   0.0000,     -inf])
lawn 4 tensor([-inf, -inf, -inf, 0.])
tensor([[     -inf,      -inf,      -inf,    0.0000,      -inf,      -inf],
        [     -inf, -179.2359,      -inf,      -inf,    0.0000,      -inf],
        [     -inf,      -inf,      -inf,      -inf,      -inf,      -inf],
        [     -inf,  -50.1930,      -inf,    0.0000,      -inf,      -inf],
        [     -inf,      -inf,      -inf,      -inf,    0.0000,      -inf],
        [     -inf,      -inf,      -inf,      -inf,      -inf,      -inf]])
lean 2 tensor([   -inf, -0.0542,    -inf, -2.9420])
lean 3 tensor([    -inf,   0.0000, -74.0305,     -inf])
lean 4 tensor([-inf, -inf, -inf, 0.])
tensor([[       -inf,        -inf,        -inf,  0.0000e+00,        -inf,
                -inf],
        [       -inf,        -inf,        -inf,        -inf,        -inf,
                -inf],
        [       -inf,        -inf, -5.4203e-02,        -inf, -2.9420e+00,
                -inf],
        [       -inf,        -inf,  0.0000e+00, -7.4031e+01,        -inf,
                -inf],
        [       -inf,        -inf,        -inf,        -inf,  0.0000e+00,
                -inf],
        [       -inf,        -inf,        -inf,        -inf,        -inf,
                -inf]])
kneel 2 tensor([    -inf,   0.0000, -45.8330,     -inf])
kneel 3 tensor([-inf, -inf, 0., -inf])
kneel 4 tensor([   -inf, -2.0963,    -inf, -0.1311])
tensor([[    -inf,     -inf,     -inf,     -inf,   0.0000,     -inf],
        [    -inf,     -inf,     -inf,     -inf,     -inf,     -inf],
        [    -inf,     -inf,   0.0000, -45.8330,     -inf,     -inf],
        [    -inf,     -inf,     -inf,   0.0000,     -inf,     -inf],
        [    -inf,     -inf,  -2.0963,     -inf,  -0.1311,     -inf],
        [    -inf,     -inf,     -inf,     -inf,     -inf,     -inf]])
knee 2 tensor([-inf, 0., -inf, -inf])
knee 4 tensor([    -inf, -18.2525,     -inf,   0.0000])
tensor([[    -inf,     -inf,     -inf,     -inf,   0.0000,     -inf],
        [    -inf,     -inf,     -inf,     -inf,     -inf,     -inf],
        [    -inf,     -inf,   0.0000,     -inf,     -inf,     -inf],
        [    -inf,     -inf,     -inf,     -inf,     -inf,     -inf],
        [    -inf,     -inf, -18.2525,     -inf,   0.0000,     -inf],
        [    -inf,     -inf,     -inf,     -inf,     -inf,     -inf]])
gnaw 1 tensor([0., -inf, -inf, -inf])
gnaw 4 tensor([-124.2903,      -inf,      -inf,    0.0000])
tensor([[     -inf,      -inf,      -inf,      -inf,    0.0000,      -inf],
        [     -inf,    0.0000,      -inf,      -inf,      -inf,      -inf],
        [     -inf,      -inf,      -inf,      -inf,      -inf,      -inf],
        [     -inf,      -inf,      -inf,      -inf,      -inf,      -inf],
        [     -inf, -124.2903,      -inf,      -inf,    0.0000,      -inf],
        [     -inf,      -inf,      -inf,      -inf,      -inf,      -inf]])

/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/4182966518.py:33: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  states[i-1].means=torch.nn.Parameter(torch.tensor(state_means[i-1]), requires_grad=False)
/var/folders/bt/_dsrh6ld2yncbnn9vk_d2lcc0000gp/T/ipykernel_99367/4182966518.py:34: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  states[i-1].covs=torch.nn.Parameter(torch.tensor(state_covs[i-1]), requires_grad=False)

plot_gaussians(trained_states)

	f1	f2	f3	amp	phone	word	word_ipa	time
0	308.842307	691.112245	2183.135206	66.923292	l	lawn	lan	1.939838
1	342.378196	734.724329	2186.841639	69.177411	l	lawn	lan	1.946088
2	362.149719	764.074480	2210.458974	70.539616	l	lawn	lan	1.952338
3	356.570667	762.745254	2225.582023	71.400740	l	lawn	lan	1.958588
4	357.828687	758.569292	2206.414437	72.209283	l	lawn	lan	1.964838
...	...	...	...	...	...	...	...	...
345	755.145121	805.773587	2260.783925	75.332562	a	gnaw	na	6.764838
346	619.998312	805.533887	2206.163646	74.449697	a	gnaw	na	6.771088
347	585.264409	806.373248	2106.831388	73.476442	a	gnaw	na	6.777338
348	465.633125	818.948885	2016.998327	72.475936	a	gnaw	na	6.783588
349	436.861831	819.351235	2057.549432	71.271442	a	gnaw	na	6.789838

	f1	f2	f3	amp	phone	word	word_ipa	time	color	last_phone
0	308.842307	691.112245	2183.135206	66.923292	l	lawn	lan	1.939838	green	^
1	342.378196	734.724329	2186.841639	69.177411	l	lawn	lan	1.946088	green	l
2	362.149719	764.074480	2210.458974	70.539616	l	lawn	lan	1.952338	green	l
3	356.570667	762.745254	2225.582023	71.400740	l	lawn	lan	1.958588	green	l
4	357.828687	758.569292	2206.414437	72.209283	l	lawn	lan	1.964838	green	l

Python demo of Forced Align with HMM-GMM¶

Dimensionality reduction¶

Exploring separability¶

Fitting phone models¶

Hidden Markov Models¶

Likelihood assignment¶

Decoding¶

Fitting HMM-Gaussian¶