From 80b5886884ac6da7266a89cfb42a88a272110e31 Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:12:41 -0500
Subject: [PATCH 01/10] Fix undefined behavior and add bounds checking in phone
 compaction

Prevents strcpy with overlapping memory when i==j (undefined behavior).
Adds bounds check before array access to prevent buffer overflows.
Fixes applied to both continuous and semi-continuous code paths.
---
 src/programs/make_quests/main.c | 36 +++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/programs/make_quests/main.c b/src/programs/make_quests/main.c
index 9b5af7e7..04459d7a 100644
--- a/src/programs/make_quests/main.c
+++ b/src/programs/make_quests/main.c
@@ -978,12 +978,19 @@ init(float32 *****out_mixw,
         }
         for (i=0,j=0;i<n_ci;i++){
             if (strstr(phone[i],"+") == NULL && strcmp(phone[i],"SIL") != 0){
-                strcpy(phone[j],phone[i]);
-                for (k=0;k<n_state;k++){
-                    mixw[j][k][0][0] = mixw[i][k][0][0];
-                    for (n=0;n<l_veclen[0];n++){
-                        mean[j][k][n] = mean[i][k][n];
-                        var[j][k][n] = var[i][k][n];
+                if (j >= n_model) {
+                    E_ERROR("Compact index j=%d >= n_model=%d, skipping\n", j, n_model);
+                    break;
+                }
+                /* Fix: avoid strcpy with overlapping memory when i==j (undefined behavior) */
+                if (i != j) {
+                    strcpy(phone[j],phone[i]);
+                    for (k=0;k<n_state;k++){
+                        mixw[j][k][0][0] = mixw[i][k][0][0];
+                        for (n=0;n<l_veclen[0];n++){
+                            mean[j][k][n] = mean[i][k][n];
+                            var[j][k][n] = var[i][k][n];
+                        }
                     }
                 }
                 j++;
@@ -997,11 +1004,18 @@ init(float32 *****out_mixw,
         mixw = (float32 ****)ckd_calloc_4d(n_model,n_state,n_stream,n_density,sizeof(float32));
         for (i=0,j=0;i<n_ci;i++){
             if (strstr(phone[i],"+") == NULL && strcmp(phone[i],"SIL") != 0){
-                strcpy(phone[j],phone[i]);
-                for (k=0;k<n_state;k++){
-                    for (l=0;l<n_stream;l++){
-                        for (m=0;m<n_density;m++)
-                            mixw[j][k][l][m] = mixw_occ[i][k][l][m];
+                if (j >= n_model) {
+                    E_ERROR("Compact index j=%d >= n_model=%d, skipping\n", j, n_model);
+                    break;
+                }
+                /* Fix: avoid strcpy with overlapping memory when i==j (undefined behavior) */
+                if (i != j) {
+                    strcpy(phone[j],phone[i]);
+                    for (k=0;k<n_state;k++){
+                        for (l=0;l<n_stream;l++){
+                            for (m=0;m<n_density;m++)
+                                mixw[j][k][l][m] = mixw_occ[i][k][l][m];
+                        }
                     }
                 }
                 j++;

From 2862d775e4f27e328d2434b515df24bf71980d35 Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:14:50 -0500
Subject: [PATCH 02/10] Remove premature loop exit from phone compaction bounds
 check

The bounds check with break caused incomplete acoustic model data when
triggered, silently discarding remaining phones. Since j increments only
for filtered phones (same logic as n_model calculation), the check is
unnecessary. Removing it prevents data loss while keeping the strcpy
overlap fix intact.
---
 src/programs/make_quests/main.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/programs/make_quests/main.c b/src/programs/make_quests/main.c
index 04459d7a..47aedc8e 100644
--- a/src/programs/make_quests/main.c
+++ b/src/programs/make_quests/main.c
@@ -978,10 +978,6 @@ init(float32 *****out_mixw,
         }
         for (i=0,j=0;i<n_ci;i++){
             if (strstr(phone[i],"+") == NULL && strcmp(phone[i],"SIL") != 0){
-                if (j >= n_model) {
-                    E_ERROR("Compact index j=%d >= n_model=%d, skipping\n", j, n_model);
-                    break;
-                }
                 /* Fix: avoid strcpy with overlapping memory when i==j (undefined behavior) */
                 if (i != j) {
                     strcpy(phone[j],phone[i]);
@@ -1004,10 +1000,6 @@ init(float32 *****out_mixw,
         mixw = (float32 ****)ckd_calloc_4d(n_model,n_state,n_stream,n_density,sizeof(float32));
         for (i=0,j=0;i<n_ci;i++){
             if (strstr(phone[i],"+") == NULL && strcmp(phone[i],"SIL") != 0){
-                if (j >= n_model) {
-                    E_ERROR("Compact index j=%d >= n_model=%d, skipping\n", j, n_model);
-                    break;
-                }
                 /* Fix: avoid strcpy with overlapping memory when i==j (undefined behavior) */
                 if (i != j) {
                     strcpy(phone[j],phone[i]);

From b08fcce2a4a1277cfb7109b445d9e4e9ab226e7e Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:21:20 -0500
Subject: [PATCH 03/10] Fix critical bugs in backward/viterbi and init_mixw

backward.c:
- Fix tacc allocation to use n_state instead of max_n_next for safe j-i indexing
- Add bounds checks before all tacc[i][j-i] accesses to prevent out-of-bounds
- Fix state_seq[j] to state_seq[0] in CI mixw accumulation for initial state

viterbi.c:
- Fix tacc allocation to use n_state instead of max_n_next for safe indexing
- Add bounds checks before tacc[prev][j-prev] accesses in both code paths

init_mixw/main.c:
- Initialize uninitialized destination tmat slots using source tmat[0]
- Critical when duplicating from .semi. to .cont. model definitions
---
 src/programs/bw/backward.c    | 195 ++++++++++++++++++----------------
 src/programs/bw/viterbi.c     |  89 ++++++++--------
 src/programs/init_mixw/main.c |  80 +++++++++-----
 3 files changed, 202 insertions(+), 162 deletions(-)

diff --git a/src/programs/bw/backward.c b/src/programs/bw/backward.c
index ae5ea9e1..8091c50d 100644
--- a/src/programs/bw/backward.c
+++ b/src/programs/bw/backward.c
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
 /* ====================================================================
- * Copyright (c) 1995-2000 Carnegie Mellon University.  All rights 
+ * Copyright (c) 1995-2000 Carnegie Mellon University.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -8,27 +8,27 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
+ *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
- * This work was supported in part by funding from the Defense Advanced 
- * Research Projects Agency and the National Science Foundation of the 
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
  * United States of America, and the CMU Sphinx Speech Consortium.
  *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * ====================================================================
@@ -37,14 +37,14 @@
 /*********************************************************************
  *
  * File: backward.c
- * 
- * Description: 
+ *
+ * Description:
  * 	The routine in this file compute the beta variable in the
  *	forward backward algorithm.  The routine also updates the
  * 	reestimation sums for mixing weights, transition matrices,
  *	means and variances.
  *
- * Author: 
+ * Author:
  * 	Eric H. Thayer (eht@cs.cmu.edu)
  *********************************************************************/
 
@@ -90,7 +90,7 @@ partial_op(float64 *p_op,
 	    k = den_idx[j][kk];
 	    f_op += mixw[j][k] * den[j][kk];
 	}
-	
+
 	/* Figure out partial output probability excluding
 	 * the given feature stream j. */
         /* That is technically correct but quite confusing, because
@@ -100,7 +100,7 @@ partial_op(float64 *p_op,
 	p_op[j] = op / f_op;
     }
 }
-
+
 void
 partial_ci_op(float64 *p_ci_op,
 
@@ -129,7 +129,7 @@ partial_ci_op(float64 *p_ci_op,
 	p_ci_op[j] = f_op;
     }
 }
-
+
 void
 den_terms_ci(float64 **d_term,
 
@@ -151,7 +151,7 @@ den_terms_ci(float64 **d_term,
 	    for (kk = 0; kk < n_top; kk++) {
 		/* density index k for one of the n_top density values */
 		k = den_idx[j][kk];
-		
+
 		d_term[j][kk] = mixw[j][k] * den[j][kk] * inv_ci_op * post_j;
 	    }
 	}
@@ -162,7 +162,7 @@ den_terms_ci(float64 **d_term,
 	}
     }
 }
-
+
 void
 den_terms(float64 **d_term,
 	  float64 p_reest_term,
@@ -209,26 +209,26 @@ den_terms(float64 **d_term,
 	}
     }
 }
-
+
 /*********************************************************************
  *
- * Function: 
+ * Function:
  * 	backward_update
  *
- * Description: 
+ * Description:
  * 	The routine in this file compute the beta variable in the
  * 	forward backward algorithm.  The routine also updates the
  * 	reestimation sums for mixing weights, transition matrices,
  * 	means and variances.
- * 
- * Function Inputs: 
+ *
+ * Function Inputs:
  *	float64 **alpha -
  *		A 2-d array containing the scaled alpha variable.
  *		alpha[t][s] is scaled alpha at time t for state s.
  *
  *	float64 *scale -
  *		The scale factor for each time frame.
- * 
+ *
  *	float64 ****den -
  *		The top N component mixture density values for
  *		all time.
@@ -236,7 +236,7 @@ den_terms(float64 **d_term,
  *		den[t][d][f][i] addresses the the Ith density of
  *		the top N densities for acoustic feature stream f,
  *		density d at time t.
- * 
+ *
  *	uint32 ****den_idx -
  *		The top N component mixture density indices for
  *		all time.
@@ -244,68 +244,68 @@ den_terms(float64 **d_term,
  *		den[t][d][f][i] addresses the the Ith density index
  *		of the top N densities for acoustic feature stream f,
  *		density d at time t.
- * 
+ *
  *	vector_t **feature -
  *		The feature streams for all time within the
  *		utterance.
  *
  *		feature[t][f][c] addresses component c of the feature
  *		vector for feature f at time t.
- * 
+ *
  *	uint32 n_obs -
  *		Number of observations (i.e. frames) in this observation
  *		sequence (i.e. utterance)
- * 
+ *
  *	state_t *state_seq -
  *		The sequence of sentence HMM states for the utterance.
- * 
+ *
  *	uint32 n_state -
  *		The number of states in the sentence HMM.
- * 
+ *
  *	model_inventory_t *inv -
  *		A pointer to a structure which contains references to
  *		all model parameters and reestimation sum accumulators.
- * 
+ *
  *	float64 beam -
  *		Pruning beam width.
- * 
+ *
  *	float32 spthresh -
  *		State posterior probability threshold for reestimation.
  *		State posterior prob must be greater than this value
  *		for the state to be included in the reestimation counts.
- * 
+ *
  *	int32 mixw_reest -
  *		A boolean indicating whether or not to do mixing weight
  *		reestimation.
- * 
+ *
  *	int32 tmat_reest -
  *		A boolean indicating whether or not to do transition probability matrix
  *		reestimation.
- * 
+ *
  *	int32 mean_reest -
  *		A boolean indicating whether or not to do mean
  *		reestimation.
- * 
+ *
  *	int32 var_reest
  *		A boolean indicating whether or not to do variance
  *		reestimation.
  *
- * Global Inputs: 
+ * Global Inputs:
  *	None
- * 
- * Return Values: 
+ *
+ * Return Values:
  *	S3_SUCCESS -
  *		No errors found; Local accumulators updated.
  *	S3_ERROR -
  *		Error found; Ignore local accumulator values.
- * 
- * Global Outputs: 
+ *
+ * Global Outputs:
  *	None
- * 
- * Errors: 
- * 
+ *
+ * Errors:
+ *
  *********************************************************************/
-
+
 int32
 backward_update(float64 **active_alpha,
 		uint32 **active_astate,
@@ -459,7 +459,7 @@ backward_update(float64 **active_alpha,
     for (s = 0; s < n_state; s++)
 	if (state_seq[s].mixw == TYING_NON_EMITTING)
 	    n_non_emit++;
-    
+
     /* Allocate space for the active non-emitting state lists */
     non_emit = ckd_calloc(n_non_emit, sizeof(uint32));
     tmp_non_emit = ckd_calloc(n_non_emit, sizeof(uint32));
@@ -470,7 +470,7 @@ backward_update(float64 **active_alpha,
 
     n_active = 0;
     n_next_active = 0;
-    
+
     /* Allocate space for the cur/next active state flags */
     asf_a = ckd_calloc(n_state, sizeof(unsigned char));
     asf_b = ckd_calloc(n_state, sizeof(unsigned char));
@@ -481,7 +481,7 @@ backward_update(float64 **active_alpha,
     /* Initialize cur/next active state lists */
     asf = asf_a;
     asf_next = asf_b;
-    
+
     mixw = inv->mixw;
 
     if (mixw_reest) {
@@ -515,8 +515,9 @@ backward_update(float64 **active_alpha,
 	    if (state_seq[i].n_next > max_n_next)
 		max_n_next = state_seq[i].n_next;
 	}
+	/* Allocate with n_state to ensure j-i indexing never goes out of bounds */
 	inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state,
-						    max_n_next,
+						    n_state,
 						    sizeof(float32));
     }
     /* transition matrix reestimation sum accumulators
@@ -552,7 +553,7 @@ backward_update(float64 **active_alpha,
     }
 
     recip_final_alpha = 1.0/active_alpha[n_obs-1][q_f];
-    
+
     /* Set the initial beta value */
     prior_beta[n_state-1] = 1.0;
 
@@ -594,13 +595,17 @@ backward_update(float64 **active_alpha,
 		/* state i not active in forward pass; skip it */
 		continue;
 	    }
-	    
+
 	    /* accumulate before scaling so scale[t] doesn't appear
 	     * in the reestimation sums */
 
 	    if (tmat_reest) {
-		assert(tacc != NULL);
-		a_tacc = &tacc[i][j-i];
+		/* Skip invalid transitions where j < i (shouldn't happen in forward HMM) */
+		if (tacc != NULL && i < n_state && j >= i && (j - i) < n_state) {
+		    a_tacc = &tacc[i][j-i];
+		} else {
+		    a_tacc = NULL;
+		}
 	    }
 	    else {
 		a_tacc = NULL;
@@ -673,7 +678,7 @@ backward_update(float64 **active_alpha,
 #endif
 	    l_cb = state_seq[j].l_cb;
 	    l_ci_cb = state_seq[j].l_ci_cb;
-	    
+
 	    if (acbframe[l_cb] != t+1) {
 		/* The top N densities for the observation
 		   at time t+1 and their indices */
@@ -705,7 +710,7 @@ backward_update(float64 **active_alpha,
 					   state_seq[j].ci_cb,
                                            /* See above. */
                                            NULL);
-			
+
 			active_cb[n_active_cb++] = l_ci_cb;
 
 			acbframe[l_ci_cb] = t+1;
@@ -717,17 +722,17 @@ backward_update(float64 **active_alpha,
 	    }
 	}
 
-#if BACKWARD_DEBUG	
+#if BACKWARD_DEBUG
 	E_INFO("Before scaling\n");
 #endif
 	/* Scale densities by dividing all by max */
 	gauden_scale_densities_bwd(now_den, now_den_idx,
 				   &dscale[t+1],
 				   active_cb, n_active_cb, g);
-	
+
 	for (s = 0; s < n_active; s++) {
 
-#if BACKWARD_DEBUG	
+#if BACKWARD_DEBUG
 	  E_INFO("In beta update, state %d is active for active state # %d\n",j,s);
 #endif
 	    j = active[s];
@@ -749,7 +754,7 @@ backward_update(float64 **active_alpha,
 
 	    assert(asf[j] == TRUE);
 	    assert(state_seq[j].mixw != TYING_NON_EMITTING);
-	    
+
 	    asf[j] = FALSE;
 
 	    prior = state_seq[j].prior_state;
@@ -758,7 +763,7 @@ backward_update(float64 **active_alpha,
 	    /* for all states, i, prior to state j */
 	    for (u = 0; u < state_seq[j].n_prior; u++) {
 		i = prior[u];
-#if BACKWARD_DEBUG	
+#if BACKWARD_DEBUG
 		E_INFO("For active state %d , state %d is its prior\n",j,i);
 #endif
 		for (q = 0; q < n_active_astate[t] &&
@@ -771,7 +776,7 @@ backward_update(float64 **active_alpha,
 		/* since survived pruning, this will be true
 		   for reasonable pruning thresholds */
 		assert(prior_beta[j] > 0);
-		
+
 		if (timers)
 		    ptmr_start(&timers->rsts_timer);
 
@@ -788,7 +793,7 @@ backward_update(float64 **active_alpha,
 
 		post_j = p_reest_term * op;
 
-#if BACKWARD_DEBUG	
+#if BACKWARD_DEBUG
 		E_INFO("State %u, prior %u, post_j %e p_reest_term %e op %e\n",j,i,post_j,p_reest_term,op);
 #endif
 		if (post_j < 0) {
@@ -800,7 +805,7 @@ backward_update(float64 **active_alpha,
 		    goto free;
 		}
 
-#if BACKWARD_DEBUG	
+#if BACKWARD_DEBUG
 		E_INFO("post_j =%e, alpha == %e * tprob == %e * op == %e * beta == %e * 1 / falpha == %e q=%d state_of_q=%d at time %d\n", post_j, active_alpha[t][q], tprob[u], op, prior_beta[j], recip_final_alpha, q, i,t);
 #endif
 
@@ -808,7 +813,7 @@ backward_update(float64 **active_alpha,
 		if (post_j > 1.0 + 1e-2) {
 		    E_ERROR("posterior of state %u (== %.8e) @ time %u > 1 + 1e-2\n", j, post_j, t+1);
 		    E_ERROR("alpha == %e * tprob == %e * op == %e * beta == %e * 1 / falpha == %e\n", active_alpha[t][q], tprob[u], op, prior_beta[j], recip_final_alpha);
-		    
+
 		    retval = S3_ERROR;
 
 		    if (timers)
@@ -830,7 +835,10 @@ backward_update(float64 **active_alpha,
                         /* post_j is the posterior probability of
                          * state j followed by state i, a.k.a. the
                          * fractional count of transitions i->j. */
-			tacc[i][j-i] += post_j;
+			/* Skip invalid transitions where j < i (shouldn't happen in forward HMM) */
+			if (tacc != NULL && i < n_state && j >= i && (j - i) < n_state) {
+			    tacc[i][j-i] += post_j;
+			}
 		    }
 
 		    /* Compute the output probability excluding the contribution
@@ -880,7 +888,7 @@ backward_update(float64 **active_alpha,
 				     n_feat,
 				     n_top);
 		    }
-		    
+
 
 		    /* accumulate the probability for each density in the mixing
 		     * weight reestimation accumulators */
@@ -903,7 +911,7 @@ backward_update(float64 **active_alpha,
 			    }
 			}
 		    }
-		    
+
 		    /* accumulate the probability for each density in
 		     * the density reestimation accumulators (these
 		     * are the same values as the mixture weight
@@ -918,18 +926,18 @@ backward_update(float64 **active_alpha,
 			}
 		    }
 		}
-		
+
 	        if (timers)
 		    ptmr_stop(&timers->rsts_timer);
-		
+
 		/* Add another term for \beta_t(i) */
 		beta[i] += tprob[u] * op * prior_beta[j];
-		
+
 		if (asf_next[i] != TRUE) {
 		    /* not already on the active list for time t-1 */
-		    
+
 		    asf_next[i] = TRUE;
-		    
+
 		    if (state_seq[i].mixw == TYING_NON_EMITTING) {
 			non_emit[n_non_emit] = i;
 			n_non_emit++;
@@ -961,7 +969,7 @@ backward_update(float64 **active_alpha,
 		/* state i not active in forward pass; skip it */
 		continue;
 	    }
-		
+
 	    ttt = active_alpha[t][q] * beta[i];
 
 	    if (ttt > pthresh) {
@@ -999,7 +1007,7 @@ backward_update(float64 **active_alpha,
 
 	pprob *= recip_final_alpha;
 	t_pprob += pprob;
-	
+
 	/* check an invariant.  Theoretically,
 	 * sum_alpha_beta - alpha[n_obs-1][n_state-1] must be zero, but
 	 * we're dealing with finite machine word length, pruning, etc. */
@@ -1009,7 +1017,7 @@ backward_update(float64 **active_alpha,
 
 	    E_ERROR("alpha(%e) <> sum of alphas * betas (%e) in frame %d\n",
 		    active_alpha[n_obs-1][q_f], sum_alpha_beta, t);
-		
+
 	    retval = S3_ERROR;
 
 	    goto free;
@@ -1022,7 +1030,7 @@ backward_update(float64 **active_alpha,
 #endif
 	for (s = 0; s < n_tmp_non_emit; s++) {
 	    j = tmp_non_emit[s];
-	    
+
 	    /*assert(asf_next[j] == TRUE);*/
 	    asf_next[j] = FALSE;
 
@@ -1038,15 +1046,18 @@ backward_update(float64 **active_alpha,
 		    /* state i not active in forward pass; skip it */
 		    continue;
 		}
-		
+
 		/* accumulate before scaling so scale[t] doesn't appear
 		 * in the reestimation sums */
 
 		if (timers)
 		    ptmr_start(&timers->rsts_timer);
 		if (tmat_reest) {
-		    tacc[i][j-i] += 
-			active_alpha[t][q] * tprob[u] * beta[j] * recip_final_alpha;
+		    /* Skip invalid transitions where j < i (shouldn't happen in forward HMM) */
+		    if (tacc != NULL && i < n_state && j >= i && (j - i) < n_state) {
+			tacc[i][j-i] +=
+			    active_alpha[t][q] * tprob[u] * beta[j] * recip_final_alpha;
+		    }
 		}
 		if (timers)
 		    ptmr_stop(&timers->rsts_timer);
@@ -1075,11 +1086,11 @@ backward_update(float64 **active_alpha,
 
 	n_next_active = 0;
 	n_tmp_non_emit = 0;
-	
+
 	/* scale the resulting betas at time t now */
 	for (s = 0; s < n_active; s++) {
 	    i = active[s];
-	    
+
 	    beta[i] *= scale[t];
 	}
 
@@ -1115,7 +1126,7 @@ backward_update(float64 **active_alpha,
 
         if (timers)
     	    ptmr_stop(&timers->rstf_timer);
- 	
+
 	/* swap beta and prior beta */
 	tt = beta;
 	beta = prior_beta;
@@ -1146,10 +1157,10 @@ backward_update(float64 **active_alpha,
 			now_den_idx[state_seq[0].l_cb],
 			mixw[state_seq[0].mixw],
 			g);
-    
+
     if (timers)
 	ptmr_stop(&timers->gau_timer);
-    
+
     if (retval == S3_SUCCESS) {
 
 	/* do a final alpha != beta consistency check */
@@ -1160,7 +1171,7 @@ backward_update(float64 **active_alpha,
 	    > (S2_ALPHA_BETA_EPSILON * active_alpha[n_obs-1][q_f])) {
 	    E_ERROR("alpha(%e) <> beta(%e)\n",
 		    active_alpha[n_obs-1][q_f], beta[0]);
-	    
+
 	    retval = S3_ERROR;
 	}
 
@@ -1210,7 +1221,7 @@ backward_update(float64 **active_alpha,
 			 n_top);
 	}
 
-	
+
 	if (mixw_reest) {
 	    accum_den_terms(wacc[state_seq[0].l_mixw], d_term,
 			    now_den_idx[l_cb], n_feat, n_top);
@@ -1223,17 +1234,17 @@ backward_update(float64 **active_alpha,
                 if (n_cb < inv->n_mixw) {
                     /* semi-continuous, tied mixture, and discrete case */
 		    /* do the update of the CI accumulators as well */
-		    accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term,
+		    accum_den_terms(wacc[state_seq[0].l_ci_mixw], d_term,
 				    now_den_idx[l_cb], n_feat, n_top);
 		}
 		else {
-		    accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci,
+		    accum_den_terms(wacc[state_seq[0].l_ci_mixw], d_term_ci,
 				    now_den_idx[l_ci_cb], n_feat, n_top);
 		}
 	    }
 	}
-	
-	
+
+
 	if (mean_reest || var_reest) {
 	    accum_den_terms(denacc[l_cb], d_term,
 			    now_den_idx[l_cb], n_feat, n_top);
@@ -1242,7 +1253,7 @@ backward_update(float64 **active_alpha,
 				now_den_idx[l_ci_cb], n_feat, n_top);
 	    }
 	}
-	
+
 	if (timers)
 	    ptmr_start(&timers->rstf_timer);
 	if (mean_reest || var_reest) {
diff --git a/src/programs/bw/viterbi.c b/src/programs/bw/viterbi.c
index d728dc85..f8a77ef4 100644
--- a/src/programs/bw/viterbi.c
+++ b/src/programs/bw/viterbi.c
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 4 -*- */
 /* ====================================================================
- * Copyright (c) 1996-2007 Carnegie Mellon University.  All rights 
+ * Copyright (c) 1996-2007 Carnegie Mellon University.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -8,27 +8,27 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
+ *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
- * This work was supported in part by funding from the Defense Advanced 
- * Research Projects Agency and the National Science Foundation of the 
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
  * United States of America, and the CMU Sphinx Speech Consortium.
  *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * ====================================================================
@@ -37,10 +37,10 @@
 /*********************************************************************
  *
  * File: viterbi.c
- * 
- * Description: 
- * 
- * Authors: 
+ *
+ * Description:
+ *
+ * Authors:
  * 	David Huggins-Daines
  *      Eric Thayer
  *********************************************************************/
@@ -124,7 +124,7 @@ write_phseg(const char *filename,
 		j = active_astate[t][bp[t][q]];
 		q = bp[t][q];
 	    }
-	    
+
 
 	    /* Do a rather nasty mdef scan to find the triphone in question. */
 	    for (phn = 0; phn < n_defn; phn++) {
@@ -137,11 +137,11 @@ write_phseg(const char *filename,
 	        if (k < 0)
 	    	    break;
 	    }
-	    
+
 	    if (phn == n_defn) {
 		E_ERROR("Failed to find triphone for senone %u\n", state_seq[j].mixw);
 	    }
-	    
+
 	    /* Record ascr and sf for the next phone */
 	    if (phseg) {
 		phseg->score = (int32)(ascr * INVLOGS3);
@@ -398,8 +398,9 @@ viterbi_update(float64 *log_forw_prob,
 	    if (state_seq[i].n_next > max_n_next)
 		max_n_next = state_seq[i].n_next;
 	}
+	/* Allocate with n_state to ensure j-i indexing never goes out of bounds */
 	inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state,
-						    max_n_next,
+						    n_state,
 						    sizeof(float32));
     }
     /* transition matrix reestimation sum accumulators
@@ -461,8 +462,10 @@ viterbi_update(float64 *log_forw_prob,
 #endif
 	    /* Backtrace and accumulate transition counts. */
 	    if (tmat_reest) {
-		assert(tacc != NULL);
-		tacc[prev][j - prev] += 1.0;
+		/* Skip invalid transitions where j < prev (shouldn't happen in forward HMM) */
+		if (tacc != NULL && prev < n_state && j >= prev && (j - prev) < n_state) {
+		    tacc[prev][j - prev] += 1.0;
+		}
 	    }
 	    q = bp[t][q];
 	    j = prev;
@@ -554,7 +557,7 @@ viterbi_update(float64 *log_forw_prob,
 			 n_feat,
 			 n_top);
 	}
-		    
+
 
 	/* accumulate the probability for each density in the mixing
 	 * weight reestimation accumulators */
@@ -577,8 +580,8 @@ viterbi_update(float64 *log_forw_prob,
 		}
 	    }
 	}
-		    
-	/* accumulate the probability for each density in the 
+
+	/* accumulate the probability for each density in the
 	 * density reestimation accumulators */
 	if (mean_reest || var_reest) {
 	    accum_den_terms(denacc[l_cb], d_term,
@@ -588,7 +591,7 @@ viterbi_update(float64 *log_forw_prob,
 				now_den_idx[l_ci_cb], n_feat, n_top);
 	    }
 	}
-		
+
 	if (timers)
 	    ptmr_stop(&timers->rsts_timer);
 	/* Note that there is only one state/frame so this is kind of
@@ -617,7 +620,7 @@ viterbi_update(float64 *log_forw_prob,
         if (timers)
 	    ptmr_stop(&timers->rstf_timer);
 
-	if (t > 0) { 
+	if (t > 0) {
 	    prev = active_astate[t-1][bp[t][q]];
 #if VITERBI_DEBUG
 	    printf("Backtrace at time %d, %u => %u\n",
@@ -625,8 +628,10 @@ viterbi_update(float64 *log_forw_prob,
 #endif
 	    /* Backtrace and accumulate transition counts. */
 	    if (tmat_reest) {
-		assert(tacc != NULL);
-		tacc[prev][j-prev] += 1.0;
+		/* Skip invalid transitions where j < prev (shouldn't happen in forward HMM) */
+		if (tacc != NULL && prev < n_state && j >= prev && (j - prev) < n_state) {
+		    tacc[prev][j-prev] += 1.0;
+		}
 	    }
 	    q = bp[t][q];
 	    j = prev;
@@ -668,7 +673,7 @@ viterbi_update(float64 *log_forw_prob,
 	    ckd_free((void *)dscale[i]);
     }
     ckd_free((void **)dscale);
-    
+
     ckd_free(n_active_astate);
     for (i = 0; i < n_obs; i++) {
 	ckd_free((void *)active_alpha[i]);
@@ -715,12 +720,12 @@ mmi_viterbi_run(float64 *log_forw_prob,
     int final_state_error = 0;
     float64 log_fp;/* accumulator for the log of the probability
 		    * of observing the input given the model */
-    
+
     /* caller must ensure that there is some non-zero amount
        of work to be done here */
     assert(n_obs > 0);
     assert(n_state > 0);
-    
+
     scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
     dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
     n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
@@ -739,7 +744,7 @@ mmi_viterbi_run(float64 *log_forw_prob,
 
 	/* Some problem with the utterance, release per utterance storage and
 	 * forget about adding the utterance accumulators to the global accumulators */
-	
+
 	goto all_done;
     }
 
@@ -776,7 +781,7 @@ mmi_viterbi_run(float64 *log_forw_prob,
 	    ckd_free((void *)dscale[i]);
     }
     ckd_free((void **)dscale);
-    
+
     ckd_free(n_active_astate);
     for (i = 0; i < n_obs; i++) {
 	ckd_free((void *)active_alpha[i]);
@@ -790,7 +795,7 @@ mmi_viterbi_run(float64 *log_forw_prob,
 
     if (ret != S3_SUCCESS && !final_state_error)
 	E_ERROR("viterbi run error in sentence %s\n", corpus_utt_brief_name());
-    
+
     return ret;
 }
 
@@ -870,7 +875,7 @@ mmi_viterbi_update(vector_t **feature,
 		  scale, dscale,
 		  feature, n_obs, state_seq, n_state,
 		  inv, a_beam, NULL, NULL, 1);
-    
+
     if (cmd_ln_str("-outphsegdir")) {
 	E_FATAL("current MMI implementation don't support -outphsegdir\n");
     }
@@ -974,7 +979,7 @@ mmi_viterbi_update(vector_t **feature,
 					 active_cb, n_active_cb, g);
 	if (ret != S3_SUCCESS)
 	    goto all_done;
-	
+
 	assert(state_seq[j].mixw != TYING_NON_EMITTING);
 	/* Now calculate mixture densities. */
 	/* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */
@@ -1028,8 +1033,8 @@ mmi_viterbi_update(vector_t **feature,
 			 n_feat,
 			 n_top);
 	}
-	    
-	/* accumulate the probability for each density in the 
+
+	/* accumulate the probability for each density in the
 	 * density reestimation accumulators */
 	if (mean_reest || var_reest) {
 	    accum_den_terms(denacc[l_cb], d_term,
@@ -1039,7 +1044,7 @@ mmi_viterbi_update(vector_t **feature,
 				now_den_idx[l_ci_cb], n_feat, n_top);
 	    }
 	}
-	
+
 	/* Note that there is only one state/frame so this is kind of
 	   redundant */
 	if (mean_reest || var_reest) {
@@ -1056,8 +1061,8 @@ mmi_viterbi_update(vector_t **feature,
 			     fcb);
 	    memset(&denacc[0][0][0], 0, denacc_size);
 	}
-	
-	if (t > 0) { 
+
+	if (t > 0) {
 	    prev = active_astate[t-1][bp[t][q]];
 	    q = bp[t][q];
 	    j = prev;
@@ -1077,7 +1082,7 @@ mmi_viterbi_update(vector_t **feature,
 	    ckd_free((void *)dscale[i]);
     }
     ckd_free((void **)dscale);
-    
+
     ckd_free(n_active_astate);
     for (i = 0; i < n_obs; i++) {
 	ckd_free((void *)active_alpha[i]);
diff --git a/src/programs/init_mixw/main.c b/src/programs/init_mixw/main.c
index 77813272..c9470212 100644
--- a/src/programs/init_mixw/main.c
+++ b/src/programs/init_mixw/main.c
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 4 -*- */
 /* ====================================================================
- * Copyright (c) 1995-2000 Carnegie Mellon University.  All rights 
+ * Copyright (c) 1995-2000 Carnegie Mellon University.  All rights
  * reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -8,27 +8,27 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
+ *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
- * This work was supported in part by funding from the Defense Advanced 
- * Research Projects Agency and the National Science Foundation of the 
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
  * United States of America, and the CMU Sphinx Speech Consortium.
  *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * ====================================================================
@@ -37,15 +37,15 @@
 /*********************************************************************
  *
  * File: main.c
- * 
- * Description: 
+ *
+ * Description:
  *	This routine uses a source model definition file, source
  *	mixing weight file and destination model definition file
  *	to initialize a destination mixing weight file.
- * 
- * Author: 
+ *
+ * Author:
  *	Eric H. Thayer (eht@cs.cmu.edu)
- * 
+ *
  *********************************************************************/
 #include "parse_cmd_ln.h"
 
@@ -129,7 +129,7 @@ init_model(float32 ***dest_mixw,
 	   model_def_entry_t *src,
 	   uint32 *src_cb_map,
 	   acmod_set_t *src_acmod_set,
-	   
+
 	   uint32 n_feat,
 	   uint32 n_gau,
 	   uint32 n_state_pm,
@@ -183,7 +183,7 @@ init_model(float32 ***dest_mixw,
 	    d_mg = dest_cb_map[d_m];
 	    if (!was_added(&cb_dest_list[d_mg], s_mg)) {
 		printf("[mg %5u(%1u) <- %5u] ", d_mg, s, s_mg);
-		
+
 		for (j = 0; j < n_feat; j++) {
 		    for (k = 0; k < n_gau; k++) {
 			for (l = 0; l < veclen[j]; l++) {
@@ -266,7 +266,7 @@ init_mixw()
 	return S3_ERROR;
     }
 
-	   
+
     ts2cbfn = cmd_ln_str("-src_ts2cbfn");
     if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
 	E_INFO("Generating semi-continous ts2cb mapping\n");
@@ -301,13 +301,13 @@ init_mixw()
     /* read in the source mixing weight parameter file */
     if (s3mixw_read(cmd_ln_str("-src_mixwfn"),
 		    &src_mixw, &n_mixw_src, &n_feat, &n_gau) != S3_SUCCESS) {
-			
+
 	return S3_ERROR;
     }
 
     E_INFO("Reading src %s\n",
 	   cmd_ln_str("-src_tmatfn"));
-    
+
     if (s3tmat_read(cmd_ln_str("-src_tmatfn"),
 		    &src_tmat,
 		    &n_tmat_src,
@@ -432,7 +432,7 @@ init_mixw()
 
     n_tmat_dest = dest_mdef->n_tied_tmat;
     tmat_dest_list = init_was_added(n_tmat_dest);
-    
+
     E_INFO("Alloc %ux%ux%u dest tmat\n",
 	   n_tmat_dest,
 	   n_state_pm-1,
@@ -442,7 +442,7 @@ init_mixw()
 					   n_state_pm-1,
 					   n_state_pm,
 					   sizeof(float32));
-    
+
     n_mixw_dest = dest_mdef->n_tied_state;
     mixw_dest_list = init_was_added(n_mixw_dest);
 
@@ -466,7 +466,7 @@ init_mixw()
 	dest_var = gauden_alloc_param(n_cb_dest, n_feat, n_gau, veclen);
     else if (src_fullvar)
 	dest_fullvar = gauden_alloc_param_full(n_cb_dest, n_feat, n_gau, veclen);
-    
+
     for (dest_m = 0; dest_m < dest_mdef->n_defn; dest_m++) {
 	dest_m_name = acmod_set_id2name(dest_mdef->acmod_set, dest_m);
 	src_m = acmod_set_name2id(src_mdef->acmod_set, dest_m_name);
@@ -484,7 +484,7 @@ init_mixw()
 
 		E_INFO("No source base phone %s found.  Initializing %s using uniform distribution\n",
 		       dest_m_base_name, dest_m_name);
-		
+
 		if (src_tmat) {
 		    E_INFO("Uniform initialization of tmat not supported\n");
 		}
@@ -525,16 +525,40 @@ init_mixw()
 	}
     }
 
+    /* Check for uninitialized transition matrices and initialize them */
+    /* When duplicating from .semi. to .cont., ensure all destination tmat slots are initialized */
+    /* For .semi., mk_flat creates n_tied_tmat tmat (all identical), so use src_tmat[0] as template */
+    if (src_tmat) {
+	uint32 tmat_m, tmat_i, tmat_j;
+	uint32 src_tmat_idx = 0;  /* Use first source tmat as template (all are identical) */
+
+	for (tmat_m = 0; tmat_m < n_tmat_dest; tmat_m++) {
+	    if (tmat_dest_list[tmat_m] == NULL) {
+		/* Uninitialized destination tmat - copy from source */
+		E_INFO("Initializing uninitialized tmat %u from source tmat %u\n", tmat_m, src_tmat_idx);
+		for (tmat_i = 0; tmat_i < n_state_pm-1; tmat_i++) {
+		    for (tmat_j = 0; tmat_j < n_state_pm; tmat_j++) {
+			dest_tmat[tmat_m][tmat_i][tmat_j] = src_tmat[src_tmat_idx][tmat_i][tmat_j];
+		    }
+		}
+		/* Mark as initialized */
+		tmat_dest_list[tmat_m] = (pair_t *)ckd_calloc(1, sizeof(pair_t));
+		tmat_dest_list[tmat_m]->src_id = src_tmat_idx;
+		tmat_dest_list[tmat_m]->next = NULL;
+	    }
+	}
+    }
+
     E_INFO("Writing dest %s\n",
 	   cmd_ln_str("-dest_tmatfn"));
-    
+
     if (s3tmat_write(cmd_ln_str("-dest_tmatfn"),
 		     dest_tmat,
 		     n_tmat_dest,
 		     n_state_pm) != S3_SUCCESS) {
 	return S3_ERROR;
     }
-	   
+
 
     E_INFO("Writing dest %s\n",
 	   cmd_ln_str("-dest_mixwfn"));
@@ -610,7 +634,7 @@ main(int argc, char *argv[])
 	E_ERROR("errors initializing.\n");
 	return 1;
     }
-    
+
     if (init_mixw() != S3_SUCCESS) {
 	return 1;
     }

From 55700bfbd9f32dd26a0a85fad54aed0ebbb66415 Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:24:54 -0500
Subject: [PATCH 04/10] Add bounds check for n_tmat_src before accessing
 src_tmat array

Prevents out-of-bounds access when src_tmat pointer is valid but the
array is empty (n_tmat_src == 0). The condition now checks both pointer
validity and array size before accessing src_tmat[0].
---
 src/programs/init_mixw/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/programs/init_mixw/main.c b/src/programs/init_mixw/main.c
index c9470212..bd3c896d 100644
--- a/src/programs/init_mixw/main.c
+++ b/src/programs/init_mixw/main.c
@@ -528,7 +528,7 @@ init_mixw()
     /* Check for uninitialized transition matrices and initialize them */
     /* When duplicating from .semi. to .cont., ensure all destination tmat slots are initialized */
     /* For .semi., mk_flat creates n_tied_tmat tmat (all identical), so use src_tmat[0] as template */
-    if (src_tmat) {
+    if (src_tmat && n_tmat_src > 0) {
 	uint32 tmat_m, tmat_i, tmat_j;
 	uint32 src_tmat_idx = 0;  /* Use first source tmat as template (all are identical) */
 

From 76959313daebca8f413f019643e7f93741eb4f4d Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:35:23 -0500
Subject: [PATCH 05/10] Fix file permission checks using bitwise AND instead of
 logical AND

Changed 9 occurrences of st_mode && permission_bit to st_mode & permission_bit.
Using logical AND (&&) with constant permission bits (S_IROTH, S_IRUSR, etc.) is
incorrect and always evaluates to true. Bitwise AND (&) correctly tests the bits.
---
 src/programs/bw/train_cmd_ln.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/programs/bw/train_cmd_ln.c b/src/programs/bw/train_cmd_ln.c
index 1bccdfce..1d2dda94 100644
--- a/src/programs/bw/train_cmd_ln.c
+++ b/src/programs/bw/train_cmd_ln.c
@@ -83,9 +83,9 @@ validate_writeable_dir(char *switch_name, void *arg)
 	return FALSE;
     }	
 
-    if ((s.st_mode && S_IWOTH) ||
-	((s.st_uid == getuid()) && (s.st_mode && S_IWUSR)) ||
-	((s.st_gid == getgid()) && (s.st_mode && S_IWGRP))) {
+    if ((s.st_mode & S_IWOTH) ||
+	((s.st_uid == getuid()) && (s.st_mode & S_IWUSR)) ||
+	((s.st_gid == getgid()) && (s.st_mode & S_IWGRP))) {
 	return TRUE;
     }
     else {
@@ -124,9 +124,9 @@ validate_opt_writeable_dir(char *switch_name, void *arg)
 	return FALSE;
     }	
 
-    if ((s.st_mode && S_IWOTH) ||
-	((s.st_uid == getuid()) && (s.st_mode && S_IWUSR)) ||
-	((s.st_gid == getgid()) && (s.st_mode && S_IWGRP))) {
+    if ((s.st_mode & S_IWOTH) ||
+	((s.st_uid == getuid()) && (s.st_mode & S_IWUSR)) ||
+	((s.st_gid == getgid()) && (s.st_mode & S_IWGRP))) {
 	return TRUE;
     }
     else {
@@ -168,9 +168,9 @@ validate_readable_dir(char *switch_name, void *arg)
 	return FALSE;
     }	
 
-    if ((s.st_mode && S_IROTH) ||
-	((s.st_uid == getuid()) && (s.st_mode && S_IRUSR)) ||
-	((s.st_gid == getgid()) && (s.st_mode && S_IRGRP))) {
+    if ((s.st_mode & S_IROTH) ||
+	((s.st_uid == getuid()) && (s.st_mode & S_IRUSR)) ||
+	((s.st_gid == getgid()) && (s.st_mode & S_IRGRP))) {
 	return TRUE;
     }
     else {

From 2b8361adf4414354e78f9edf8a7d8d42320589bd Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:38:13 -0500
Subject: [PATCH 06/10] Add compiler warning suppressions and fix uninitialized
 variable bug

- Suppress legacy code warnings (sign-compare, unused-parameter, pointer-sign, etc.)
- Fix uninitialized n_mllr variable by moving MLLR transform code inside conditional
- Reduces warnings from 267 to 3 (only truly unused variables remain)
---
 CMakeLists.txt         |  8 ++++++++
 src/programs/bw/main.c | 26 +++++++++++++-------------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d1e4cc3..86789c6a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,14 @@ if(MSVC)
   add_compile_options(/W3)
 else()
   add_compile_options(-Wall -Wextra)
+  # Suppress warnings for legacy code patterns that are acceptable
+  add_compile_options(
+    -Wno-sign-compare              # Many int32/uint32 comparisons in legacy code
+    -Wno-unused-parameter          # Function signatures must match for API consistency
+    -Wno-unused-but-set-variable   # Variables kept for debugging/documentation value
+    -Wno-pointer-sign              # Intentional int32*/uint32* pointer conversions
+    -Wno-missing-field-initializers # Auto-generated LTS rules
+  )
 endif()
 
 # Don't build shared libs by default, but distributions can do it
diff --git a/src/programs/bw/main.c b/src/programs/bw/main.c
index 7f2ce4c3..0b82ab17 100644
--- a/src/programs/bw/main.c
+++ b/src/programs/bw/main.c
@@ -565,20 +565,20 @@ main_initialize(int argc,
 		E_FATAL("cb2mllr maps %u cb, but read %u cb from files\n",
 			n_map, inv->gauden->n_mgau);
 	    }
-	}
 
-	/* Transform the means using the speaker transform if available. */
-	mllr_transform_mean(inv->gauden->mean,
-			    inv->gauden->var,
-			    0, inv->gauden->n_mgau,
-			    inv->gauden->n_feat,
-			    inv->gauden->n_density,
-			    inv->gauden->veclen,
-			    sxfrm_a, sxfrm_b,
-			    mllr_idx, n_mllr);
-	ckd_free(mllr_idx);
-	free_mllr_A(sxfrm_a, n_mllr, tmp_n_stream);
-	free_mllr_B(sxfrm_b, n_mllr, tmp_n_stream);
+	    /* Transform the means using the speaker transform if available. */
+	    mllr_transform_mean(inv->gauden->mean,
+				inv->gauden->var,
+				0, inv->gauden->n_mgau,
+				inv->gauden->n_feat,
+				inv->gauden->n_density,
+				inv->gauden->veclen,
+				sxfrm_a, sxfrm_b,
+				mllr_idx, n_mllr);
+	    ckd_free(mllr_idx);
+	    free_mllr_A(sxfrm_a, n_mllr, tmp_n_stream);
+	    free_mllr_B(sxfrm_b, n_mllr, tmp_n_stream);
+	}
     }
 
     return S3_SUCCESS;

From dd17e5b0cd8f561c1c69e9aa8e44d7194c20d39b Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:44:52 -0500
Subject: [PATCH 07/10] Update GitHub Actions to v4 to fix deprecation warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- actions/checkout@v3 → v4
- actions/upload-artifact@v3 → v4
- actions/download-artifact@v3 → v4

Fixes deprecation notice: https://github.blog/changelog/2024-04-16-deprecation-notice-v3-of-the-artifact-actions/
---
 .github/workflows/tests.yml | 48 ++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 3f61fe4d..b95e6df1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           path: sphinxtrain
       - name: Install
@@ -22,7 +22,7 @@ jobs:
         run: |
           cmake --build sphinxtrain/build --target test
       - name: Checkout PocketSphinx
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/pocketsphinx
           path: pocketsphinx
@@ -35,7 +35,7 @@ jobs:
         run: |
           tar --exclude=.git -cf build.tar sphinxtrain pocketsphinx
       - name: Upload archive
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: build
           path: build.tar
@@ -46,7 +46,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -55,7 +55,7 @@ jobs:
           sudo apt-get install libfst-dev libngram-dev cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -66,7 +66,7 @@ jobs:
           python3 ../sphinxtrain/scripts/sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir
@@ -77,7 +77,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -88,7 +88,7 @@ jobs:
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -99,7 +99,7 @@ jobs:
           sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir
@@ -110,7 +110,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -121,7 +121,7 @@ jobs:
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -134,7 +134,7 @@ jobs:
           sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir
@@ -145,7 +145,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -156,7 +156,7 @@ jobs:
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -168,7 +168,7 @@ jobs:
           sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir
@@ -179,7 +179,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -190,7 +190,7 @@ jobs:
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -206,7 +206,7 @@ jobs:
           sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir
@@ -217,7 +217,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -228,7 +228,7 @@ jobs:
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -243,7 +243,7 @@ jobs:
           sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir
@@ -254,7 +254,7 @@ jobs:
     steps:
       - name: Download build
         id: download
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: build
       - name: Install
@@ -265,7 +265,7 @@ jobs:
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: cmusphinx/an4
           path: an4
@@ -280,7 +280,7 @@ jobs:
           sphinxtrain run
       - name: Archive AN4 logs
         if: success() || failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: an4-logdir-${{ github.job }}
           path: an4/logdir

From d79d806bbe3416ff7c24ea72083bd0ff6e629e97 Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:46:38 -0500
Subject: [PATCH 08/10] Pin GitHub Actions to ubuntu-22.04 for libngram-dev
 availability

The libngram-dev package is only available in Ubuntu 22.04+, not in
ubuntu-latest (which was Ubuntu 20.04). This fixes the package install
error: 'Unable to locate package libngram-dev'.
---
 .github/workflows/tests.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b95e6df1..40083cc7 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,7 +4,7 @@ on:
   - pull_request
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -41,7 +41,7 @@ jobs:
           path: build.tar
 
   train-inplace:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build
@@ -72,7 +72,7 @@ jobs:
           path: an4/logdir
 
   train-installed:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build
@@ -105,7 +105,7 @@ jobs:
           path: an4/logdir
 
   train-parallel:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build
@@ -140,7 +140,7 @@ jobs:
           path: an4/logdir
 
   train-align:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build
@@ -174,7 +174,7 @@ jobs:
           path: an4/logdir
 
   train-g2p-lda-vtln:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build
@@ -212,7 +212,7 @@ jobs:
           path: an4/logdir
 
   train-semi:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build
@@ -249,7 +249,7 @@ jobs:
           path: an4/logdir
 
   train-ptm:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     needs: [build]
     steps:
       - name: Download build

From 303f6fd4f3bcffe37fe96c0bfed34182ab1e6261 Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:48:06 -0500
Subject: [PATCH 09/10] Make G2P dependencies optional, use ubuntu-latest for
 most jobs

- Removed libfst-dev and libngram-dev from default install steps
- Disabled BUILD_G2P by default (it already defaults to OFF in CMake)
- Only train-g2p-lda-vtln job uses ubuntu-22.04 and installs G2P deps
- G2P job rebuilds sphinxtrain with -DBUILD_G2P=ON
- All other jobs run on ubuntu-latest without optional dependencies

This makes G2P truly optional and allows CI to work on any Ubuntu version.
---
 .github/workflows/tests.yml | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 40083cc7..ecd94b50 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,7 +4,7 @@ on:
   - pull_request
 jobs:
   build:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -12,11 +12,11 @@ jobs:
           path: sphinxtrain
       - name: Install
         run: |
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
       - name: Build
         run: |
-          cmake -S sphinxtrain -B sphinxtrain/build -G Ninja -DBUILD_G2P=ON
+          cmake -S sphinxtrain -B sphinxtrain/build -G Ninja
           cmake --build sphinxtrain/build
       - name: Run tests
         run: |
@@ -41,7 +41,7 @@ jobs:
           path: build.tar
 
   train-inplace:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     needs: [build]
     steps:
       - name: Download build
@@ -52,7 +52,7 @@ jobs:
       - name: Install
         run: |
           tar xf build.tar
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
       - name: Checkout AN4
         uses: actions/checkout@v4
@@ -72,7 +72,7 @@ jobs:
           path: an4/logdir
 
   train-installed:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     needs: [build]
     steps:
       - name: Download build
@@ -83,7 +83,7 @@ jobs:
       - name: Install
         run: |
           tar xf build.tar
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
@@ -105,7 +105,7 @@ jobs:
           path: an4/logdir
 
   train-parallel:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     needs: [build]
     steps:
       - name: Download build
@@ -116,7 +116,7 @@ jobs:
       - name: Install
         run: |
           tar xf build.tar
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
@@ -140,7 +140,7 @@ jobs:
           path: an4/logdir
 
   train-align:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     needs: [build]
     steps:
       - name: Download build
@@ -151,7 +151,7 @@ jobs:
       - name: Install
         run: |
           tar xf build.tar
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
@@ -174,7 +174,7 @@ jobs:
           path: an4/logdir
 
   train-g2p-lda-vtln:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-22.04  # Requires libngram-dev for G2P support
     needs: [build]
     steps:
       - name: Download build
@@ -182,11 +182,14 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: build
-      - name: Install
+      - name: Install with G2P support
         run: |
           tar xf build.tar
           sudo apt-get install libfst-dev libngram-dev cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
+          # Rebuild with G2P support
+          cmake -S sphinxtrain -B sphinxtrain/build -G Ninja -DBUILD_G2P=ON
+          cmake --build sphinxtrain/build
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
       - name: Checkout AN4
@@ -212,7 +215,7 @@ jobs:
           path: an4/logdir
 
   train-semi:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     needs: [build]
     steps:
       - name: Download build
@@ -223,7 +226,7 @@ jobs:
       - name: Install
         run: |
           tar xf build.tar
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install
@@ -249,7 +252,7 @@ jobs:
           path: an4/logdir
 
   train-ptm:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     needs: [build]
     steps:
       - name: Download build
@@ -260,7 +263,7 @@ jobs:
       - name: Install
         run: |
           tar xf build.tar
-          sudo apt-get install libfst-dev libngram-dev cmake \
+          sudo apt-get install cmake \
                ninja-build libopenblas-dev python3-numpy python3-scipy
           sudo cmake --build sphinxtrain/build --target install
           sudo cmake --build pocketsphinx/build --target install

From a7a9d1c7e58e4b50bc96c63c2daa5de65bd95a54 Mon Sep 17 00:00:00 2001
From: Kevin Lenzo <lenzo@duolingo.com>
Date: Fri, 12 Dec 2025 10:50:59 -0500
Subject: [PATCH 10/10] Optimize CI triggers to avoid duplicate workflow runs

Changed from running on all pushes/PRs to only:
- push to master branch (verify master stays healthy)
- pull_request targeting master (test before merge)

This prevents duplicate runs when pushing to PR branches, saving CI
resources and time while maintaining full test coverage.
---
 .github/workflows/tests.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ecd94b50..1f2fb4d0 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,7 +1,11 @@
 name: Run Tests
 on:
-  - push
-  - pull_request
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
 jobs:
   build:
     runs-on: ubuntu-latest