diff --git a/agents/algorithms/ppo/ppo.py b/agents/algorithms/ppo/ppo.py
index e62ce94..6359b84 100644
--- a/agents/algorithms/ppo/ppo.py
+++ b/agents/algorithms/ppo/ppo.py
@@ -530,7 +530,7 @@ def _adjust_penalty(self, observ, old_policy_params, length):
             kl_change > 1.3 * self._config.kl_target,
             # pylint: disable=g-long-lambda
             lambda: tf.Print(self._penalty.assign(
-                self._penalty * 1.5), [0], 'increase penalty '),
+                self._penalty * 1.5 + 1e-8), [0], 'increase penalty '),
             float)
         maybe_decrease = tf.cond(
             kl_change < 0.7 * self._config.kl_target,