diff --git a/init2winit/dataset_lib/pg19.py b/init2winit/dataset_lib/pg19.py index db2afec5..a5b47fd8 100644 --- a/init2winit/dataset_lib/pg19.py +++ b/init2winit/dataset_lib/pg19.py @@ -27,7 +27,7 @@ This module implements a preprocessed PG-19 dataset from TFRecords. The PG-19 textfiles were tokenized and encoded with SubwordTextEncoder and aggregated into -tensors of maximum lenght of 8192. +tensors of maximum length of 8192. """ import functools diff --git a/init2winit/model_lib/adabelief_densenet.py b/init2winit/model_lib/adabelief_densenet.py index 9b06ac5e..18d20b50 100644 --- a/init2winit/model_lib/adabelief_densenet.py +++ b/init2winit/model_lib/adabelief_densenet.py @@ -120,7 +120,7 @@ def __call__(self, x, train): class DenseNet(nn.Module): """Adabelief DenseNet. - The network consists of an inital convolutaional layer, four dense blocks + The network consists of an initial convolutional layer, four dense blocks connected by transition blocks, a pooling layer and a classification layer. """ num_layers: int diff --git a/init2winit/model_lib/base_model.py b/init2winit/model_lib/base_model.py index 66538a52..6ffa4085 100644 --- a/init2winit/model_lib/base_model.py +++ b/init2winit/model_lib/base_model.py @@ -308,7 +308,7 @@ def _apply_override(sharding, param_type, param_shape): return overriden_shardings def get_sharding(self, params, mesh): - """Returns the overriden sharding annotations for the model. + """Returns the overridden sharding annotations for the model. The default sharding strategy is to replicate all layers on all devices. Models can override get_sharding_overrides() to specify sharding overrides diff --git a/init2winit/optimizer_lib/kitchen_sink/_src/transform.py b/init2winit/optimizer_lib/kitchen_sink/_src/transform.py index db147e14..259a94c5 100644 --- a/init2winit/optimizer_lib/kitchen_sink/_src/transform.py +++ b/init2winit/optimizer_lib/kitchen_sink/_src/transform.py @@ -1898,7 +1898,7 @@ def update_fn(updates, state, params): return optax.GradientTransformation(init_fn, update_fn) -# scale_by_rms exists only for backward compatability +# scale_by_rms exists only for backward compatibility _composites = { 'scale_by_adaptive_gd': scale_by_adaptive_gd, 'scale_by_adaptive_gd_simple': scale_by_adaptive_gd_simple, diff --git a/init2winit/optimizer_lib/samuel.py b/init2winit/optimizer_lib/samuel.py index 5b6a0f1f..41179e42 100644 --- a/init2winit/optimizer_lib/samuel.py +++ b/init2winit/optimizer_lib/samuel.py @@ -64,7 +64,7 @@ def samuel( mw_etas: list of multiplicative weight etas. seed: initial jax random seed. train_loss: train loss to be injected at update time. - learning_rate: for compatability, but ignored for now. + learning_rate: for compatibility, but ignored for now. Returns: samuel optimizer diff --git a/init2winit/optimizer_lib/search_subspace.py b/init2winit/optimizer_lib/search_subspace.py index 58bc33a1..35c9274d 100644 --- a/init2winit/optimizer_lib/search_subspace.py +++ b/init2winit/optimizer_lib/search_subspace.py @@ -15,7 +15,7 @@ """Algorithms for narrowing hyperparameter search spaces. -TODO(dsuo): suport discrete hparams. +TODO(dsuo): support discrete hparams. """ import copy import itertools diff --git a/init2winit/schedules.py b/init2winit/schedules.py index 8d27c978..a4f55f1e 100644 --- a/init2winit/schedules.py +++ b/init2winit/schedules.py @@ -25,7 +25,7 @@ def _check_schedule_hparams(schedule_hparams, expected_keys): if set(schedule_hparams.keys()) != set(expected_keys): raise ValueError( - 'Provided schedule_hparams keys are invalid. Recieved: {}, Expected: {}' + 'Provided schedule_hparams keys are invalid. Received: {}, Expected: {}' .format(sorted(schedule_hparams.keys()), sorted(expected_keys)) )