From c4d03af901e2f85017e9f1a5b907782a817c5f4a Mon Sep 17 00:00:00 2001 From: Maximillian Chen <34139938+maxlchen@users.noreply.github.com> Date: Wed, 25 May 2022 15:12:05 -0700 Subject: [PATCH] Fix: ignore tokens should be set to -100, not -1 Loss cannot be computed if labels[~masked_indices] = -1. From BertForMaskedLM documentation: "Indices should be in [-100, 0, ..., config.vocab_size] ... Tokens with indices set to -100 are ignored (masked), the loss is only computed for the tokens with labels in [0, ..., config.vocab_size]" --- run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.py b/run.py index 2906b29..798a9e2 100644 --- a/run.py +++ b/run.py @@ -345,7 +345,7 @@ def mask_tokens(inputs, tokenizer, mlm_probability=0.15): probability_matrix.masked_fill_(torch.tensor(labels == 0, dtype=torch.bool), value=0.0) masked_indices = torch.bernoulli(probability_matrix).bool() - labels[~masked_indices] = -1 # We only compute loss on masked tokens + labels[~masked_indices] = -100 # We only compute loss on masked tokens # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK]) indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices