Skip to content

inferencing error: TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of: #77

@balakreshnan

Description

@balakreshnan

TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of:

  • (Tensor input, *, torch.dtype dtype = None)
  • (Tensor input, tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
  • (Tensor input, tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
Entire stack trace:

    /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:628: UserWarning: do_sample is set to False. However, temperature is set to 0.0 -- this flag is only used in sample-based generation modes. You should set do_sample=True or unset temperature.
    warnings.warn(
    /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:838: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
    return fn(*args, **kwargs)
    /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/utils/checkpoint.py:86: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
    warnings.warn(

TypeError Traceback (most recent call last)
Cell In[6], line 2
1 with torch.inference_mode():
----> 2 generate_ids = model.generate(**inputs, **generation_args)

File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)

File /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/utils.py:2255, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2247 input_ids, model_kwargs = self._expand_inputs_for_generation(
2248 input_ids=input_ids,
2249 expand_size=generation_config.num_return_sequences,
2250 is_encoder_decoder=self.config.is_encoder_decoder,
2251 **model_kwargs,
2252 )
2254 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False)
-> 2255 result = self._sample(
2256 input_ids,
2257 logits_processor=prepared_logits_processor,
2258 stopping_criteria=prepared_stopping_criteria,
2259 generation_config=generation_config,
2260 synced_gpus=synced_gpus,
2261 streamer=streamer,
2262 **model_kwargs,
2263 )
2265 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2266 # 11. prepare beam search scorer
2267 beam_scorer = BeamSearchScorer(
2268 batch_size=batch_size,
2269 num_beams=generation_config.num_beams,
(...) 2274 max_length=generation_config.max_length,
2275 )

File /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/utils.py:3254, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3251 model_inputs.update({"output_hidden_states": output_hidden_states} if output_hidden_states else {})
3253 if is_prefill:
-> 3254 outputs = self(**model_inputs, return_dict=True)
3255 is_prefill = False
3256 else:

File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/nn/modules/module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
1749 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1750 else:
-> 1751 return self._call_impl(*args, **kwargs)

File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/nn/modules/module.py:1762, in Module._call_impl(self, *args, **kwargs)
1757 # If we don't have any hooks, we want to skip the rest of the logic in
1758 # this function, and just call forward.
1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1760 or _global_backward_pre_hooks or _global_backward_hooks
1761 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762 return forward_call(*args, **kwargs)
1764 result = None
1765 called_always_called_hooks = set()

File ~/.cache/huggingface/modules/transformers_modules/microsoft/Magma-8B/44464069db9354fe76e98b2c0080b0325f38b20b/modeling_magma.py:674, in MagmaForCausalLM.forward(self, input_ids, pixel_values, image_sizes, attention_mask, position_ids, past_key_values, inputs_embeds, vision_feature_layer, vision_feature_select_strategy, labels, use_cache, output_attentions, output_hidden_states, return_dict)
671 feature_lens = torch.tensor(feature_lens, dtype=torch.long, device=image_features.device)
673 # inputs_embeds = inputs_embeds.to(image_features.dtype)
--> 674 inputs_embeds, attention_mask, position_ids, labels = self._merge_input_ids_with_image_features(
675 image_features,
676 feature_lens,
677 inputs_embeds,
678 input_ids,
679 attention_mask,
680 position_ids,
681 labels=labels,
682 )
684 # pixel_values is not None but is empty ---> text only cases
685 elif pixel_values is not None and input_ids.shape[1] != 1 and pixel_values.size(0) == 0:
686 # there are no images

File ~/.cache/huggingface/modules/transformers_modules/microsoft/Magma-8B/44464069db9354fe76e98b2c0080b0325f38b20b/modeling_magma.py:448, in MagmaForCausalLM._merge_input_ids_with_image_features(self, image_features, feature_lens, inputs_embeds, input_ids, attention_mask, position_ids, labels, image_token_index, ignore_index)
446 special_image_token_mask = input_ids == image_token_index
447 # special_image_token_mask: [bsz, seqlen]
--> 448 num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1)
449 # num_special_image_tokens: [bsz]
450 # Reserve for padding of num_images
451 total_num_special_image_tokens = torch.sum(special_image_token_mask)

TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of:

  • (Tensor input, *, torch.dtype dtype = None)
  • (Tensor input, tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
  • (Tensor input, tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)

Code: https://huggingface.co/microsoft/Magma-8B

import torch
from PIL import Image
from io import BytesIO
import requests

from transformers import AutoModelForCausalLM, AutoProcessor

# Reproduction script for the reported error: load Magma-8B, run one
# image + text prompt through `generate`, and print the decoded answer.

# Load the model and processor
dtype = torch.bfloat16
model = AutoModelForCausalLM.from_pretrained("microsoft/Magma-8B", trust_remote_code=True, torch_dtype=dtype)
processor = AutoProcessor.from_pretrained("microsoft/Magma-8B", trust_remote_code=True)
model.to("cuda")

# Inference
url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
# Fail fast on network problems instead of handing an error page to PIL.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
image = Image.open(BytesIO(http_response.content))
image = image.convert("RGB")

convs = [
    {"role": "system", "content": "You are agent that can see, talk and act."},
    {"role": "user", "content": "<image_start><image_end>\nWhat is in this image?"},
]
prompt = processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
inputs = processor(images=[image], texts=prompt, return_tensors="pt")
# Add a leading dimension to the image tensors before calling the model.
inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
inputs = inputs.to("cuda").to(dtype)

# `temperature` is intentionally omitted: with do_sample=False it is unused
# and only triggers the "do_sample is set to False" UserWarning.
generation_args = {
    "max_new_tokens": 128,
    "do_sample": False,
    "use_cache": True,
    "num_beams": 1,
}

# NOTE(review): the reported TypeError shows `input_ids == image_token_index`
# evaluating to a plain bool inside modeling_magma.py. Presumably the image
# token index resolves to None in this environment -- TODO confirm, e.g. by
# checking the installed transformers version against the model card.
with torch.inference_mode():
    generate_ids = model.generate(**inputs, **generation_args)

# Keep only the newly generated tokens (drop the echoed prompt).
generate_ids = generate_ids[:, inputs["input_ids"].shape[-1]:]
response = processor.decode(generate_ids[0], skip_special_tokens=True).strip()
print(response)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions