Description
TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of:
- (Tensor input, *, torch.dtype dtype = None)
- (Tensor input, tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
- (Tensor input, tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
Entire stack trace:

/anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:628: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
warnings.warn(
/anaconda/envs/py312llm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:838: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
return fn(*args, **kwargs)
/anaconda/envs/py312llm/lib/python3.12/site-packages/torch/utils/checkpoint.py:86: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
warnings.warn(
TypeError Traceback (most recent call last)
Cell In[6], line 2
1 with torch.inference_mode():
----> 2 generate_ids = model.generate(**inputs, **generation_args)
File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/utils/_contextlib.py:116, in context_decorator.&lt;locals&gt;.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/utils.py:2255, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2247 input_ids, model_kwargs = self._expand_inputs_for_generation(
2248 input_ids=input_ids,
2249 expand_size=generation_config.num_return_sequences,
2250 is_encoder_decoder=self.config.is_encoder_decoder,
2251 **model_kwargs,
2252 )
2254 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False)
-> 2255 result = self._sample(
2256 input_ids,
2257 logits_processor=prepared_logits_processor,
2258 stopping_criteria=prepared_stopping_criteria,
2259 generation_config=generation_config,
2260 synced_gpus=synced_gpus,
2261 streamer=streamer,
2262 **model_kwargs,
2263 )
2265 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2266 # 11. prepare beam search scorer
2267 beam_scorer = BeamSearchScorer(
2268 batch_size=batch_size,
2269 num_beams=generation_config.num_beams,
(...) 2274 max_length=generation_config.max_length,
2275 )
File /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/utils.py:3254, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3251 model_inputs.update({"output_hidden_states": output_hidden_states} if output_hidden_states else {})
3253 if is_prefill:
-> 3254 outputs = self(**model_inputs, return_dict=True)
3255 is_prefill = False
3256 else:
File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/nn/modules/module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
1749 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1750 else:
-> 1751 return self._call_impl(*args, **kwargs)
File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/nn/modules/module.py:1762, in Module._call_impl(self, *args, **kwargs)
1757 # If we don't have any hooks, we want to skip the rest of the logic in
1758 # this function, and just call forward.
1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1760 or _global_backward_pre_hooks or _global_backward_hooks
1761 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762 return forward_call(*args, **kwargs)
1764 result = None
1765 called_always_called_hooks = set()
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Magma-8B/44464069db9354fe76e98b2c0080b0325f38b20b/modeling_magma.py:674, in MagmaForCausalLM.forward(self, input_ids, pixel_values, image_sizes, attention_mask, position_ids, past_key_values, inputs_embeds, vision_feature_layer, vision_feature_select_strategy, labels, use_cache, output_attentions, output_hidden_states, return_dict)
671 feature_lens = torch.tensor(feature_lens, dtype=torch.long, device=image_features.device)
673 # inputs_embeds = inputs_embeds.to(image_features.dtype)
--> 674 inputs_embeds, attention_mask, position_ids, labels = self._merge_input_ids_with_image_features(
675 image_features,
676 feature_lens,
677 inputs_embeds,
678 input_ids,
679 attention_mask,
680 position_ids,
681 labels=labels,
682 )
684 # pixel_values is not None but is empty ---> text only cases
685 elif pixel_values is not None and input_ids.shape[1] != 1 and pixel_values.size(0) == 0:
686 # there are no images
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Magma-8B/44464069db9354fe76e98b2c0080b0325f38b20b/modeling_magma.py:448, in MagmaForCausalLM._merge_input_ids_with_image_features(self, image_features, feature_lens, inputs_embeds, input_ids, attention_mask, position_ids, labels, image_token_index, ignore_index)
446 special_image_token_mask = input_ids == image_token_index
447 # special_image_token_mask: [bsz, seqlen]
--> 448 num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1)
449 # num_special_image_tokens: [bsz]
450 # Reserve for padding of num_images
451 total_num_special_image_tokens = torch.sum(special_image_token_mask)
TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of:
- (Tensor input, *, torch.dtype dtype = None)
- (Tensor input, tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
- (Tensor input, tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
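For context, the failing call can be reproduced in isolation: torch.sum only accepts a tensor as its first positional argument, so the call at modeling_magma.py:448 fails when special_image_token_mask reaches it as a plain Python bool instead of a boolean tensor, which suggests the input_ids == image_token_index comparison just above it did not yield a tensor in this run. A minimal sketch (the token id 99 below is a made-up placeholder, not Magma's actual image token index):

import torch

# What the model code expects: a boolean tensor mask over input_ids.
input_ids = torch.tensor([[10, 99, 99, 20]])
image_token_index = 99  # hypothetical placeholder id, not Magma's real one
special_image_token_mask = input_ids == image_token_index  # boolean tensor
print(torch.sum(special_image_token_mask, dim=-1))  # tensor([2]) -- works

# What the traceback shows instead: the mask reached torch.sum as a plain
# Python bool, and (bool, dim=int) matches none of sum()'s overloads.
bad_mask = False
torch.sum(bad_mask, dim=-1)  # TypeError: sum() received an invalid combination of arguments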
Code: https://huggingface.co/microsoft/Magma-8B
import torch
from PIL import Image
from io import BytesIO
import requests
from transformers import AutoModelForCausalLM, AutoProcessor
# Load the model and processor
dtype = torch.bfloat16
model = AutoModelForCausalLM.from_pretrained("microsoft/Magma-8B", trust_remote_code=True, torch_dtype=dtype)
processor = AutoProcessor.from_pretrained("microsoft/Magma-8B", trust_remote_code=True)
model.to("cuda")
# Inference
url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
image = Image.open(BytesIO(requests.get(url, stream=True).content))
image = image.convert("RGB")
convs = [
    {"role": "system", "content": "You are agent that can see, talk and act."},
    {"role": "user", "content": "<image_start><image_end>\nWhat is in this image?"},
]
prompt = processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
inputs = processor(images=[image], texts=prompt, return_tensors="pt")
inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
inputs = inputs.to("cuda").to(dtype)
generation_args = {
    "max_new_tokens": 128,
    "temperature": 0.0,
    "do_sample": False,
    "use_cache": True,
    "num_beams": 1,
}
with torch.inference_mode():
    generate_ids = model.generate(**inputs, **generation_args)
generate_ids = generate_ids[:, inputs["input_ids"].shape[-1] :]
response = processor.decode(generate_ids[0], skip_special_tokens=True).strip()
print(response)
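As a quick sanity check (only a guess at where to look; the config attribute name is an assumption, not confirmed against Magma's remote code), it may help to print which token id the model treats as the image placeholder and whether that id actually appears in inputs["input_ids"] before calling generate:

# Hypothetical diagnostic: the image_token_index attribute is an assumption.
image_token_index = getattr(model.config, "image_token_index", None)
print("image_token_index:", image_token_index)
if image_token_index is not None:
    print("occurrences in input_ids:",
          (inputs["input_ids"] == image_token_index).sum().item())
else:
    print("model.config does not expose image_token_index")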