Description
TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of:
- (Tensor input, *, torch.dtype dtype = None)
- (Tensor input, tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
- (Tensor input, tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
Entire stack trace:

/anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:628: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
warnings.warn(
/anaconda/envs/py312llm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py:838: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
return fn(*args, **kwargs)
/anaconda/envs/py312llm/lib/python3.12/site-packages/torch/utils/checkpoint.py:86: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
warnings.warn(
TypeError Traceback (most recent call last)
Cell In[6], line 2
1 with torch.inference_mode():
----> 2 generate_ids = model.generate(**inputs, **generation_args)
File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/utils/_contextlib.py:116, in context_decorator.&lt;locals&gt;.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/utils.py:2255, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2247 input_ids, model_kwargs = self._expand_inputs_for_generation(
2248 input_ids=input_ids,
2249 expand_size=generation_config.num_return_sequences,
2250 is_encoder_decoder=self.config.is_encoder_decoder,
2251 **model_kwargs,
2252 )
2254 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False)
-> 2255 result = self._sample(
2256 input_ids,
2257 logits_processor=prepared_logits_processor,
2258 stopping_criteria=prepared_stopping_criteria,
2259 generation_config=generation_config,
2260 synced_gpus=synced_gpus,
2261 streamer=streamer,
2262 **model_kwargs,
2263 )
2265 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2266 # 11. prepare beam search scorer
2267 beam_scorer = BeamSearchScorer(
2268 batch_size=batch_size,
2269 num_beams=generation_config.num_beams,
(...) 2274 max_length=generation_config.max_length,
2275 )
File /anaconda/envs/py312llm/lib/python3.12/site-packages/transformers/generation/utils.py:3254, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3251 model_inputs.update({"output_hidden_states": output_hidden_states} if output_hidden_states else {})
3253 if is_prefill:
-> 3254 outputs = self(**model_inputs, return_dict=True)
3255 is_prefill = False
3256 else:
File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/nn/modules/module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
1749 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1750 else:
-> 1751 return self._call_impl(*args, **kwargs)
File /anaconda/envs/py312llm/lib/python3.12/site-packages/torch/nn/modules/module.py:1762, in Module._call_impl(self, *args, **kwargs)
1757 # If we don't have any hooks, we want to skip the rest of the logic in
1758 # this function, and just call forward.
1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1760 or _global_backward_pre_hooks or _global_backward_hooks
1761 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762 return forward_call(*args, **kwargs)
1764 result = None
1765 called_always_called_hooks = set()
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Magma-8B/44464069db9354fe76e98b2c0080b0325f38b20b/modeling_magma.py:674, in MagmaForCausalLM.forward(self, input_ids, pixel_values, image_sizes, attention_mask, position_ids, past_key_values, inputs_embeds, vision_feature_layer, vision_feature_select_strategy, labels, use_cache, output_attentions, output_hidden_states, return_dict)
671 feature_lens = torch.tensor(feature_lens, dtype=torch.long, device=image_features.device)
673 # inputs_embeds = inputs_embeds.to(image_features.dtype)
--> 674 inputs_embeds, attention_mask, position_ids, labels = self._merge_input_ids_with_image_features(
675 image_features,
676 feature_lens,
677 inputs_embeds,
678 input_ids,
679 attention_mask,
680 position_ids,
681 labels=labels,
682 )
684 # pixel_values is not None but is empty ---> text only cases
685 elif pixel_values is not None and input_ids.shape[1] != 1 and pixel_values.size(0) == 0:
686 # there are no images
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Magma-8B/44464069db9354fe76e98b2c0080b0325f38b20b/modeling_magma.py:448, in MagmaForCausalLM._merge_input_ids_with_image_features(self, image_features, feature_lens, inputs_embeds, input_ids, attention_mask, position_ids, labels, image_token_index, ignore_index)
446 special_image_token_mask = input_ids == image_token_index
447 # special_image_token_mask: [bsz, seqlen]
--> 448 num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1)
449 # num_special_image_tokens: [bsz]
450 # Reserve for padding of num_images
451 total_num_special_image_tokens = torch.sum(special_image_token_mask)
TypeError: sum() received an invalid combination of arguments - got (bool, dim=int), but expected one of:
- (Tensor input, *, torch.dtype dtype = None)
- (Tensor input, tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
- (Tensor input, tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None, Tensor out = None)
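For context, the failing call can be reproduced in isolation: torch.sum only accepts a tensor as its first positional argument, so the call at modeling_magma.py:448 fails when special_image_token_mask reaches it as a plain Python bool instead of a boolean tensor, which suggests the input_ids == image_token_index comparison just above it did not yield a tensor in this run. A minimal sketch (the token id 99 below is a made-up placeholder, not Magma's actual image token index):

import torch

# What the model code expects: a boolean tensor mask over input_ids.
input_ids = torch.tensor([[10, 99, 99, 20]])
image_token_index = 99  # hypothetical placeholder id, not Magma's real one
special_image_token_mask = input_ids == image_token_index  # boolean tensor
print(torch.sum(special_image_token_mask, dim=-1))  # tensor([2]) -- works

# What the traceback shows instead: the mask reached torch.sum as a plain
# Python bool, and (bool, dim=int) matches none of sum()'s overloads.
bad_mask = False
torch.sum(bad_mask, dim=-1)  # TypeError: sum() received an invalid combination of arguments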
Code: https://huggingface.co/microsoft/Magma-8B
import torch
from PIL import Image
from io import BytesIO
import requests
from transformers import AutoModelForCausalLM, AutoProcessor
# Load the model and processor
dtype = torch.bfloat16
model = AutoModelForCausalLM.from_pretrained("microsoft/Magma-8B", trust_remote_code=True, torch_dtype=dtype)
processor = AutoProcessor.from_pretrained("microsoft/Magma-8B", trust_remote_code=True)
model.to("cuda")
# Inference
url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
image = Image.open(BytesIO(requests.get(url, stream=True).content))
image = image.convert("RGB")
convs = [
    {"role": "system", "content": "You are agent that can see, talk and act."},
    {"role": "user", "content": "<image_start><image_end>\nWhat is in this image?"},
]
prompt = processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
inputs = processor(images=[image], texts=prompt, return_tensors="pt")
inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
inputs = inputs.to("cuda").to(dtype)
generation_args = {
    "max_new_tokens": 128,
    "temperature": 0.0,
    "do_sample": False,
    "use_cache": True,
    "num_beams": 1,
}
with torch.inference_mode():
    generate_ids = model.generate(**inputs, **generation_args)
generate_ids = generate_ids[:, inputs["input_ids"].shape[-1] :]
response = processor.decode(generate_ids[0], skip_special_tokens=True).strip()
print(response)
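As a quick sanity check (only a guess at where to look; the config attribute name is an assumption, not confirmed against Magma's remote code), it may help to print which token id the model treats as the image placeholder and whether that id actually appears in inputs["input_ids"] before calling generate:

# Hypothetical diagnostic: the image_token_index attribute is an assumption.
image_token_index = getattr(model.config, "image_token_index", None)
print("image_token_index:", image_token_index)
if image_token_index is not None:
    print("occurrences in input_ids:",
          (inputs["input_ids"] == image_token_index).sum().item())
else:
    print("model.config does not expose image_token_index")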