import torch

from . import utils


class LucyEditProAPINode:
    """
    A ComfyUI node that edits video using the Decart API.

    This node takes video frames, a text prompt describing the desired edits,
    and an API key, then returns the edited video using Decart's
    Lucy-Edit-Pro model.

    Input formats:
        - images: ComfyUI IMAGE tensor with shape [frames, height, width, channels=3]
            Expected dtype: float32 with values in range [0.0, 1.0]
            Color format: RGB
        - prompt: String describing the desired video edits
        - api_key: Valid Decart API key for authentication
        - fps: Frame rate (float, default: 24.0)

    Output formats:
        - images: Edited video as ComfyUI IMAGE tensor
            Shape: [frames, height, width, channels=3]
            dtype: float32 with values in range [0.0, 1.0]
            Color format: RGB
        - fps: Frame rate of the output video (may differ from the input)
    """

    CATEGORY = "video/editing"

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "images": ("IMAGE",),  # ComfyUI uses the IMAGE type for video frames
                "prompt": ("STRING", {"multiline": True, "default": ""}),
                "api_key": ("STRING", {"default": ""}),
                "fps": (
                    "FLOAT",
                    {"default": 24.0, "min": 0.01, "max": 1000.0, "step": 0.01},
                ),
            }
        }

    RETURN_TYPES = ("IMAGE", "FLOAT")
    RETURN_NAMES = ("images", "fps")
    FUNCTION = "process_video"

    def process_video(self, images, prompt, api_key, fps):
        """
        Edit video using the Decart API based on the provided prompt.

        Args:
            images: Input video tensor with shape [frames, height, width, channels=3]
                Expected dtype: float32 with values in range [0.0, 1.0]
                Color format: RGB
            prompt: Text description of the desired video edits
                (e.g., "Change the shirt to blue")
            api_key: Valid Decart API key for authentication
            fps: Input frame rate (float)

        Returns:
            tuple: (output_images, output_fps)
                - output_images: Edited video tensor with the same format as the input
                    Shape: [frames, height, width, channels=3]
                    dtype: float32, range: [0.0, 1.0], RGB format
                - output_fps: Frame rate of the edited video (may differ from the input)

        Raises:
            Exception: If the API call fails or video processing encounters an error.
        """
        output_images, output_fps = utils.generate_edited_video_tensor(
            images, prompt, api_key, fps
        )
        return (output_images, output_fps)
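

# Illustrative direct call (hypothetical, outside a ComfyUI graph). The clip
# size and key below are placeholders, not values from this repository; any
# float32 RGB tensor in [0.0, 1.0] with shape [frames, H, W, 3] works:
#
#   frames = torch.rand(16, 480, 832, 3, dtype=torch.float32)
#   node = LucyEditProAPINode()
#   edited, out_fps = node.process_video(
#       frames, "Change the shirt to blue", "<DECART_API_KEY>", 24.0
#   )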


class LucyConditionConcatNode:
    """
    A ComfyUI node that concatenates an additional latent onto the model's
    input channels for conditioning during diffusion. The extra latent is
    injected via c_concat and applied at every diffusion step.

    This is designed for models with doubled input channels (such as WAN2.2),
    where the extra channels carry the conditioning signal.
    """

    CATEGORY = "conditioning/latent"

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "model": ("MODEL",),
                "concat_latent": ("LATENT",),  # Additional latent to concatenate
            }
        }

    RETURN_TYPES = ("MODEL", "LATENT")
    RETURN_NAMES = ("model", "latent")
    FUNCTION = "apply_concat"

    def apply_concat(self, model, concat_latent):
        """
        Apply channel-concatenation conditioning to the model.

        Args:
            model: The diffusion model (ModelPatcher)
            concat_latent: Dict with a "samples" tensor to concatenate

        Returns:
            tuple: (modified_model, output_latent)
        """
        # Clone the model so other nodes holding the original are unaffected.
        model = model.clone()

        # Get the latent tensor to concatenate.
        concat_tensor = concat_latent["samples"].clone()

        # Normalize the concat tensor with the same parameters as the main latent.
        if hasattr(model.model, "process_latent_in"):
            concat_tensor = model.model.process_latent_in(concat_tensor)

        # Copy the options dict so the wrapper installed below does not leak
        # into other patchers sharing the same model_options.
        model.model_options = model.model_options.copy()

        # Zero-initialized latent with the same shape as the concat tensor,
        # returned below as the starting latent for sampling.
        latent = torch.zeros_like(concat_tensor)

        # Wrapper that injects the concat latent into the conditioning at
        # every diffusion step.
        def concat_wrapper(model_function, params):
            nonlocal concat_tensor
            x = params["input"]
            t = params["timestep"]
            c = params["c"].copy()

            # Broadcast a single-sample concat tensor across the batch;
            # otherwise the batch sizes must match exactly.
            if x.shape[0] != concat_tensor.shape[0]:
                if concat_tensor.shape[0] == 1:
                    concat_tensor = concat_tensor.repeat(
                        (x.shape[0],) + (1,) * (concat_tensor.ndim - 1)
                    )
                else:
                    raise ValueError(
                        f"Batch size of concat_tensor and x do not match: "
                        f"{concat_tensor.shape[0]} != {x.shape[0]}"
                    )

            # The remaining dimensions must match as well (the check compares
            # the full shapes, not just the spatial dimensions).
            if x.shape != concat_tensor.shape:
                raise ValueError(
                    f"Shape of concat_tensor and x do not match: "
                    f"{x.shape} != {concat_tensor.shape}"
                )

            # Move to the same device and dtype as the model input.
            concat_tensor = concat_tensor.to(x.device, dtype=x.dtype)

            # Add c_concat to the conditioning dictionary.
            c["c_concat"] = concat_tensor

            # Call the original model function with the modified conditioning.
            return model_function(x, t, **c)

        # Install the wrapper on the cloned model.
        model.set_model_unet_function_wrapper(concat_wrapper)

        out_latent = {"samples": latent}
        return (model, out_latent)
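

# Registration sketch: ComfyUI discovers nodes through these mappings, but in
# many extensions they live in the package's __init__.py rather than in this
# module. Placement and key names here are assumptions for illustration, not
# taken from this repository.
NODE_CLASS_MAPPINGS = {
    "LucyEditProAPI": LucyEditProAPINode,
    "LucyConditionConcat": LucyConditionConcatNode,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "LucyEditProAPI": "Lucy Edit Pro (API)",
    "LucyConditionConcat": "Lucy Condition Concat",
}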