sd-depth-api/process_image.py at main · dfattal/sd-depth-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# process_image.py
import sys
import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
# from diffusers.utils import load_image
from PIL import Image
import io

# Directory handed to the base pipeline's from_pretrained() as its cache location.
# NOTE(review): only the base SDXL pipeline receives cache_dir; the VAE and the
# ControlNet below are loaded without it — confirm that this is intentional.
cache_dir = "models"

# Build the SDXL + depth-ControlNet pipeline once, at import time, so that
# process_image() can reuse the module-level `pipe` on every call.
print("Loading models...")

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-depth-sdxl-1.0",
    torch_dtype=torch.float16,
)

# Keyword options for the base pipeline, gathered in one place for readability.
_pipeline_kwargs = {
    "controlnet": controlnet,
    "vae": vae,
    "variant": "fp16",
    "use_safetensors": True,
    "torch_dtype": torch.float16,
    "cache_dir": cache_dir,
}
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    **_pipeline_kwargs,
)

# Turn on diffusers' model CPU offloading for the assembled pipeline.
pipe.enable_model_cpu_offload()

print("Models loaded successfully")

def process_image(
    depth_image_bytes: bytes,
    *,
    prompt: str = "realistic photography",
    num_inference_steps: int = 30,
    controlnet_conditioning_scale: float = 0.5,
) -> "Image.Image":
    """Run the module-level ControlNet pipeline on an encoded depth image.

    The generation settings used to be hard-coded in the body; they are now
    keyword-only parameters whose defaults reproduce the previous behavior,
    so existing ``process_image(data)`` calls are unaffected.

    Args:
        depth_image_bytes: Contents of an encoded image file. It is opened
            with PIL and converted to RGB before being passed to the
            pipeline as the conditioning ``image``.
        prompt: Text prompt forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline unchanged.
        controlnet_conditioning_scale: Forwarded to the pipeline unchanged.

    Returns:
        The first entry of the pipeline result's ``images`` list.

    Raises:
        Whatever ``PIL.Image.open`` raises when the bytes cannot be decoded
        as an image; no exception is caught here.
    """
    print("Processing image...")

    # Decode from memory; the pipeline is given a 3-channel RGB image.
    depth_image = Image.open(io.BytesIO(depth_image_bytes)).convert("RGB")

    images = pipe(
        prompt,
        image=depth_image,
        num_inference_steps=num_inference_steps,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
    ).images
    print("Image processed successfully")
    return images[0]

if __name__ == "__main__":
    # Command-line entry point: read the depth image named by the first
    # argument, run it through process_image(), and write output.jpg to the
    # current working directory.
    if len(sys.argv) < 2:
        # Exit with a usage hint (non-zero status) instead of letting
        # sys.argv[1] raise a bare IndexError traceback.
        sys.exit(f"Usage: python {sys.argv[0]} <depth_image_path>")

    depth_image_path = sys.argv[1]
    print(f"Received depth image path: {depth_image_path}")

    # process_image() expects the encoded file contents, not a path.
    with open(depth_image_path, "rb") as f:
        depth_image_bytes = f.read()

    output_image = process_image(depth_image_bytes)
    output_image.save("output.jpg")
    print("Output image saved as output.jpg")
    print(f"Output image size: {output_image.size}")