# Source: deepdream/simple_dreaming.py (EdCo95/deepdream on GitHub, master branch)

# ======== IMPORTS ========

import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import tensorflow as tf

# =========================

# ======== HELPER FUNCTIONS ========

def showarray(a, fmt="jpeg"):
    """
    Display an image array using PIL's Image.show().
    :param a: the array to display; values are clipped to [0, 1] and scaled to 0-255 before display
    :param fmt: unused by the current implementation; kept so callers that pass a format keep working
    """
    # Clip to [0, 1] and rescale so the data fits an 8-bit image
    a = np.uint8(np.clip(a, 0, 1) * 255)
    PIL.Image.fromarray(a).show()

def wait():
    """
    Block until the user presses enter.
    """
    prompt = "Press enter to continue..."
    input(prompt)

def visstd(a, s=0.1):
    """
    Standardise an array so it can be shown as an image: zero-centre it, divide by its standard deviation,
    scale by s and shift by 0.5.
    :param a: the array to normalise
    :param s: the scale applied to the standardised values (the standard deviation of the result, unless the
              input's own standard deviation is below the 1e-4 floor)
    :return: the normalised array, centred on 0.5
    """
    centred = a - a.mean()

    # Floor the spread so a constant array does not cause a division by zero
    spread = max(a.std(), 1e-4)

    return centred / spread * s + 0.5

def T(layer):
    """
    Look up the first output tensor of a named layer in the module-level graph.
    :param layer: the layer name, without the "import/" prefix or the ":0" output suffix
    :return: the tensor
    """
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)

def render_naive(t_obj, img0, iter_n=20, step=1.0):
    """
    This is the core of SIMPLE_DREAMING. Performs a naive gradient ascent on the given image, printing the score
    at every iteration and displaying the result at the end.
    :param t_obj: defines the optimization objective; its mean is the score being maximised.
    :param img0: the image to enhance (e.g. random noise). It is not modified; integer-valued images are accepted
                 and converted to floating point.
    :param iter_n: the number of gradient ascent operations to perform when enhancing the image.
    :param step: the size of each gradient ascent step to make.
    """

    # The optimisation objective
    t_score = tf.reduce_mean(t_obj)

    # Gradient of the score with respect to the input image placeholder
    t_grad = tf.gradients(t_score, t_input)[0]

    # Work on a floating-point copy: the in-place update below cannot be applied to an integer array
    # (such as a decoded 8-bit image), and the caller's array must stay untouched
    img = np.array(img0)
    if not np.issubdtype(img.dtype, np.floating):
        img = img.astype(np.float64)

    # Begin the dream iterations
    for _ in range(iter_n):

        # Compute the gradient and the score
        g, score = sess.run([t_grad, t_score], feed_dict={t_input: img})

        # Normalise the gradient so that the same step size should work for different layers and networks
        g /= g.std() + 1e-8

        # Take one ascent step
        img += g * step

        print(score, end=" ")

    showarray(visstd(img))

# ==================================

# ======== MAIN CODE ========

"""
The GoogLeNet architecture (InceptionV5) is used here, which has been pretrained for several weeks on the
ImageNet dataset.
"""

# Model location
model_fn = "tensorflow_inception_graph.pb"

# Create the graph to load the model into and an interactive session bound to it
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)

# Read the serialised graph definition in. tf.gfile.GFile is used because FastGFile is a deprecated alias for it
with tf.gfile.GFile(model_fn, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Define the input Tensor: subtract the mean value and add a leading axis (presumably the batch dimension)
# before wiring the result in as the imported graph's "input"
t_input = tf.placeholder(np.float32, name="input")
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {"input": t_preprocessed})

"""
We try to generate images that maximize the sum of activations of a particular channel of a particular convolutional
layer of the neural network. InceptionV5 contains many convolutional layers, each of which outputs tens to hundreds of
feature channels. This allows many different patterns to be explored.
"""

# Collect the names of all imported Conv2D operations, and for each one the size of the last dimension of its
# first output (the number of feature channels).
# NOTE(review): neither `layers` nor `feature_nums` is used again in this file - they look like exploration aids.
layers = [op.name for op in graph.get_operations() if op.type=="Conv2D" and "import/" in op.name]
feature_nums = [int(graph.get_tensor_by_name(name+":0").get_shape()[-1]) for name in layers]

"""
SIMPLE_DREAMING uses a fairly naive way to visualise the different channels - gradient ascent.
"""

# Pick an internal layer to enhance. We use outputs before applying the ReLU nonlinearity to have non-zero gradients
# for features with negative initial activations
layer = "mixed4d_3x3_bottleneck_pre_relu"

# The feature channel to visualise (hard-coded here, not actually random) - there are 144 in that layer
channel = 139

# Make an image of random noise: uniform values in [0, 1) shifted up by 100, with shape (224, 224, 3)
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0

# Read an image - applying simple dreaming to it doesn't really do anything just overlays the same pattern as random
# noise but very vaguely
# NOTE(review): `image` is never passed to render_naive below, yet mountain.jpg must exist for this line to succeed
image = PIL.Image.open("mountain.jpg")

# The objective to visualise: the chosen channel of the chosen layer's output tensor
objective = T(layer)[:, :, :, channel]

# Run the gradient ascent starting from the noise image and display the result
render_naive(objective, img0=img_noise)

# ===========================