卷积

优质

小牛编辑

139浏览

2023-12-01

图像的卷积（Convolution）定义为

$$f(x) = act(\sum{i, j}^n \theta{(n - i)(n - j)} x_{ij}+b)$$

其计算过程为

示例1

import tensorflow as tf
import numpy as np
sess = tf.InteractiveSession()
input_batch = tf.constant([
        [  # First Input
            [[0.0], [1.0]],
            [[2.0], [3.0]]
        ],
        [  # Second Input
            [[2.0], [4.0]],
            [[6.0], [8.0]]
        ]
    ])
kernel = tf.constant([
        [
            [[1.0, 2.0]]
        ]
    ])
conv2d = tf.nn.conv2d(input_batch, kernel, strides=[1, 1, 1, 1], padding='SAME')
print sess.run(conv2d)
lower_right_image_pixel = sess.run(input_batch)[0][1][1]
lower_right_kernel_pixel = sess.run(conv2d)[0][1][1]
print lower_right_image_pixel, lower_right_kernel_pixel

output =>

[[[[  0.   0.]
   [  1.   2.]]
  [[  2.   4.]
   [  3.   6.]]]

 [[[  2.   4.]
   [  4.   8.]]
  [[  6.  12.]
   [  8.  16.]]]]
[ 3.] [ 3.  6.]

示例2

import tensorflow as tf
import numpy as np
sess = tf.InteractiveSession()
input_batch = tf.constant([
        [  # First Input (6x6x1)
            [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]],
            [[0.1], [1.1], [2.1], [3.1], [4.1], [5.1]],
            [[0.2], [1.2], [2.2], [3.2], [4.2], [5.2]],
            [[0.3], [1.3], [2.3], [3.3], [4.3], [5.3]],
            [[0.4], [1.4], [2.4], [3.4], [4.4], [5.4]],
            [[0.5], [1.5], [2.5], [3.5], [4.5], [5.5]],
        ],
    ])
kernel = tf.constant([  # Kernel (3x3x1)
        [[[0.0]], [[0.5]], [[0.0]]],
        [[[0.0]], [[1.0]], [[0.0]]],
        [[[0.0]], [[0.5]], [[0.0]]]
    ])
# NOTE: the change in the size of the strides parameter.
conv2d = tf.nn.conv2d(input_batch, kernel, strides=[1, 3, 3, 1], padding='SAME')
sess.run(conv2d)

output =>

array([[[[ 2.20000005],
         [ 8.19999981]],
        [[ 2.79999995],
         [ 8.80000019]]]], dtype=float32)

示例3（图像卷积）

原始图片：

提取边缘：

%matplotlib inline
import matplotlib as mil
#mil.use('svg')
mil.use("nbagg")
import tensorflow as tf
import numpy as np
from matplotlib import pyplot
sess = tf.InteractiveSession()
image_filename = "n02113023_219.jpg"
filename_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once(image_filename))
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(filename_queue)
image = tf.image.decode_jpeg(image_file)
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
image_batch = tf.image.convert_image_dtype(tf.expand_dims(image, 0), tf.float32, saturate=False)
kernel = tf.constant([
        [
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]
        ],
        [
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 8., 0., 0.], [ 0., 8., 0.], [ 0., 0., 8.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]
        ],
        [
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]
        ]
    ])

conv2d = tf.nn.conv2d(image_batch, kernel, [1, 1, 1, 1], padding="SAME")
activation_map = sess.run(tf.minimum(tf.nn.relu(conv2d), 255))
fig = pyplot.gcf() #pyplot.figure()
ax1 = fig.add_subplot(121)
ax1.set_title("original")
ax1.set_xticks([])
ax1.set_yticks([])
ax1.imshow(image_batch[0].eval(), interpolation='nearest')
ax2 = fig.add_subplot(122)
ax2.imshow(activation_map[0], interpolation='nearest')
ax2.set_title("after")
ax2.set_xticks([])
ax2.set_yticks([])
# fig.set_size_inches(4, 4)
# fig.savefig("example-edge-detection.png")
fig.show()
filename_queue.close(cancel_pending_enqueues=True)
coord.request_stop()
coord.join(threads)

还有很多其他的卷积核https://en.wikipedia.org/wiki/Kernel_(image_processing)，比如

kernel = tf.constant([
        [
            [[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]]
        ],
        [
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 5., 0., 0.], [ 0., 5., 0.], [ 0., 0., 5.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]
        ],
        [
            [[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 0, 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]]
        ]
    ])

conv2d = tf.nn.conv2d(image_batch, kernel, [1, 1, 1, 1], padding="SAME")
activation_map = sess.run(tf.minimum(tf.nn.relu(conv2d), 255))
fig = pyplot.gcf() #pyplot.figure()
ax1 = fig.add_subplot(121)
ax1.set_title("original")
ax1.set_xticks([])
ax1.set_yticks([])
ax1.imshow(image_batch[0].eval(), interpolation='nearest')
ax2 = fig.add_subplot(122)
ax2.imshow(activation_map[0], interpolation='nearest')
ax2.set_title("after")
ax2.set_xticks([])
ax2.set_yticks([])

示例4（图像卷积）

还是图像卷积，但是使用matplotlib.pyplot读取图像

# %matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
image = plt.imread("test.jpg")
data = np.array(image).astype(np.float32)
# create session
sess = tf.InteractiveSession()
# convolution requires images in 4d: N*W*H*C
img_4d = tf.reshape(data, [1, data.shape[0], data.shape[1],data.shape[2] ])

kernel = tf.constant([
        [
            [[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]]
        ],
        [
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 5., 0., 0.], [ 0., 5., 0.], [ 0., 0., 5.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]]
        ],
        [
            [[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]],
            [[ -1., 0., 0.], [ 0., -1., 0.], [ 0., 0., -1.]],
            [[ 0, 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]]
        ]
    ])
conv2d = tf.nn.conv2d(img_4d, kernel, [1, 1, 1, 1], padding="SAME")
fig = plt.gcf() #pyplot.figure()
ax1 = fig.add_subplot(121)
ax1.set_title("original")
ax1.set_xticks([])
ax1.set_yticks([])
ax1.imshow(data.astype(np.uint8))
ax2 = fig.add_subplot(122)
ax2.imshow(conv2d.eval()[0])
ax2.set_title("conv")
ax2.set_xticks([])
ax2.set_yticks([])
fig.show()

参考文档

working with images