# blob: 08c7fc531acedec440132cd6c27f8e72be269c30 [file] [log] [blame]
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains Gradient functions for image ops."""
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import array_ops_stack
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import math_ops
@ops.RegisterGradient("ResizeNearestNeighbor")
def _ResizeNearestNeighborGrad(op, grad):
  """Gradient function for the `ResizeNearestNeighbor` op.

  Args:
    op: The ResizeNearestNeighbor op being differentiated.
    grad: Tensor holding the gradient w.r.t. the op's output.

  Returns:
    A two-element list: the gradient w.r.t. the first input (the image) and
    `None` for the second input, to which no gradient is propagated.
  """
  source = op.inputs[0]
  static_hw = source.get_shape()[1:3]
  # Use the statically known extent of dimensions 1 and 2 when available;
  # otherwise read it from the tensor's dynamic shape.
  if static_hw.is_fully_defined():
    target_hw = static_hw
  else:
    target_hw = array_ops.shape(source)[1:3]
  image_grad = gen_image_ops.resize_nearest_neighbor_grad(
      grad,
      target_hw,
      align_corners=op.get_attr("align_corners"),
      half_pixel_centers=op.get_attr("half_pixel_centers"))
  return [image_grad, None]
@ops.RegisterGradient("ResizeBilinear")
def _ResizeBilinearGrad(op, grad):
  """Gradient function for the `ResizeBilinear` op.

  Args:
    op: The ResizeBilinear op being differentiated.
    grad: Tensor holding the gradient w.r.t. the op's output.

  Returns:
    A two-element list: the gradient w.r.t. the first input (the image) and
    `None` for the second input, to which no gradient is propagated.
  """
  original_image = op.inputs[0]
  # Forward the op's own resize attributes unchanged to the gradient kernel.
  image_grad = gen_image_ops.resize_bilinear_grad(
      grad,
      original_image,
      align_corners=op.get_attr("align_corners"),
      half_pixel_centers=op.get_attr("half_pixel_centers"))
  return [image_grad, None]
@ops.RegisterGradient("ScaleAndTranslate")
def _ScaleAndTranslateGrad(op, grad):
  """Gradient function for the `ScaleAndTranslate` op.

  Args:
    op: The ScaleAndTranslate op being differentiated.
    grad: Tensor holding the gradient w.r.t. the op's output.

  Returns:
    A four-element list: the gradient w.r.t. the first input (the image)
    followed by `None` for each of the remaining three inputs.
  """
  original_image = op.inputs[0]
  # Inputs 2 and 3 are passed through to the gradient kernel; input 1 is not
  # needed by it.
  third_input = op.inputs[2]
  fourth_input = op.inputs[3]
  image_grad = gen_image_ops.scale_and_translate_grad(
      grad,
      original_image,
      third_input,
      fourth_input,
      kernel_type=op.get_attr("kernel_type"),
      antialias=op.get_attr("antialias"))
  return [image_grad, None, None, None]
@ops.RegisterGradient("ResizeBicubic")
def _ResizeBicubicGrad(op, grad):
  """Gradient function for the `ResizeBicubic` op.

  A gradient is produced only when the image input is float32 or float64; for
  any other dtype the image gradient is `None`.

  Args:
    op: The ResizeBicubic op being differentiated.
    grad: Tensor holding the gradient w.r.t. the op's output.

  Returns:
    A two-element list: the gradient w.r.t. the first input (the image), or
    `None` for unsupported dtypes, and `None` for the second input.
  """
  original_image = op.inputs[0]
  if original_image.dtype not in (dtypes.float32, dtypes.float64):
    # Unsupported image dtype: nothing is back-propagated.
    return [None, None]
  image_grad = gen_image_ops.resize_bicubic_grad(
      grad,
      original_image,
      align_corners=op.get_attr("align_corners"),
      half_pixel_centers=op.get_attr("half_pixel_centers"))
  return [image_grad, None]
@ops.RegisterGradient("CropAndResize")
def _CropAndResizeGrad(op, grad):
  """Gradient function for the `CropAndResize` op.

  The image gradient is computed only when the image input has a floating
  point dtype (float16, float32 or float64). The boxes gradient is always
  computed.

  Args:
    op: The CropAndResize op being differentiated.
    grad: Tensor holding the gradient w.r.t. the op's output.

  Returns:
    A four-element list: the gradient w.r.t. the image (or `None` for
    non-floating image dtypes), the gradient w.r.t. the boxes, and `None` for
    both box_ind and crop_size.
  """
  source = op.inputs[0]
  boxes = op.inputs[1]
  box_ind = op.inputs[2]

  # Use the static shape when every dimension is known, else the dynamic one.
  static_shape = source.get_shape()
  if static_shape.is_fully_defined():
    source_shape = static_shape.as_list()
  else:
    source_shape = array_ops.shape(source)

  # The image-pixel gradient is implemented for both nearest neighbor and
  # bilinear sampling.
  image_grad = None
  if source.dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
    image_grad = gen_image_ops.crop_and_resize_grad_image(
        grad, boxes, box_ind, source_shape, T=op.get_attr("T"),
        method=op.get_attr("method"))

  # The gradient w.r.t. the box coordinates is not well defined under nearest
  # neighbor sampling; in practice it is still approximated with the gradient
  # derived from bilinear sampling.
  boxes_grad = gen_image_ops.crop_and_resize_grad_boxes(
      grad, source, boxes, box_ind)
  return [image_grad, boxes_grad, None, None]
def _CustomReciprocal(x):
  """Computes a "safe" reciprocal of `x` via `math_ops.div_no_nan()`.

  Dividing through `div_no_nan` avoids the divide-by-zero results and NaNs
  that a plain reciprocal would give for zero entries of `x`.

  Args:
    x: Input tensor whose reciprocal is taken.

  Returns:
    The reciprocal of `x`, in the dtype of `x`, without NaNs.
  """
  one = math_ops.cast(1.0, x.dtype)
  return math_ops.div_no_nan(one, x)
@ops.RegisterGradient("RGBToHSV")
def _RGBToHSVGrad(op, grad):
  """The gradients for `rgb_to_hsv` operation.

  This function is a piecewise continuous function as defined here:
  https://en.wikipedia.org/wiki/HSL_and_HSV#From_RGB

  Each output channel (hue, saturation, value) is differentiated separately
  w.r.t. each input channel (red, green, blue). Which formula applies at a
  pixel depends on which channel is the maximum and which is the minimum, so
  every formula is multiplied by 0/1 indicator masks and the per-case terms
  are summed. All divisions go through `_CustomReciprocal`, so a zero
  denominator contributes 0 rather than inf/NaN.

  Args:
    op: The `rgb_to_hsv` `Operation` that we are differentiating.
    grad: Gradient with respect to the output of the `rgb_to_hsv` op. The last
      axis is indexed as 0 -> hue, 1 -> saturation, 2 -> value.

  Returns:
    Gradients with respect to the input of `rgb_to_hsv`: a single tensor whose
    last axis stacks the gradients for red, green and blue.
  """
  # Input Channels
  reds = op.inputs[0][..., 0]
  greens = op.inputs[0][..., 1]
  blues = op.inputs[0][..., 2]
  # Output Channels
  saturation = op.outputs[0][..., 1]
  value = op.outputs[0][..., 2]
  dtype = op.inputs[0].dtype
  # Mask/Indicator for max and min values of each pixel, cast to `dtype` so
  # they can be used as 0/1 multipliers. Exactly one "biggest" and exactly one
  # "smallest" mask is set per pixel; ties are broken arbitrarily:
  #   maximum: red wins over green, green wins over blue.
  #   minimum: blue wins over green, green wins over red.
  # Max values
  red_biggest = math_ops.cast((reds >= blues) & \
                              (reds >= greens), dtype)
  green_biggest = math_ops.cast((greens > reds) & \
                                (greens >= blues), dtype)
  blue_biggest = math_ops.cast((blues > reds) & \
                               (blues > greens), dtype)
  # Min values
  red_smallest = math_ops.cast((reds < blues) & \
                               (reds < greens), dtype)
  green_smallest = math_ops.cast((greens <= reds) & \
                                 (greens < blues), dtype)
  blue_smallest = math_ops.cast((blues <= reds) & \
                                (blues <= greens), dtype)
  # Derivatives of the Value channel wrt R, G, B.
  # Value is MAX(R, G, B), so the derivative is 1 for the channel selected as
  # the maximum and 0 for the other two.
  dv_dr = red_biggest
  dv_dg = green_biggest
  dv_db = blue_biggest
  # Derivatives of the Saturation channel wrt R, G, B (S = 1 - MIN/MAX).
  # The first term in the addition is the case when the corresponding color
  # from (r,g,b) was "MAX"
  # -> derivative = MIN/square(MAX), MIN could be one of the other two colors
  # The second term is the case when the corresponding color from
  # (r,g,b) was "MIN"
  # -> derivative = -1/MAX, MAX could be one of the other two colours.
  # The leading `channel > 0` mask zeroes the whole derivative where the
  # channel itself is not positive.
  ds_dr = math_ops.cast(reds > 0, dtype) * math_ops.add(
      red_biggest * math_ops.add(green_smallest * greens, blue_smallest * blues)
      * _CustomReciprocal(math_ops.square(reds)), red_smallest * -1 *
      _CustomReciprocal((green_biggest * greens) + (blue_biggest * blues)))
  ds_dg = math_ops.cast(greens > 0, dtype) * math_ops.add(
      green_biggest * math_ops.add(red_smallest * reds, blue_smallest * blues) *
      _CustomReciprocal(math_ops.square(greens)), green_smallest * -1 *
      _CustomReciprocal((red_biggest * reds) + (blue_biggest * blues)))
  ds_db = math_ops.cast(blues > 0, dtype) * math_ops.add(
      blue_biggest * math_ops.add(green_smallest * greens, red_smallest * reds)
      * _CustomReciprocal(math_ops.square(blues)), blue_smallest * -1 *
      _CustomReciprocal((green_biggest * greens) + (red_biggest * reds)))
  # Derivatives of the Hue channel wrt R, G, B.
  # In degrees (the division by 360 happens at the end of each section), hue
  # is piecewise in which channel is MAX:
  #   red is MAX:   h = 60 * (G - B) / (MAX - MIN)
  #   green is MAX: h = 60 * (2 + (B - R) / (MAX - MIN))
  #   blue is MAX:  h = 60 * (4 + (R - G) / (MAX - MIN))
  # Each color therefore needs five terms: one for when it is MAX, and, for
  # each of the other two colors being MAX, one for when it is MIN (quotient
  # rule, since it appears in both numerator and denominator) and one for when
  # it is NOT MIN (it appears in the numerator only).
  # When the differentiated color is MAX, MAX - MIN is written as
  # saturation * value.
  #
  # for red, dh_dr -> dh_dr_1 + dh_dr_2 + dh_dr_3 + dh_dr_4 + dh_dr_5
  # dh_dr_1 -> red MAX:
  #   60 * d/dR[(G-B)/(R-MIN)] == 60 * -1 * (greens - blues) *
  #   reciprocal(square(saturation)) * reciprocal(square(value))
  # dh_dr_2 -> green MAX, red MIN:
  #   60 * d/dR[(B-R)/(G-R)] == 60 * (blues - greens) *
  #   reciprocal(square(reds - greens))
  # dh_dr_3 -> green MAX, red NOT MIN (blue MIN):
  #   60 * d/dR[(B-R)/(G-B)] == -60 * reciprocal(greens - blues)
  # dh_dr_4 -> blue MAX, red MIN:
  #   60 * d/dR[(R-G)/(B-R)] == 60 * (blues - greens) *
  #   reciprocal(square(blues - reds))
  # dh_dr_5 -> blue MAX, red NOT MIN (green MIN):
  #   60 * d/dR[(R-G)/(B-G)] == 60 * reciprocal(blues - greens)
  dh_dr_1 = 60 * (
      math_ops.cast(reds > 0, dtype) * red_biggest * -1 *
      (greens - blues) * _CustomReciprocal(math_ops.square(saturation)) *
      _CustomReciprocal(math_ops.square(value)))
  dh_dr_2 = 60 * (
      math_ops.cast(greens > 0, dtype) * green_biggest * red_smallest *
      (blues - greens) * _CustomReciprocal(math_ops.square(reds - greens)))
  dh_dr_3 = 60 * (
      math_ops.cast(greens > 0, dtype) * green_biggest * blue_smallest * -1 *
      _CustomReciprocal(greens - blues))
  dh_dr_4 = 60 * (
      math_ops.cast(blues > 0, dtype) * blue_biggest * red_smallest *
      (blues - greens) * _CustomReciprocal(math_ops.square(blues - reds)))
  dh_dr_5 = 60 * (
      math_ops.cast(blues > 0, dtype) * blue_biggest * green_smallest *
      _CustomReciprocal(blues - greens))
  dh_dr = dh_dr_1 + dh_dr_2 + dh_dr_3 + dh_dr_4 + dh_dr_5
  # Converting from degrees to [0,1] scale as specified in
  # https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_hsv
  dh_dr = dh_dr / 360
  # for green, dh_dg -> dh_dg_1 + dh_dg_2 + dh_dg_3 + dh_dg_4 + dh_dg_5
  # dh_dg_1 -> green MAX:
  #   60 * d/dG[(B-R)/(G-MIN)] == 60 * -1 * (blues - reds) *
  #   reciprocal(square(saturation)) * reciprocal(square(value))
  # dh_dg_2 -> red MAX, green MIN:
  #   60 * d/dG[(G-B)/(R-G)] == 60 * (reds - blues) *
  #   reciprocal(square(reds - greens))
  # dh_dg_3 -> red MAX, green NOT MIN (blue MIN):
  #   60 * d/dG[(G-B)/(R-B)] == 60 * reciprocal(reds - blues)
  # dh_dg_4 -> blue MAX, green MIN:
  #   60 * d/dG[(R-G)/(B-G)] == 60 * (reds - blues) *
  #   reciprocal(square(blues - greens))
  # dh_dg_5 -> blue MAX, green NOT MIN (red MIN):
  #   60 * d/dG[(R-G)/(B-R)] == -60 * reciprocal(blues - reds)
  dh_dg_1 = 60 * (
      math_ops.cast(greens > 0, dtype) * green_biggest * -1 *
      (blues - reds) * _CustomReciprocal(math_ops.square(saturation)) *
      _CustomReciprocal(math_ops.square(value)))
  dh_dg_2 = 60 * (
      math_ops.cast(reds > 0, dtype) * red_biggest * green_smallest *
      (reds - blues) * _CustomReciprocal(math_ops.square(reds - greens)))
  dh_dg_3 = 60 * (
      math_ops.cast(reds > 0, dtype) * red_biggest * blue_smallest *
      _CustomReciprocal(reds - blues))
  dh_dg_4 = 60 * (
      math_ops.cast(blues > 0, dtype) * blue_biggest * green_smallest *
      (reds - blues) * _CustomReciprocal(math_ops.square(blues - greens)))
  dh_dg_5 = 60 * (
      math_ops.cast(blues > 0, dtype) * blue_biggest * red_smallest * -1 *
      _CustomReciprocal(blues - reds))
  dh_dg = dh_dg_1 + dh_dg_2 + dh_dg_3 + dh_dg_4 + dh_dg_5
  # Converting from degrees to [0,1] scale as specified in
  # https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_hsv
  dh_dg = dh_dg / 360
  # for blue, dh_db -> dh_db_1 + dh_db_2 + dh_db_3 + dh_db_4 + dh_db_5
  # dh_db_1 -> blue MAX:
  #   60 * d/dB[(R-G)/(B-MIN)] == 60 * -1 * (reds - greens) *
  #   reciprocal(square(saturation)) * reciprocal(square(value))
  # dh_db_2 -> red MAX, blue MIN:
  #   60 * d/dB[(G-B)/(R-B)] == 60 * (greens - reds) *
  #   reciprocal(square(reds - blues))
  # dh_db_3 -> red MAX, blue NOT MIN (green MIN):
  #   60 * d/dB[(G-B)/(R-G)] == -60 * reciprocal(reds - greens)
  # dh_db_4 -> green MAX, blue MIN:
  #   60 * d/dB[(B-R)/(G-B)] == 60 * (greens - reds) *
  #   reciprocal(square(greens - blues))
  # dh_db_5 -> green MAX, blue NOT MIN (red MIN):
  #   60 * d/dB[(B-R)/(G-R)] == 60 * reciprocal(greens - reds)
  dh_db_1 = 60 * (
      math_ops.cast(blues > 0, dtype) * blue_biggest * -1 *
      (reds - greens) * _CustomReciprocal(math_ops.square(saturation)) *
      _CustomReciprocal(math_ops.square(value)))
  dh_db_2 = 60 * (
      math_ops.cast(reds > 0, dtype) * red_biggest * blue_smallest *
      (greens - reds) * _CustomReciprocal(math_ops.square(reds - blues)))
  dh_db_3 = 60 * (
      math_ops.cast(reds > 0, dtype) * red_biggest * green_smallest * -1 *
      _CustomReciprocal(reds - greens))
  dh_db_4 = 60 * (
      math_ops.cast(greens > 0, dtype) * green_biggest * blue_smallest *
      (greens - reds) * _CustomReciprocal(math_ops.square(greens - blues)))
  dh_db_5 = 60 * (
      math_ops.cast(greens > 0, dtype) * green_biggest * red_smallest *
      _CustomReciprocal(greens - reds))
  dh_db = dh_db_1 + dh_db_2 + dh_db_3 + dh_db_4 + dh_db_5
  # Converting from degrees to [0,1] scale as specified in
  # https://www.tensorflow.org/api_docs/python/tf/image/rgb_to_hsv
  dh_db = dh_db / 360
  # Gradients wrt to inputs (chain rule): scale each output channel's partial
  # derivatives by the incoming gradient for that channel (index 2 -> value,
  # 1 -> saturation, 0 -> hue), stack them along a new last axis in (R, G, B)
  # order, then sum the three contributions.
  dv_drgb = array_ops_stack.stack(
      [grad[..., 2] * dv_dr, grad[..., 2] * dv_dg, grad[..., 2] * dv_db],
      axis=-1)
  ds_drgb = array_ops_stack.stack(
      [grad[..., 1] * ds_dr, grad[..., 1] * ds_dg, grad[..., 1] * ds_db],
      axis=-1)
  dh_drgb = array_ops_stack.stack(
      [grad[..., 0] * dh_dr, grad[..., 0] * dh_dg, grad[..., 0] * dh_db],
      axis=-1)
  gradient_input = math_ops.add(math_ops.add(dv_drgb, ds_drgb), dh_drgb)
  return gradient_input