| # Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """bincount ops.""" |
| |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import ops |
| from tensorflow.python.framework import sparse_tensor |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import check_ops |
| from tensorflow.python.ops import gen_count_ops |
| from tensorflow.python.ops import gen_math_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops.ragged import ragged_tensor |
| from tensorflow.python.util import deprecation |
| from tensorflow.python.util.tf_export import tf_export |
| |
| |
@tf_export("math.bincount", v1=[])
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
  """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  Vector length = Maximum element in vector `values` is 5. Adding 1, which is 6
                  will be the vector length.

  Each bin value in the output indicates number of occurrences of the particular
  index. Here, index 1 in output has a value 2. This indicates value 1 occurs
  two times in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Bin will be incremented by the corresponding weight instead of 1.
  Here, index 1 in output has a value 6. This is the summation of weights
  corresponding to the value in `values`.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>


  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are specified, or if
      `axis` is anything other than `None`, `0` or `-1`.

  """
  name = "bincount" if name is None else name
  with ops.name_scope(name):
    # TODO(b/255381064) Remove the following block which uses older kernels for
    # backwards compatibility for certain cases once all tests pass with the
    # newer (dense_bincount, ragged_bincount and sparse_bincount) kernels.
    #
    # Composite inputs (RaggedTensor and SparseTensor) cannot be passed through
    # `ops.convert_to_tensor`, so they must bypass this dense-only path and be
    # handled by the composite-aware code below, where `axis=None` is treated
    # as `axis=0`.
    if (
        not isinstance(
            arr, (ragged_tensor.RaggedTensor, sparse_tensor.SparseTensor))
        and not binary_output
        and axis is None
    ):
      arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
      # Multiplying by the 0/1 non-empty flag makes the size 0 for empty input.
      output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
          math_ops.reduce_max(arr) + 1)
      if minlength is not None:
        minlength = ops.convert_to_tensor(
            minlength, name="minlength", dtype=dtypes.int32)
        output_size = gen_math_ops.maximum(minlength, output_size)
      if maxlength is not None:
        maxlength = ops.convert_to_tensor(
            maxlength, name="maxlength", dtype=dtypes.int32)
        output_size = gen_math_ops.minimum(maxlength, output_size)
      if weights is not None:
        weights = ops.convert_to_tensor(weights, name="weights")
        return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
      # An empty weights tensor of the requested dtype selects plain counting.
      weights = constant_op.constant([], dtype)
      arr = array_ops.reshape(arr, [-1])
      return gen_math_ops.bincount(arr, output_size, weights)

    if not isinstance(arr, sparse_tensor.SparseTensor):
      arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr, name="arr")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("Arguments `binary_output` and `weights` are mutually "
                       "exclusive. Please specify only one.")

    if not arr.dtype.is_integer:
      arr = math_ops.cast(arr, dtypes.int32)
    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError(f"Unsupported value for argument axis={axis}. Only 0 and"
                       " -1 are currently supported.")

    array_is_nonempty = array_ops.size(arr) > 0
    # The size is computed in `arr.dtype` so that it matches the dtype of the
    # values handed to the bincount kernels below.
    if isinstance(arr, sparse_tensor.SparseTensor):
      output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
          math_ops.reduce_max(arr.values) + 1)
    else:
      output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
          math_ops.reduce_max(arr) + 1)
    if minlength is not None:
      minlength = ops.convert_to_tensor(
          minlength, name="minlength", dtype=arr.dtype)
      output_size = gen_math_ops.maximum(minlength, output_size)
    if maxlength is not None:
      maxlength = ops.convert_to_tensor(
          maxlength, name="maxlength", dtype=arr.dtype)
      output_size = gen_math_ops.minimum(maxlength, output_size)

    if axis == 0:
      # Flatten everything to a 1-D dense tensor (and matching 1-D weights) so
      # that the dense kernel below counts over all elements at once.
      if isinstance(arr, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(arr, weights, dtype)
        arr = arr.values
      elif isinstance(arr, ragged_tensor.RaggedTensor):
        # Flatten RaggedTensors with multiple ragged dimensions which use a
        # nested RaggedTensor for the values tensor.
        while isinstance(arr, ragged_tensor.RaggedTensor):
          if weights is not None:
            weights = validate_ragged_weights(arr, weights, dtype)
          arr = arr.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        arr = array_ops.reshape(arr, [-1])

    if isinstance(arr, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(arr, weights, dtype)
      return gen_math_ops.sparse_bincount(
          indices=arr.indices,
          values=arr.values,
          dense_shape=arr.dense_shape,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    elif isinstance(arr, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(arr, weights, dtype)
      return gen_math_ops.ragged_bincount(
          splits=arr.row_splits,
          values=arr.values,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(arr, weights, dtype)
      return gen_math_ops.dense_bincount(
          input=arr,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
| |
| |
@tf_export(v1=["math.bincount", "bincount"])
@deprecation.deprecated_endpoints("bincount")
def bincount_v1(arr,
                weights=None,
                minlength=None,
                maxlength=None,
                dtype=dtypes.int32):
  """Counts the number of occurrences of each value in an integer array.

  TF1 endpoint that forwards all of its arguments to `bincount`.

  When neither `minlength` nor `maxlength` is supplied, the result is a vector
  of length `tf.reduce_max(arr) + 1` for non-empty `arr`, and of length 0
  otherwise. When `weights` is supplied, output index `i` holds the sum of the
  entries of `weights` found at the positions where `arr` equals `i`.

  Args:
    arr: An int32 tensor of non-negative values.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.
  """
  return bincount(
      arr,
      weights=weights,
      minlength=minlength,
      maxlength=maxlength,
      dtype=dtype)
| |
| |
@tf_export("sparse.bincount")
def sparse_bincount(values,
                    weights=None,
                    axis=0,
                    minlength=None,
                    maxlength=None,
                    binary_output=False,
                    name=None):
  """Count the number of times an integer value appears in a tensor.

  This op takes an N-dimensional `Tensor`, `RaggedTensor`, or `SparseTensor`,
  and returns an N-dimensional int64 SparseTensor where element
  `[i0...i[axis], j]` contains the number of times the value `j` appears in
  slice `[i0...i[axis], :]` of the input tensor.  Currently, only `axis=0` and
  `axis=-1` are supported.

  Args:
    values: A Tensor, RaggedTensor, or SparseTensor whose values should be
      counted. These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as `values`. For each value in
      `values`, the bin will be incremented by the corresponding weight instead
      of 1.
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `values` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.
    name: A name for this op.

  Returns:
    A SparseTensor with `output.shape = values.shape[:axis] + [N]`, where `N` is
      * `maxlength` (if set);
      * `minlength` (if set, and `minlength > reduce_max(values)`);
      * `0` (if `values` is empty);
      * `reduce_max(values) + 1` otherwise.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are specified, or if
      `axis` is anything other than `None`, `0` or `-1`.

  Examples:

  **Bin-counting every item in individual batches**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Bin-counting with defined output shape**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i. However, all values of j
  at or above 'maxlength' are ignored. The dense_shape of the output sparse
  tensor is set to 'minlength'. Note that, while the input is identical to the
  example above, the value '10001' in batch item 2 is dropped, and the
  dense shape is [2, 500] instead of [2,10002] or [2, 102].

  >>> minlength = maxlength = 500
  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(
  ...    data, axis=-1, minlength=minlength, maxlength=maxlength)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[  0  10]
   [  0  20]
   [  0  30]
   [  1  11]
   [  1 101]], shape=(5, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1], shape=(5,), dtype=int64),
   dense_shape=tf.Tensor([  2 500], shape=(2,), dtype=int64))

  **Binary bin-counting**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where (i,j) is 1 if the value j
  appears in batch i at least once and is 0 otherwise. Note that, even though
  some values (like 20 in batch 1 and 11 in batch 2) appear more than once,
  the 'values' tensor is all 1s.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, binary_output=True, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 1 1 1 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Weighted bin-counting**

  This example takes two inputs - a values tensor and a weights tensor. These
  tensors must be identically shaped, and have the same row splits or indices
  in the case of RaggedTensors or SparseTensors. When performing a weighted
  count, the op will output a SparseTensor where the value of (i, j) is the
  sum of the values in the weight tensor's batch i in the locations where
  the values tensor has the value j. In this case, the output dtype is the
  same as the dtype of the weights tensor.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> weights = [[2, 0.25, 15, 0.5], [2, 17, 3, 0.9]]
  >>> output = tf.sparse.bincount(data, weights=weights, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([2. 0.75 15. 5. 17. 0.9], shape=(6,), dtype=float32),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  """
  with ops.name_scope(name, "count", [values, weights]):
    if not isinstance(values, sparse_tensor.SparseTensor):
      values = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          values, name="values")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("Arguments `binary_output` and `weights` are mutually "
                       "exclusive. Please specify only one.")

    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError(f"Unsupported value for argument axis={axis}. Only 0 and"
                       " -1 are currently supported.")

    # The count kernels receive -1 when a length bound is not specified.
    minlength_value = minlength if minlength is not None else -1
    maxlength_value = maxlength if maxlength is not None else -1

    if axis == 0:
      # Flatten everything to a 1-D dense tensor (and matching 1-D weights) so
      # that the dense kernel below counts over all elements at once.
      if isinstance(values, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(values, weights)
        values = values.values
      elif isinstance(values, ragged_tensor.RaggedTensor):
        # Flatten RaggedTensors with multiple ragged dimensions, which use a
        # nested RaggedTensor for the values tensor. Stripping only a single
        # level would leave a RaggedTensor behind and route it to the per-row
        # ragged kernel instead of counting over all elements.
        while isinstance(values, ragged_tensor.RaggedTensor):
          if weights is not None:
            weights = validate_ragged_weights(values, weights)
          values = values.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        values = array_ops.reshape(values, [-1])

    if isinstance(values, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.sparse_count_sparse_output(
          values.indices,
          values.values,
          values.dense_shape,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    elif isinstance(values, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.ragged_count_sparse_output(
          values.row_splits,
          values.values,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.dense_count_sparse_output(
          values,
          weights=weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)

    return sparse_tensor.SparseTensor(c_ind, c_val, c_shape)
| |
| |
def validate_dense_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The dense tensor being counted. Its dtype is used for the empty
      placeholder when `weights` is None and `dtype` is not given.
    weights: Either None, or the weights supplied by the caller.
    dtype: Optional dtype of the empty placeholder created when `weights` is
      None.

  Returns:
    `weights` unchanged when it is a `tf.Tensor`; otherwise (when `weights` is
    None) an empty constant of dtype `dtype`, or of `values.dtype` if `dtype`
    is None.

  Raises:
    ValueError: If `weights` is neither None nor a `tf.Tensor`.
  """
  if weights is None:
    # Compare against None instead of relying on truthiness: some dtype-like
    # objects (e.g. NumPy dtype instances, whose length is 0) evaluate as
    # falsy, which would silently discard an explicitly requested dtype.
    placeholder_dtype = values.dtype if dtype is None else dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, ops.Tensor):
    raise ValueError(
        "Argument `weights` must be a tf.Tensor if `values` is a tf.Tensor. "
        f"Received weights={weights} of type: {type(weights).__name__}")

  return weights
| |
| |
def validate_sparse_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The SparseTensor being counted.
    weights: Either None, or the weights supplied by the caller.
    dtype: Optional dtype of the empty placeholder created when `weights` is
      None.

  Returns:
    When `weights` is None, an empty constant of dtype `dtype`, or of
    `values.values.dtype` if `dtype` is None. Otherwise the `values` component
    of `weights`, made control-dependent on assertions that `weights` and
    `values` share the same dense shape and indices.

  Raises:
    ValueError: If `weights` is neither None nor a SparseTensor.
  """
  if weights is None:
    # Compare against None instead of relying on truthiness: some dtype-like
    # objects (e.g. NumPy dtype instances, whose length is 0) evaluate as
    # falsy, which would silently discard an explicitly requested dtype.
    placeholder_dtype = values.values.dtype if dtype is None else dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, sparse_tensor.SparseTensor):
    raise ValueError(
        "Argument `weights` must be a SparseTensor if `values` is a "
        f"SparseTensor. Received weights={weights} of type: "
        f"{type(weights).__name__}")

  # An assertion is skipped when both SparseTensors hold the very same
  # component tensor object, since equality is then guaranteed.
  checks = []
  if weights.dense_shape is not values.dense_shape:
    checks.append(
        check_ops.assert_equal(
            weights.dense_shape,
            values.dense_shape,
            message="'weights' and 'values' must have the same dense shape."))
  if weights.indices is not values.indices:
    checks.append(
        check_ops.assert_equal(
            weights.indices,
            values.indices,
            message="'weights' and 'values' must have the same indices.")
    )
  if checks:
    # The identity op ties the returned weights to the assertions.
    with ops.control_dependencies(checks):
      weights = array_ops.identity(weights.values)
  else:
    weights = weights.values

  return weights
| |
| |
def validate_ragged_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The RaggedTensor being counted.
    weights: Either None, or the weights supplied by the caller.
    dtype: Optional dtype of the empty placeholder created when `weights` is
      None.

  Returns:
    When `weights` is None, an empty constant of dtype `dtype`, or of
    `values.values.dtype` if `dtype` is None. Otherwise `weights.values`, made
    control-dependent on an assertion that `weights` and `values` share the
    same row splits.

  Raises:
    ValueError: If `weights` is neither None nor a RaggedTensor.
  """
  if weights is None:
    # Compare against None instead of relying on truthiness: some dtype-like
    # objects (e.g. NumPy dtype instances, whose length is 0) evaluate as
    # falsy, which would silently discard an explicitly requested dtype.
    placeholder_dtype = values.values.dtype if dtype is None else dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, ragged_tensor.RaggedTensor):
    raise ValueError(
        "`weights` must be a RaggedTensor if `values` is a RaggedTensor. "
        f"Received argument weights={weights} of type: "
        f"{type(weights).__name__}.")

  # The assertion is skipped when both RaggedTensors hold the very same
  # row-splits tensor object, since equality is then guaranteed.
  checks = []
  if weights.row_splits is not values.row_splits:
    checks.append(
        check_ops.assert_equal(
            weights.row_splits,
            values.row_splits,
            message="'weights' and 'values' must have the same row splits."))
  if checks:
    # The identity op ties the returned weights to the assertion.
    with ops.control_dependencies(checks):
      weights = array_ops.identity(weights.values)
  else:
    weights = weights.values

  return weights