seleucia

Reputation: 1056

numpy random choice in Tensorflow

Is there an equivalent of numpy's random choice in TensorFlow? In numpy we can get an item randomly from a given list according to its weights.

 np.random.choice([1,2,3,5], 1, p=[0.1, 0, 0.3, 0.6])

This code selects an item from the given list according to the weights p.

Upvotes: 29

Views: 31448

Answers (10)

user3303504

Reputation: 555

Spotted this as I was trying to solve a similar problem but needed more flexibility than the answers here provided.

import tensorflow as tf


def select_indices_with_replacement(probabilities, num_indices):
    # Convert the probabilities to a tensor and normalize them, so they sum to 1
    probabilities = tf.constant(probabilities, dtype=tf.float32)
    probabilities /= tf.reduce_sum(probabilities)

    # Reshape the probabilities into a single row, since categorical expects 2D logits
    shape = tf.constant([1, tf.reduce_prod(probabilities.get_shape()).numpy()])
    flat_probs = tf.reshape(probabilities, shape)

    # Use the categorical distribution function in TensorFlow to sample indices
    # based on the probabilities
    index = tf.random.categorical(tf.math.log(flat_probs), num_indices, dtype=tf.int32)

    indices = tf.unravel_index(index[0], probabilities.shape)
    return indices


def select_indices_no_replacement(probabilities, num_indices):
    # Convert the probabilities to a tensor and normalize them, so they sum to 1
    probabilities = tf.constant(probabilities, dtype=tf.float32)
    probabilities /= tf.reduce_sum(probabilities)

    # Reshape the probabilities into a single row, since categorical expects 2D logits
    shape = tf.constant([1, tf.reduce_prod(probabilities.get_shape()).numpy()])
    flat_probs = tf.reshape(probabilities, shape)

    # Create a tensor of the same shape as the probability tensor, but with all
    # elements set to False
    selected = tf.zeros_like(probabilities, dtype=tf.bool)

    # Use a loop to sample indices without replacement
    indices = []
    for _ in range(num_indices):
        # Use the categorical distribution function to sample an index based on
        # the remaining probabilities
        index = tf.random.categorical(tf.math.log(flat_probs), 1, dtype=tf.int32)
        index = index[0, 0]

        # Convert the flat index to a tuple of indices for the original ND tensor
        indices.append(tf.unravel_index(index, probabilities.shape))
        # indices[-1] = indices[-1].numpy()  # Comment out to leave wrapped in tensorflow

        # Set the probability of the selected index to 0 to ensure it is not
        # selected again
        flat_probs = tf.tensor_scatter_nd_update(flat_probs, [[0, index]], [0.0])

        # Set the selected element to True
        selected = tf.tensor_scatter_nd_update(selected, [indices[-1]], [True])

    indices = tf.transpose(indices)
    return indices


def select_indices(probabilities, num_indices, replace=True):
    if replace:
        return select_indices_with_replacement(probabilities, num_indices)
    else:
        return select_indices_no_replacement(probabilities, num_indices)


def main():
    # Example usage
    probabilities = [[[0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1]]]
    num_indices = 8
    indices = select_indices(probabilities, num_indices, replace=False)
    print(indices)


if __name__ == "__main__":
    main()

When replace=False:

tf.Tensor(
[[0 0 0 0 0 0 0 0]
 [1 1 0 0 0 1 0 1]
 [2 0 1 2 3 1 0 3]], shape=(3, 8), dtype=int32)

When replace=True:

tf.Tensor(
[[0 0 0 0 0 0 0 0]
 [1 0 0 1 1 0 1 0]
 [3 3 3 2 0 3 3 1]], shape=(3, 8), dtype=int32)

==EDIT==

If you don't need to worry about custom distributions and are happy with a simple uniform distribution, then you can also use the following without needing to generate a probabilities matrix (just its dimensions):

def select_indices_uniform(dims, num_indices, unique=False):
    # Compute the total number of elements implied by the dimensions
    size = tf.reduce_prod(tf.constant(dims, dtype=tf.int32)).numpy()

    # Use the uniform_candidate_sampler function to sample flat indices
    samples = tf.random.uniform_candidate_sampler(
            true_classes=[[0]], num_true=1, num_sampled=num_indices, unique=unique, range_max=size)

    # Extract the sampled indices and convert them back to ND coordinates
    indices = tf.unravel_index(samples[0], dims)
    return indices
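
For example, a quick usage sketch under TF 2.x eager execution (the dims values here are just an illustration):

dims = [2, 3]
indices = select_indices_uniform(dims, num_indices=4, unique=True)
print(indices)  # shape (2, 4): one row of coordinates per dimension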

Upvotes: 0

Ajinkya Pol

Reputation: 1

You can achieve the same result as follows:

g = tf.random.Generator.from_seed(123)
l = [1, 2, 3]
print(l[tf.squeeze(g.uniform(shape=[1], minval=0, maxval=3, dtype=tf.dtypes.int32))])

Upvotes: 0

mon

Reputation: 22244

Here is a side-by-side comparison of np.random.choice and tf.random.categorical, with examples.

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

N = np.random.choice([0,1,2,3,4], 5000, p=[i/sum(range(1,6)) for i in range(1,6)])
plt.hist(N, density=True, bins=5)
plt.grid()

[Histogram of the weighted np.random.choice samples]

T = tf.random.categorical(tf.math.log([[i/sum(range(1,6)) for i in range(1,6)]]), 5000)
plt.hist(T[0], density=True, bins=5)  # T has shape (1, 5000), so plot its first (only) row
plt.grid()

[Histogram of the tf.random.categorical samples]

Here is another way to achieve this.

def random_choice(a, size):
    """Random choice from 'a' based on size without duplicates
    Args:
        a: Tensor
        size: int or shape as a tuple of ints e.g., (m, n, k).
    Returns: Tensor of the shape specified with 'size' arg.

    Examples:
        X = tf.constant([[1,2,3],[4,5,6]])
        random_choice(X, (2,1,2)).numpy()
        -----
        [
          [
            [5 4]
          ],
          [
            [1 2]
          ]
        ]
    """
    if isinstance(size, int) or np.issubdtype(type(size), np.integer) or (tf.is_tensor(size) and size.shape == () and size.dtype.is_integer):
        shape = (size,)
    elif isinstance(size, tuple) and len(size) > 0:
        shape = size
    else:
        raise AssertionError(f"Unexpected size arg {size}")

    sample_size = tf.math.reduce_prod(size, axis=None)
    assert sample_size > 0

    # --------------------------------------------------------------------------------
    # Select elements from a flat array
    # --------------------------------------------------------------------------------
    a = tf.reshape(a, (-1))
    length = tf.size(a)
    assert sample_size <= length

    # --------------------------------------------------------------------------------
    # Shuffle the sequential numbers (0, ..., length-1) and take 'sample_size' of them.
    # To select 'sample_size' elements from a 1D array of shape (length,),
    # the indices tensor needs to have the shape (sample_size, 1) where each index
    # has shape (1,).
    # --------------------------------------------------------------------------------
    indices = tf.reshape(
        tensor=tf.random.shuffle(tf.range(0, length, dtype=tf.int32))[:sample_size],
        shape=(-1, 1)   # Convert to the shape (sample_size, 1)
    )
    return tf.reshape(tensor=tf.gather_nd(a, indices), shape=shape)

X = tf.constant([[1,2,3],[4,5,6]])
print(random_choice(X, (2,2,1)).numpy())
---
[[[5]
  [4]]

 [[2]
  [1]]]

Upvotes: 4

t.okuda

Reputation: 41

Very late to the party too, but I found the simplest solution.

import numpy as np
import tensorflow as tf

# Sample source matrix
M = tf.constant(np.arange(4*5).reshape(4,5))
N_samples = 2
# Draw N_samples row indices uniformly (with replacement) and gather those rows
tf.gather(M, tf.cast(tf.random.uniform([N_samples])*M.shape[0], tf.int32), axis=0)

Upvotes: 3

Arvind

Reputation: 318

In TensorFlow 2.0, tf.compat.v1.multinomial is deprecated; use tf.random.categorical instead.
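
For example, a minimal sketch of the question's np.random.choice call using tf.random.categorical (TF 2.x, eager mode; note categorical expects log-probabilities):

import tensorflow as tf

elems = tf.constant([1, 2, 3, 5])
log_probs = tf.math.log([[0.1, 0.0, 0.3, 0.6]])  # 2D: one row of probabilities per batch entry
idx = tf.random.categorical(log_probs, num_samples=1)
print(tf.gather(elems, idx[0]).numpy())  # e.g. [5]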

Upvotes: 11

Tim Hargreaves

Reputation: 332

Very late to the party but I will add another solution since the existing tf.multinomial approach takes up a lot of temporary memory and so can't be used for large inputs. Here is the method I use (for TF 2.0):

# Sampling k members of 1D tensor a using weights w
cum_dist = tf.math.cumsum(w)
cum_dist /= cum_dist[-1]  # to account for floating point errors
unif_samp = tf.random.uniform((k,), 0, 1)
idxs = tf.searchsorted(cum_dist, unif_samp)
samp = tf.gather(a, idxs)  # samp contains the k weighted samples
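
Here a, w and k are placeholders for the data tensor, the weight tensor and the sample count; a hypothetical end-to-end run might look like:

import tensorflow as tf

a = tf.constant([1, 2, 3, 5])              # values to sample from
w = tf.constant([0.1, 0.0, 0.3, 0.6])      # weights (need not be normalized)
k = 3                                      # number of samples

cum_dist = tf.math.cumsum(w)
cum_dist /= cum_dist[-1]                   # account for floating point errors
unif_samp = tf.random.uniform((k,), 0, 1)
idxs = tf.searchsorted(cum_dist, unif_samp)
samp = tf.gather(a, idxs)
print(samp.numpy())                        # e.g. [5 3 5]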

Upvotes: 2

PaulG

Reputation: 3491

My team and I had the same problem, with the requirement of keeping all operations as TensorFlow ops and implementing a 'without replacement' version.

Solution:

def tf_random_choice_no_replacement_v1(one_dim_input, num_indices_to_drop=3):

    input_length = tf.shape(one_dim_input)[0]

    # create uniform distribution over the sequence
    # for tf.__version__<1.11 use tf.random_uniform - no underscore in function name
    uniform_distribution = tf.random.uniform(
        shape=[input_length],
        minval=0,
        maxval=None,
        dtype=tf.float32,
        seed=None,
        name=None
    )

    # grab the indices of the greatest (input_length - num_indices_to_drop) values from the distribution
    _, indices_to_keep = tf.nn.top_k(uniform_distribution, input_length - num_indices_to_drop)
    sorted_indices_to_keep = tf.contrib.framework.sort(indices_to_keep)  # in TF 2.x, use tf.sort instead

    # gather the kept elements from the input using the sorted indices
    result = tf.gather(one_dim_input, sorted_indices_to_keep)
    return result

The idea behind this code is to produce a random uniform sample with the same dimensionality as the vector over which you'd like to perform the choice selection. Since the sampled values are (almost surely) distinct and can be ranked, you can take the indices of the top k positions and use those as your choices. Since the positions of the top k are as random as the uniform distribution itself, this equates to performing a random choice without replacement.

This can perform the choice operation on any 1-d sequence in tensorflow.
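
As an aside, the same top-k trick extends to weighted sampling without replacement via the Gumbel-top-k technique: add standard Gumbel noise to the log-weights and take the top k indices. A sketch under TF 2.x, with w a hypothetical 1-D weight tensor:

import tensorflow as tf

w = tf.constant([0.1, 0.0, 0.3, 0.6])  # hypothetical unnormalized weights
k = 2
u = tf.random.uniform(tf.shape(w), minval=1e-9, maxval=1.0)
gumbel_noise = -tf.math.log(-tf.math.log(u))  # standard Gumbel samples
_, indices = tf.nn.top_k(tf.math.log(w) + gumbel_noise, k=k)  # k distinct indices, weighted by w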

Upvotes: 6

Jiabo He

Reputation: 1

There is no need to do random choice in TF; you can just do it directly using np.random.choice(data, p=probs), and TF can accept that.
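
For instance (eager TF 2.x; data and probs are hypothetical):

import numpy as np
import tensorflow as tf

data = np.array([1, 2, 3, 5])
probs = np.array([0.1, 0.0, 0.3, 0.6])
choice = np.random.choice(data, p=probs)  # plain NumPy weighted choice
t = tf.convert_to_tensor(choice)          # TF accepts the NumPy result

Note this runs in NumPy outside the graph, so inside a traced tf.function it would execute only once, at trace time.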

Upvotes: -10

twink_ml

Reputation: 512

If instead of sampling random elements from a 1-dimensional Tensor, you want to randomly sample rows from an n-dimensional Tensor, you can combine tf.multinomial and tf.gather.

def _random_choice(inputs, n_samples):
    """
    With replacement.
    Params:
      inputs (Tensor): Shape [n_states, n_features]
      n_samples (int): The number of random samples to take.
    Returns:
      sampled_inputs (Tensor): Shape [n_samples, n_features]
    """
    # (1, n_states) since multinomial requires 2D logits.
    uniform_log_prob = tf.expand_dims(tf.zeros(tf.shape(inputs)[0]), 0)

    ind = tf.multinomial(uniform_log_prob, n_samples)
    ind = tf.squeeze(ind, 0, name="random_choice_ind")  # (n_samples,)

    return tf.gather(inputs, ind, name="random_choice")
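
A hypothetical usage sketch (TF 1.x, where tf.multinomial still exists; in TF 2.x replace it with tf.random.categorical):

inputs = tf.constant([[1., 2.], [3., 4.], [5., 6.]])  # [n_states=3, n_features=2]
sampled = _random_choice(inputs, n_samples=5)         # shape [5, 2], rows drawn uniformly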

Upvotes: 2

sygi

Reputation: 4637

No, but you can achieve the same result using tf.multinomial:

elems = tf.convert_to_tensor([1,2,3,5])
samples = tf.multinomial(tf.log([[1, 0, 0.3, 0.6]]), 1) # note log-prob
elems[tf.cast(samples[0][0], tf.int32)].eval()
Out: 1
elems[tf.cast(samples[0][0], tf.int32)].eval()
Out: 5

The [0][0] part is there because multinomial expects a row of unnormalized log-probabilities for each element of the batch, and its output also has another dimension for the number of samples.
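
In TF 2.x, where tf.multinomial and tf.log were removed, a roughly equivalent sketch is:

elems = tf.constant([1, 2, 3, 5])
samples = tf.random.categorical(tf.math.log([[1.0, 0.0, 0.3, 0.6]]), 1)  # unnormalized log-weights
print(tf.gather(elems, samples[0][0]).numpy())  # e.g. 5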

Upvotes: 24
