user13254983

custom loss function not differentiable

I'm trying to use a custom loss function. Here's the code structure:

Libraries:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp

from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
import string
import random
import math
import pickle

The dataset is available here:

https://drive.google.com/file/d/1sB8at-hZl-HXeFyFSp1Mm2Bhd3eV8ZxA/view?usp=sharing

Source code:

latent_dim = 64    # Number of nodes in the hidden layers
train_size = 10000 # Size of the training vocabulary
v_size = 100       # Feature vector length (assumed: matches the (100,) input shape below)
# The training dataset has been downloaded from https://norvig.com/ngrams/
words = pd.read_csv("./enable2.csv") # Open-source data corpus
words = list(words['aa'])
random.shuffle(words)
# Create the list of all possible characters in the English language
letters = list(string.ascii_letters)
punc = list(string.punctuation)
space = [' ','  ','   ']
letters.extend(punc)
letters.extend(space)
random.shuffle(letters)
def word_to_vector(words):
  # Create features for each word based on the list of characters built in the previous step
  features = list()
  for word in words:
      feature = ([0]*v_size)
      for i in range(len(word)):
          w = word[i]
          feature[i] = letters.index(w)
      features.append(feature)
  return features

vector_words = word_to_vector(words)
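For illustration (the actual indices depend on the shuffled letters list, so concrete values are not reproducible):

sample = word_to_vector(["cat"])
# sample[0] is a 100-long vector whose first three entries are the
# positions of 'c', 'a', 't' in letters, and the remaining entries are zeros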

# Splitting the vocab into training and test datasets
x_train = np.array(vector_words[:train_size])
x_train.shape
def train_nn_with_custom_loss_function(custom_loss_function, nn, lr, batch_size, nb_epochs, save_to_folder=None, plot_loss=True):
  """
    Train a neural net with custom loss function, plots the loss
    and returns the history (output of nn)

    Args:
      custom_loss_function (function): One of the customly developed loss
          functions
  """
  # reset session
  tf.keras.backend.clear_session()
  # It's important to use run_eagerly=True here because the custom loss
  # function converts tensors to NumPy arrays, which requires eager mode
  opt = tf.keras.optimizers.Adam(learning_rate=lr)
  nn.compile(optimizer=opt, loss=custom_loss_function, run_eagerly=True)
  # nn.build(input_shape=(100,))
  nn.summary()
  history = nn.fit(x_train, x_train,
                   epochs=nb_epochs, batch_size=batch_size, verbose=1,
                   shuffle=True)
  
  if save_to_folder is not None:
    nn.save(save_to_folder)

  plt.plot(history.history['loss'])
  plt.ylabel('loss')
  plt.xlabel('epoch')
  # plt.set_xticks(np.arange(len(history.history['loss'])))
  plt.show()

  pred = nn.predict(x_train)
  classes = np.argmax(pred, axis=1)
  y, x = np.histogram(classes, bins=np.arange(16))
  x = x[1:]
  plt.bar(x, y)
  plt.ylabel('input')
  plt.xlabel('class')
  plt.xticks(np.arange(1, 16, step=1))

  plt.show()
  return y

Loss function:

from scipy.spatial import distance

# There is no real y_true in this unsupervised setup; nn.fit(x_train, x_train)
# passes the inputs themselves as y_true
def distance_loss_fn(y_true, y_pred):
  # extract the predicted class of every point in the batch
  pred = tf.argmax(y_pred, axis=1).numpy()
  # centroid coordinates, one row per class
  centroids = np.zeros((15, 100), dtype=float)
  # number of points assigned to each centroid
  centroids_f = np.zeros(15)
  # points holds the coordinates of the points to cluster
  points = y_true.numpy()
  # accumulate the points of each class
  for idx in range(points.shape[0]):
    centroids[pred[idx]] += points[idx]
    centroids_f[pred[idx]] += 1
  # average to get the centroid of each non-empty class
  for idx in range(len(centroids)):
    if centroids_f[idx] == 0:
      continue
    centroids[idx] /= centroids_f[idx]
  # the loss is the sum of distances between each point and the centroid
  # of its predicted class
  loss = 0
  for idx in range(len(points)):
    # Hamming distance between the point and its centroid
    # (also tried tf.reduce_sum(centroids[pred[idx]] - points[idx]) and
    # tfa.metrics.hamming.hamming_loss_fn with mode='multilabel')
    loss += distance.hamming(centroids[pred[idx]], points[idx])
  ret = tf.constant(loss, dtype=tf.float32)
  return ret

Architecture:

base_nn = tf.keras.Sequential([
  layers.Flatten(input_shape=(100,)),
  layers.Dense(128, activation='relu'),
  layers.Dropout(0.2),
  layers.Dense(15, activation='softmax')
  ])


diff = train_nn_with_custom_loss_function(distance_loss_fn, base_nn, 0.00001, 32, 5)

Error I get:

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
flatten (Flatten)            (None, 100)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               12928     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 15)                1935      
=================================================================
Total params: 14,863
Trainable params: 14,863
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-134-8e360d25be14> in <module>()
      7 
      8 
----> 9 diff = train_nn_with_custom_loss_function(distance_loss_fn, base_nn, 0.00001, 32, 5)

12 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/utils.py in filter_empty_gradients(grads_and_vars)
     74   if not filtered:
     75     raise ValueError("No gradients provided for any variable: %s." %
---> 76                      ([v.name for _, v in grads_and_vars],))
     77   if vars_with_empty_grads:
     78     logging.warning(

ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].

The problem is that this loss function is not differentiable; any help on how to make it differentiable would be highly appreciated. Brief info about the method: this is an unsupervised NN whose inputs are strings like "get filename", and it has to predict a class between 1 and 15. Loss function information (Loss 3: labels with distance); a toy walkthrough on made-up data follows the list:

  1. Extract the predicted class of each point in the batch from the last layer.
  2. Calculate the centroid of each class by averaging the points assigned to that class.
  3. Sum the distances (Hamming distance is used) between each point and its centroid; that sum is the loss.
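For concreteness, here is a hypothetical walkthrough of those three steps on made-up data, with the dimensions shrunk from 100 to 3 and the classes from 15 to 2 for readability:

import numpy as np
from scipy.spatial import distance

# Step 1: predicted class of each point (the argmax of the softmax output)
points = np.array([[0., 1., 2.], [0., 1., 3.], [5., 5., 5.], [5., 4., 5.]])
pred = np.array([0, 0, 1, 1])

# Step 2: centroid of each class = average of the points assigned to it
centroids = np.stack([points[pred == c].mean(axis=0) for c in range(2)])

# Step 3: the loss is the summed Hamming distance between each point
# and the centroid of its predicted class
loss = sum(distance.hamming(centroids[pred[i]], points[i])
           for i in range(len(points)))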

Upvotes: 1

Views: 1339

Answers (1)

kacpo1

Reputation: 565

The problem is most likely this line in the loss function: pred = tf.argmax(y_pred, axis=1).numpy(). argmax cannot be differentiated, so no gradient can flow through it, and the .numpy() conversion additionally detaches the values from the TensorFlow graph. This is what causes your error.
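One way to keep gradients flowing is to drop the hard argmax and the NumPy round-trips and compute soft centroids from the class probabilities using only TensorFlow ops. Below is a minimal sketch of that idea, not your exact method: since the Hamming distance is itself a step function, it substitutes squared Euclidean distance as a smooth stand-in (the 15 classes and 100-dimensional inputs are taken from your question):

import tensorflow as tf

def soft_distance_loss_fn(y_true, y_pred):
    # y_true: (batch, 100) input points; y_pred: (batch, 15) softmax outputs
    y_true = tf.cast(y_true, tf.float32)
    # Per-class sums of the points, weighted by class probability: (15, 100)
    weighted_sums = tf.matmul(y_pred, y_true, transpose_a=True)
    # Total soft membership of each class: (15, 1); epsilon guards empty clusters
    membership = tf.reduce_sum(y_pred, axis=0)[:, tf.newaxis] + 1e-8
    # Soft centroids: (15, 100)
    centroids = weighted_sums / membership
    # Expected centroid of each point under its predicted distribution: (batch, 100)
    assigned = tf.matmul(y_pred, centroids)
    # Mean squared distance between each point and its soft centroid
    return tf.reduce_mean(tf.reduce_sum(tf.square(y_true - assigned), axis=1))

Since everything stays inside the TensorFlow graph, run_eagerly=True is no longer needed with a loss like this.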

Upvotes: 1
