TensorFlow: Unimplemented: Cast string to float is not supported

Question

I have had some considerable trouble in getting my TensorFlow model to actually run on my own input data.

I am drawing images from labeled directories. I have two classes of images, "good" and "bad," which are stored in their own respective directories.

I read them in, using TensorFlow's built-in list_files(glob), and process them with strictly TensorFlow operations. However, now that I am trying to run my model, it ceases to run on the first epoch and outputs the error code: tensorflow/core/framework/op_kernel.cc:1730] OP_REQUIRES failed at cast_op.cc:123 : Unimplemented: Cast string to float is not supported

My code is as follows:

import numpy as np
import matplotlib.pyplot as plt
import pathlib

import tensorflow as tf
from tensorflow.keras import layers, models

import random

import os

class E6Classifier:
    image_size = 256
    train_test_proportion = .8
    current_working_directory = pathlib.Path.cwd()
    data_directory = current_working_directory.parent / 'Jupyter Notebooks' / 'Tensorflow' / 'e6Classifier' / 'data'
    categories = ['good', 'bad']
    good_images = []
    bad_images = []
    batch_size = 32

    total_dataset = None
    train_dataset = None
    test_dataset = None

    def __init__(self):
        self.read_images()
        self.print_statistics()

        #TensorFlow implementation
        self.make_tensorflow_dataset()
        self.train_test_split()
        self.create_model()
        self.train_model()

    def read_images(self):
        good_path = self.data_directory / self.categories[0]
        bad_path = self.data_directory / self.categories[1]
        filetypes = ('*.jpg', '*.png')

        for filetype in filetypes:
            self.good_images.extend(good_path.glob(filetype))
            self.bad_images.extend(bad_path.glob(filetype))

    def print_statistics(self):
        self.num_good_images = len(self.good_images)
        self.num_bad_images = len(self.bad_images)
        self.total_images = self.num_good_images + self.num_bad_images
        self.proportion_good = round(self.num_good_images / self.total_images * 100, 2)

        print(str(self.total_images) + ' total images | ' + str(self.num_good_images) + ' good images, ' + str(self.num_bad_images) + ' bad images | ' + str(self.proportion_good) + ' percent good to bad')

    def make_tensorflow_dataset(self):
        directory_strings = []
        for filetype in ['*.jpg', '*.png']:
            directory_strings.append(str(self.data_directory / 'good' / filetype))
            directory_strings.append(str(self.data_directory / 'bad' / filetype))

        list_dataset = tf.data.Dataset.list_files(directory_strings)

        labeled_dataset = list_dataset.map(self.process_tensor_path)

        self.total_dataset = labeled_dataset

    def process_tensor_path(self, filepath):
        label = tf.strings.split(filepath, os.sep)[-2]

        image = tf.io.read_file(filepath)
        image = tf.image.decode_image(image, channels = 3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_with_pad(image, target_width = self.image_size, target_height = self.image_size)

        return image, label

    def train_test_split(self):
        num_training_images = int(round(self.total_images * self.train_test_proportion,0))

        self.total_dataset.shuffle(buffer_size = self.total_images)

        self.train_dataset = self.total_dataset.take(num_training_images)
        self.test_dataset = self.total_dataset.skip(num_training_images)

    def create_model(self):
        #Batch datasets
        self.train_dataset = self.train_dataset.batch(self.batch_size, drop_remainder = True)
        self.test_dataset = self.test_dataset.batch(self.batch_size, drop_remainder = True)
        self.total_dataset = self.total_dataset.batch(self.batch_size, drop_remainder = True)

        #Create model
        self.model = models.Sequential()
        #Add a convolutional layer to detect features in the image
        self.model.add(layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (self.image_size, self.image_size, 3)))
        #Add a pooling layer to remove sensitivity to position in the image of the feature
        self.model.add(layers.MaxPooling2D((2, 2)))
        #Repeat ad nauseum
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        self.model.add(layers.MaxPooling2D(2, 2))
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        self.model.add(layers.MaxPooling2D(2, 2))
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        self.model.add(layers.MaxPooling2D(2, 2))
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))

        #Add dense layers
        self.model.add(layers.Flatten())
        self.model.add(layers.Dense(64, activation = 'relu'))
        self.model.add(layers.Dense(2))

        #Compile the model for training
        self.model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), metrics = ['accuracy'])

    def train_model(self):
        #self.train_dataset.repeat()
        #self.test_dataset.repeat()

        self.model.fit(self.train_dataset, epochs = 1, validation_data = self.test_dataset, verbose = True)

def main():
    E6Classifier()

if __name__ == '__main__':
    main()

I don't understand what is causing this error to be thrown, and there doesn't seem to be a tremendous amount of information about where the call is actually failing. I have looked at the datatypes of my TensorFlow datasets, and they indicate they are tuples of Tensors with types float32 and string, respectively.

Is this an issue with having strings for category names? If so, how would I go about replacing the category names with numbers?

Andrea Angeli · Accepted Answer

You want to classify you images into two categories: good and bad. Since training a network involves calculating a loss value (which is a numberical value) and then backpropagating it to update the weights (also numerical values), you should have numerical outputs and labels. Convert your 'good' label to eg. 1, and the 'bad' label to 0. You can do this in tensorflow (see the example below) or in you folder structure (rename the folders and modify your code accordingly).

string_label = tf.strings.split(filepath, os.sep)[-2]
label = tf.constant(1.) if tf.math.equal(string_label, tf.constant('good', dtype=tf.string)) else tf.constant(0.)

TensorFlow: Unimplemented: Cast string to float is not supported

Answers (2)

Related Questions