ChoWonYang

Reputation: 11

PyTorch vs TensorFlow gives different results

I am implementing a "perceptual loss" function, but PyTorch and TensorFlow give different results even though I feed them the same images. Please let me know why.
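
For reference, the quantity I am trying to compute in both frameworks is a pixel-wise MSE plus the sum of MSEs between selected VGG16 feature maps. A framework-agnostic sketch (here `mse` and `features` are just placeholders for the framework-specific pieces shown below):

# Framework-agnostic sketch of the loss; `mse` and `features` are placeholders
# for the framework-specific implementations that follow.
def total_loss(img1, img2, features, mse):
    # pixel-wise MSE between the two images
    pixel_loss = mse(img1, img2)
    # perceptual part: sum of MSEs between the selected VGG16 feature maps
    perceptual = sum(mse(a, b) for a, b in zip(features(img1), features(img2)))
    return pixel_loss, perceptual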

TensorFlow

class FeatureExtractor(tf.keras.Model):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = tf.keras.applications.VGG16(weights="imagenet",
                                                include_top=False,
                                                input_shape=(256, 256, 3))
        extractor.trainable = True

        # features = [extractor.layers[i].output for i in n_layers]
        features = [extractor.get_layer(i).output for i in n_layers]
        self.extractor = tf.keras.models.Model(extractor.inputs, features)

    def call(self, x):
        return self.extractor(x)

def loss_function(generated_image, target_image, feature_extractor):
    MSE = tf.keras.losses.MeanSquaredError()
    mse_loss = MSE(generated_image, target_image)

    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    perceptual_loss = 0

    for i in range(len(real_features)):
        loss = MSE(real_features[i], generated_features[i])
        print(loss)
        perceptual_loss += loss
    return mse_loss, perceptual_loss

Run:

feature_extractor = FeatureExtractor(n_layers=["block1_conv1", "block1_conv2",
                                               "block3_conv2", "block4_conv2"])

mse_loss, perceptual_loss = loss_function(image1, image2,
                                          feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss+perceptual_loss}")

It gives:

output:
tf.Tensor(0.0014001362, shape=(), dtype=float32)
tf.Tensor(0.030578917, shape=(), dtype=float32)
tf.Tensor(2.6163354, shape=(), dtype=float32)
tf.Tensor(0.842701, shape=(), dtype=float32)
0.002584027126431465 3.4910154342651367 3.4935994148254395

PyTorch

class FeatureExtractor(torch.nn.Module):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = models.vgg16(pretrained=True).features
        index = 0
        self.layers = nn.ModuleList([])

        for i in range(len(n_layers)):
            self.layers.append(torch.nn.Sequential())
            for j in range(index, n_layers[i] + 1):
                self.layers[i].add_module(str(j), extractor[j])
            index = n_layers[i] + 1

        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        result = []

        for i in range(len(self.layers)):
            x = self.layers[i](x)
            result.append(x)
        return result

def loss_function(generated_image, target_image, feature_extractor):
    MSE = nn.MSELoss(reduction='mean')
    mse_loss = MSE(generated_image, target_image)
    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    perceptual_loss = 0

    for i in range(len(real_features)):
        loss = MSE(real_features[i], generated_features[i])
        perceptual_loss += loss
        print(loss)

    return mse_loss, perceptual_loss

Run:

feature_extractor = FeatureExtractor(n_layers=[1, 3, 13, 20]).to(device)
mse_loss, perceptual_loss = loss_function(image1, image2,
                                          feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss+perceptual_loss}")

It gives:

output:
tensor(0.0003)
tensor(0.0029)
tensor(0.2467)
tensor(0.2311)
0.002584027359262109 0.4810013473033905 0.483585387468338

Upvotes: 1

Views: 1464

Answers (1)

FancyXun

Reputation: 1298

Although the two models have the same architecture, the parameters of the final models may differ because each framework initializes them from its own set of pretrained parameters. In addition, frameworks such as Keras and PyTorch preprocess the input images differently before they are fed to the network, so the tensor values are different after preprocessing even when the input images are identical. A quick check of the two preprocessing pipelines in isolation is shown right below; the full comparison example follows after it.
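
As a minimal illustration (not part of the full example), the sketch below runs the same random RGB array through each framework's standard VGG16 preprocessing. Keras applies caffe-style preprocessing (RGB to BGR, subtract the ImageNet channel means), while torchvision scales to [0, 1] and then standardizes with the ImageNet mean/std, so the resulting value ranges differ by roughly two orders of magnitude. The exact numbers printed depend on the random input.

import numpy as np
import tensorflow as tf
import torchvision.transforms as transforms
from PIL import Image

# the same random RGB image for both pipelines
rgb = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)

# keras/tf: caffe-style preprocessing (RGB -> BGR, subtract the ImageNet channel means);
# values end up roughly in [-124, 152]
keras_in = tf.keras.applications.vgg16.preprocess_input(rgb.astype("float32"))
print("keras range:", keras_in.min(), keras_in.max())

# torchvision: scale to [0, 1], then standardize with the ImageNet mean/std;
# values end up roughly in [-2.1, 2.6]
torch_pipeline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
torch_in = torch_pipeline(Image.fromarray(rgb))
print("torch range:", torch_in.min().item(), torch_in.max().item())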

from abc import ABC

import torch
import numpy as np
import tensorflow as tf

from torch import nn
from PIL import Image
from torch.autograd import Variable
import torchvision.models as models
import torchvision.transforms as transforms
from keras.preprocessing.image import load_img
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import img_to_array

# 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg'
IMG_URL1 = 'the local path of 1200px-Cat03.jpg'
# 'https://upload.wikimedia.org/wikipedia/commons/b/bb/Kittyply_edit1.jpg'
IMG_URL2 = 'the local path of Kittyply_edit1.jpg'

# preprocess in keras
image1_tf = load_img(IMG_URL1, target_size=(224, 224))
image1_tf = img_to_array(image1_tf)
image1_tf = image1_tf.reshape((1, image1_tf.shape[0], image1_tf.shape[1], image1_tf.shape[2]))
image1_tf = preprocess_input(image1_tf)

image2_tf = load_img(IMG_URL2, target_size=(224, 224))
image2_tf = img_to_array(image2_tf)
image2_tf = image2_tf.reshape((1, image2_tf.shape[0], image2_tf.shape[1], image2_tf.shape[2]))
image2_tf = preprocess_input(image2_tf)


# preprocess in pytorch
image1_torch = Image.open(IMG_URL1)
image2_torch = Image.open(IMG_URL2)
image1_torch = image1_torch.resize((224, 224))
image2_torch = image2_torch.resize((224, 224))

min_img_size = 224
transform_pipeline = transforms.Compose([transforms.Resize(min_img_size),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                              std=[0.229, 0.224, 0.225])])
image1_torch = transform_pipeline(image1_torch)
image2_torch = transform_pipeline(image2_torch)
image1_torch = image1_torch.unsqueeze(0)
image2_torch = image2_torch.unsqueeze(0)
image1_torch = Variable(image1_torch)
image2_torch = Variable(image2_torch)


class FeatureExtractor(tf.keras.Model, ABC):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()

        extractor = tf.keras.applications.VGG16(weights="imagenet", input_shape=(224, 224, 3))
        extractor.trainable = True
        features = [extractor.get_layer(i).output for i in n_layers]
        self.extractor = tf.keras.models.Model(extractor.inputs, features)

    def call(self, x):
        return self.extractor(x)


def loss_function(generated_image, target_image, feature_extractor):
    MSE = tf.keras.losses.MeanSquaredError()
    mse_loss = MSE(generated_image, target_image)

    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    
    print("tf prediction:", np.argmax(generated_features[-1].numpy()[0]))
    print("tf prediction:", np.argmax(real_features[-1].numpy()[0]))

    perceptual_loss = 0

    for i in range(len(real_features[:-1])):
        loss = MSE(real_features[i], generated_features[i])
        print(loss)
        perceptual_loss += loss

    return mse_loss, perceptual_loss


feature_extractor = FeatureExtractor(n_layers=["block1_conv1", "block1_conv2", "block3_conv2",
                                               "block4_conv2", "predictions"])
print("tensorflow: ")
mse_loss, perceptual_loss = loss_function(image1_tf, image2_tf, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss + perceptual_loss}")


class FeatureExtractor1(torch.nn.Module):

    def __init__(self, n_layers):
        super(FeatureExtractor1, self).__init__()
        self.vgg = models.vgg16(pretrained=True)
        extractor = self.vgg.features
        index = 0
        self.layers = nn.ModuleList([])
        for i in range(len(n_layers)):

            self.layers.append(torch.nn.Sequential())
            for j in range(index, n_layers[i] + 1):
                self.layers[i].add_module(str(j), extractor[j])
            index = n_layers[i] + 1

        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        result = []
        predict = self.vgg(x)
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            result.append(x)
        result.append(predict)
        return result


def loss_function1(generated_image, target_image, feature_extractor):
    MSE = nn.MSELoss(reduction='mean')
    mse_loss = MSE(generated_image, target_image)

    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)

    print("torch prediction:", np.argmax(generated_features[-1].numpy()[0]))
    print("torch prediction:", np.argmax(real_features[-1].numpy()[0]))
    perceptual_loss = 0

    for i in range(len(real_features[:-1])):
        loss = MSE(real_features[i], generated_features[i])
        perceptual_loss += loss
        print(loss)

    return mse_loss, perceptual_loss


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
feature_extractor = FeatureExtractor1(n_layers=[1, 3, 13, 20]).to(device)
print("pytorch: ")
mse_loss, perceptual_loss = loss_function1(image1_torch, image2_torch, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss + perceptual_loss}")

In addition, the model was trained for classification accuracy, not for matching intermediate activations, so some difference between the feature maps in the middle of the network is to be expected.
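
If you want to verify the first point directly, a rough sketch like the one below loads the pretrained VGG16 from each framework and compares the first convolution kernel. Keras stores convolution weights in HWIO layout while torchvision uses OIHW, so one tensor is transposed before comparing; treat this as an illustration rather than part of the example above.

import numpy as np
import tensorflow as tf
import torchvision.models as models

# first conv layer weights in each framework's pretrained VGG16
tf_vgg = tf.keras.applications.VGG16(weights="imagenet", include_top=False)
tf_w = tf_vgg.get_layer("block1_conv1").get_weights()[0]   # shape (3, 3, 3, 64), HWIO

torch_vgg = models.vgg16(pretrained=True)
torch_w = torch_vgg.features[0].weight.detach().numpy()    # shape (64, 3, 3, 3), OIHW
torch_w = np.transpose(torch_w, (2, 3, 1, 0))              # reorder to HWIO for comparison

# if the two checkpoints stored identical parameters this would print ~0;
# in practice it does not
print("max abs difference:", np.abs(tf_w - torch_w).max())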

Upvotes: 1
