Reputation: 136
I am trying to implement linear regression using TensorFlow. The following is the code I am using.
import tensorflow as tf
import numpy as np
import pandas as pd
import os
rng = np.random
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# reading data from a csv file
file1 = pd.read_csv('out.csv')
x_data=file1['^GSPC']
# converting dataframe into array
x_data=x_data.values
y_data=file1['FB']
#converting dataframe into array
y_data=y_data.values
n_steps = 1000 #Total number of steps
n_iterations = [] #Nth iteration value
n_loss = [] #Loss at nth iteration
learned_weight = [] #weight at nth iteration
learned_bias = [] #bias value at nth iteration
# Try to find values for W and b that compute y_data = W * x_data + b
W = tf.Variable(rng.randn())
b = tf.Variable(rng.rand())
y = W * x_data + b
# Minimize the mean squared errors.
loss=tf.reduce_sum(tf.pow(y-y_data, 2))/(2*28)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
with tf.Session() as sess:
    # Before starting, initialize the variables. We will 'run' this first.
    sess.run(tf.global_variables_initializer())
    for step in range(n_steps):
        sess.run(train)
        n_iterations.append(step)
        n_loss.append(loss.eval())
        learned_weight.append(W.eval())
        learned_bias.append(b.eval())
print("Final Weight: "+str(learned_weight[-1])+", Final Bias: "+str(learned_bias[-1]) + ", Final cost:"+str(n_loss[-1]))
The problem is that every time I run the code I get a different result (weights, bias and cost (loss)). I have studied from a few resources that the weights, bias and cost should be approximately the same in every run. Secondly, the line y = weights * x_data + bias does not quite fit the training data. Thirdly, I have to convert the dataframes x_data and y_data to arrays by doing the following:
x_data=x_data.values
y_data=y_data.values
If I don't do this as shown above, my code raises the following error:
Traceback (most recent call last):
  File "python", line 33, in <module>
  File "tensorflow/python/framework/fast_tensor_util.pyx", line 120, in tensorflow.python.framework.fast_tensor_util.AppendObjectArrayToTensorProto
TypeError: Expected binary or unicode string, got <tf.Tensor 'sub:0' shape=(28,) dtype=float32>
Please help me understand what I am doing wrong!
P.S.: My questions may sound stupid because I am new to TensorFlow and machine learning.
Upvotes: 0
Views: 159
Reputation: 2982
The code is not implemented correctly:
- Use tf.placeholder for the data that will be passed into the model.
- Use the feed_dict argument of sess.run to pass the data to the placeholders when executing the graph.
Here's an updated example:
import numpy as np
import tensorflow as tf
# dataset
X_data = np.random.randn(100,3)
y_data = 2*np.sum(X_data, 1)+0.01
# reshape y to be a column vector
y_data = np.reshape(y_data, [-1, 1])
# parameters
n_steps = 1000 #Total number of steps
batch_size = 20
input_length = X_data.shape[0] # => 100
display_cost = 500
# data placeholders
X = tf.placeholder(shape=[None, 3],dtype = tf.float32)
y = tf.placeholder(shape=[None, 1],dtype = tf.float32)
# build the model
W = tf.Variable(initial_value = tf.random_normal([3,1]))
b = tf.Variable(np.random.rand())
y_fitted = tf.add(tf.matmul(X, W), b)
# Minimize the mean squared errors
loss=tf.losses.mean_squared_error(labels=y, predictions=y_fitted)
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# execute in Session
with tf.Session() as sess:
    # initialize all variables
    tf.global_variables_initializer().run()
    # Train the model
    for steps in range(n_steps):
        mini_batch = zip(range(0, input_length, batch_size),
                         range(batch_size, input_length+1, batch_size))
        # train data in mini-batches
        for (start, end) in mini_batch:
            sess.run(optimizer, feed_dict = {X: X_data[start:end],
                                             y: y_data[start:end]})
        # print training performance
        if (steps+1) % display_cost == 0:
            print('Step: {}'.format((steps+1)))
            # evaluate loss function
            cost = sess.run(loss, feed_dict = {X: X_data,
                                               y: y_data})
            print('Cost: {}'.format(cost))
    # report the learned weight and bias
    print('\nFinal Weight: {}'.format(W.eval()))
    print('\nFinal Bias: {}'.format(b.eval()))
# Run 1
Step: 500
Cost: 3.1569701713918263e-11
Step: 1000
Cost: 3.1569701713918263e-11
Final Weight: [[2.0000048]
 [2.0000024]
 [1.9999973]]
Final Bias: 0.010000854730606079
# Run 2
Step: 500
Cost: 7.017615221566187e-12
Step: 1000
Cost: 7.017615221566187e-12
Final Weight: [[1.9999975]
 [1.9999989]
 [1.9999999]]
Final Bias: 0.0099998963996768
Indeed, the weight and bias are approximately the same across multiple runs of training on the same dataset. Also, when doing numerical computations, NumPy ndarrays are generally the preferred data format, hence the conversion using .values.
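For reference, here is a minimal, untested sketch of how the same placeholder/feed_dict pattern could be applied to the single-feature data from the question. It assumes out.csv with the '^GSPC' and 'FB' columns as in your code; the seed calls are only there to make repeated runs identical, and with raw stock prices you may still need to scale the data or lower the learning rate to avoid divergence:
import numpy as np
import pandas as pd
import tensorflow as tf

# optional: fix the seeds so repeated runs produce the same weight/bias
np.random.seed(0)
tf.set_random_seed(0)

# read the CSV and convert the columns to float32 column vectors
file1 = pd.read_csv('out.csv')
x_data = file1['^GSPC'].values.reshape(-1, 1).astype(np.float32)
y_data = file1['FB'].values.reshape(-1, 1).astype(np.float32)

# placeholders for the data fed in at run time
X = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# single-feature linear model: y_fitted = X*W + b
W = tf.Variable(tf.random_normal([1, 1]))
b = tf.Variable(0.0)
y_fitted = tf.add(tf.matmul(X, W), b)

loss = tf.losses.mean_squared_error(labels=y, predictions=y_fitted)
train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        sess.run(train, feed_dict={X: x_data, y: y_data})
    print('Final Weight: {}, Final Bias: {}, Final cost: {}'.format(
        W.eval(), b.eval(), sess.run(loss, feed_dict={X: x_data, y: y_data})))
Feeding through placeholders keeps the graph independent of the particular arrays, and without the seeds the results will still differ slightly between runs because of the random initialization of W.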
Upvotes: 2