Reputation: 41
Here are my complete Code and Traceback. This is the starter code for my ml model. There are going to be a lot of additions to it.
import pandas as pd
import tensorflow as tf
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics
from IPython import display
from tensorflow.python.data import Dataset
tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 5
pd.options.display.float_format = '{:.1f}'.format
housing_data = pd.read_csv("train.csv")
housing_data = housing_data.reindex(
np.random.permutation(housing_data.index))
housing_data = pd.get_dummies(
housing_data).dropna()
Preprocessing the features. More work is to be done here.
def preprocess_features(housing_data):
selected_features = housing_data
selected_features = selected_features.drop(columns = "SalePrice")
processed_features = selected_features.copy()
return processed_features
def preprocess_target(housing_data):
output_target = pd.DataFrame()
output_target["SalePrice"] = (housing_data.SalePrice / 1000.0)
return output_target
training_examples = preprocess_features(housing_data.head(900))
training_targets = preprocess_target(housing_data.head(900))
validation_examples = preprocess_features(housing_data.tail(221))
validation_targets = preprocess_target(housing_data.tail(221))
def construct_feature_columns(input_features):
'''
Returns the set of feature columns for tf.estimator classifiers and regressors
'''
return set([tf.feature_column.numeric_column(my_feature) for my_feature in input_features])
def my_input_fn(features, targets, batch_size = 1, shuffle = True, num_epochs = None):
#convert the pandas dataframe into a numpy array
features = {key:np.array(value) for key,value in dict(features).items()}
#create the dataset
ds = Dataset.from_tensor_slices((features,targets))
ds = ds.batch(batch_size).repeat(num_epochs)
#shuffle the data
if shuffle:
ds = ds.shuffle(1000)
#return the features and targets tuple for next iteration
features,labels=
ds.make_one_shot_iterator().get_next()
return features,labels
The Linear Classifier
def train_linear_classifier_model(
learning_rate,
regularization_strength,
steps,
batch_size,
training_examples,
training_targets,
validation_examples,
validation_targets
):
periods = 10
steps_per_period = steps / periods
my_optimizer = tf.train.FtrlOptimizer(learning_rate=learning_rate, l1_regularization_strength=regularization_strength)
my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
linear_classifier = tf.estimator.LinearClassifier(
feature_columns=construct_feature_columns(training_examples),
optimizer=my_optimizer
)
training_input_fn = lambda: my_input_fn(training_examples,
training_targets["SalePrice"],
batch_size=batch_size)
predict_training_input_fn = lambda: my_input_fn(training_examples,
training_targets["SalePrice"],
num_epochs=1,
shuffle=False)
predict_validation_input_fn = lambda: my_input_fn(validation_examples,
validation_targets["SalePrice"],
num_epochs=1,
shuffle=False)
print("Training model...")
print("LogLoss (on validation data):")
training_log_losses = []
validation_log_losses = []
for period in range (0, periods):
linear_classifier.train(
input_fn=training_input_fn,
steps=steps_per_period
)
# Take a break and compute predictions.
training_probabilities = linear_classifier.predict(input_fn=predict_training_input_fn)
training_probabilities = np.array([item['probabilities'] for item in training_probabilities])
validation_probabilities = linear_classifier.predict(input_fn=predict_validation_input_fn)
validation_probabilities = np.array([item['probabilities'] for item in validation_probabilities])
# Compute training and validation loss.
training_log_loss = metrics.log_loss(training_targets, training_probabilities)
validation_log_loss = metrics.log_loss(validation_targets, validation_probabilities)
# Occasionally print the current loss.
print(" period %02d : %0.2f" % (period, validation_log_loss))
# Add the loss metrics from this period to our list.
training_log_losses.append(training_log_loss)
validation_log_losses.append(validation_log_loss)
print("Model training finished.")
# Output a graph of loss metrics over periods.
plt.ylabel("LogLoss")
plt.xlabel("Periods")
plt.title("LogLoss vs. Periods")
plt.tight_layout()
plt.plot(training_log_losses, label="training")
plt.plot(validation_log_losses, label="validation")
plt.legend()
return linear_classifier
linear_classifier = train_linear_classifier_model(
learning_rate=0.1,
regularization_strength=0.1,
steps=300,
batch_size=100,
training_examples=training_examples,
training_targets=training_targets,
validation_examples=validation_examples,
validation_targets = validation_targets)
And here is my Traceback
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-75-f9d203769761> in <module>()
7 training_targets=training_targets,
8 validation_examples=validation_examples,
----> 9 validation_targets = validation_targets)
<ipython-input-74-e1dbd56d9615> in train_linear_classifier_model(learning_rate, regularization_strength, steps, batch_size, training_examples, training_targets, validation_examples, validation_targets)
40 linear_classifier.train(
41 input_fn=training_input_fn,
---> 42 steps=steps_per_period
43 )
44 # Take a break and compute predictions.
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\estimator\estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
350
351 saving_listeners = _check_listeners_type(saving_listeners)
--> 352 loss = self._train_model(input_fn, hooks, saving_listeners)
353 logging.info('Loss for final step: %s.', loss)
354 return self
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\estimator\estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
810 worker_hooks.extend(input_hooks)
811 estimator_spec = self._call_model_fn(
--> 812 features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
813
814 if self._warm_start_settings:
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\estimator\estimator.py in _call_model_fn(self, features, labels, mode, config)
791
792 logging.info('Calling model_fn.')
--> 793 model_fn_results = self._model_fn(features=features, **kwargs)
794 logging.info('Done calling model_fn.')
795
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\estimator\canned\linear.py in _model_fn(features, labels, mode, config)
314 optimizer=optimizer,
315 partitioner=partitioner,
--> 316 config=config)
317
318 super(LinearClassifier, self).__init__(
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\estimator\canned\linear.py in _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, partitioner, config)
155 logit_fn = _linear_logit_fn_builder(
156 units=head.logits_dimension, feature_columns=feature_columns)
--> 157 logits = logit_fn(features=features)
158
159 def _train_op_fn(loss):
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\estimator\canned\linear.py in linear_logit_fn(features)
96 feature_columns=feature_columns,
97 units=units,
---> 98 cols_to_vars=cols_to_vars)
99 bias = cols_to_vars.pop('bias')
100 if units > 1:
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\feature_column\feature_column.py in linear_model(features, feature_columns, units, sparse_combiner, weight_collections, trainable, cols_to_vars)
422 for column in sorted(feature_columns, key=lambda x: x.name):
423 with variable_scope.variable_scope(
--> 424 None, default_name=column._var_scope_name): # pylint: disable=protected-access
425 ordered_columns.append(column)
426 weighted_sum = _create_weighted_sum(
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\ops\variable_scope.py in __enter__(self)
1901
1902 try:
-> 1903 return self._enter_scope_uncached()
1904 except:
1905 if self._graph_context_manager is not None:
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\ops\variable_scope.py in _enter_scope_uncached(self)
2001 self._default_name)
2002 try:
-> 2003 current_name_scope_name = current_name_scope.__enter__()
2004 except:
2005 current_name_scope.__exit__(*sys.exc_info())
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\ops.py in __enter__(self)
5619 try:
5620 self._name_scope = g.name_scope(self._name)
-> 5621 return self._name_scope.__enter__()
5622 except:
5623 self._g_manager.__exit__(*sys.exc_info())
c:\users\user\appdata\local\programs\python\python35\lib\contextlib.py in __enter__(self)
57 def __enter__(self):
58 try:
---> 59 return next(self.gen)
60 except StopIteration:
61 raise RuntimeError("generator didn't yield") from None
c:\users\user\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\framework\ops.py in name_scope(self, name)
3942 # (viz. '-', '\', '/', and '_').
3943 if not _VALID_SCOPE_NAME_REGEX.match(name):
-> 3944 raise ValueError("'%s' is not a valid scope name" % name)
3945 else:
3946 # Scopes created in the root must match the more restrictive
ValueError: 'Exterior1st_Wd Sdng' is not a valid scope name
I am not able to understand what the term "Exterior1st_Wd Sdng" means because I don't have any variable named like that. Thanks in advance!
Upvotes: 3
Views: 8611
Reputation: 6096
I was trying to figure out exactly what scope names are allowed in Tensorflow and this page was the first result, so I'll take the opportunity to post the answer here for anyone else with the same query.
At the time of writing, allowable scope names appear to be restricted by this line in the Tensorflow source:
_VALID_SCOPE_NAME_REGEX = re.compile("^[A-Za-z0-9_.\\-/>]*$")
So in other words, scope names can contain letters (upper and lowercase), numbers, and _
, .
, \
, -
, /
,>
. (Notably, they can't include whitespace.)
Hope someone finds this helpful.
Upvotes: 6
Reputation: 4757
Probably the regexp _VALID_SCOPE_NAME_REGEX
doesn't like the names of the dataset columns. Can you check the names of the columns in the housing_data
pandas dataset? Do they contains symbols '-', '\', '/', or '_'? If yes, can you get rid of them and retry?
Upvotes: 1
Reputation: 69
I am not sure this is related to your error, but I've never seen feature columns as a set, normally they are a list.
I think the construct_feature_columns
function should just return [tf.feature_column.numeric_column(my_feature) for my_feature in input_features]
Let me know if this works!!
Upvotes: 1