Roxana Slj
Roxana Slj

Reputation: 331

Random forest visualization in python

TypeError                                 Traceback (most recent call last)
<ipython-input-25-e7781de34abc> in <module>
      3                feature_names = fn,
      4                class_names=cn,
----> 5                filled = False);
      6 fig.savefig('rf_individualtree.png')

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/tree/_export.py in plot_tree(decision_tree, max_depth, feature_names, class_names, label, filled, impurity, node_ids, proportion, rotate, rounded, precision, ax, fontsize)
    174         proportion=proportion, rotate=rotate, rounded=rounded,
    175         precision=precision, fontsize=fontsize)
--> 176     return exporter.export(decision_tree, ax=ax)
    177 
    178 

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/tree/_export.py in export(self, decision_tree, ax)
    565         ax.set_axis_off()
    566         my_tree = self._make_tree(0, decision_tree.tree_,
--> 567                                   decision_tree.criterion)
    568         draw_tree = buchheim(my_tree)
    569 

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/tree/_export.py in _make_tree(self, node_id, et, criterion, depth)
    546         # traverses _tree.Tree recursively, builds intermediate
    547         # "_reingold_tilford.Tree" object
--> 548         name = self.node_to_str(et, node_id, criterion=criterion)
    549         if (et.children_left[node_id] != _tree.TREE_LEAF
    550                 and (self.max_depth is None or depth <= self.max_depth)):

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/tree/_export.py in node_to_str(self, tree, node_id, criterion)
    340                                           np.argmax(value),
    341                                           characters[2])
--> 342             node_string += class_name
    343 
    344         # Clean up any trailing newlines

TypeError: can only concatenate str (not "numpy.int64") to str
import matplotlib.pyplot as plt
import numpy as np
import PIL
import pydot
import warnings
from sklearn import tree
from glob import glob
from IPython.display import display, Image
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_graphviz
%matplotlib inline
# Reproducer for the TypeError shown in the traceback: train a random forest
# on heart.csv and try to draw its first tree with sklearn's plot_tree.

# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Load the dataset from the working directory.
df = pd.read_csv('heart.csv')

# Preview the first rows (only displayed when run as the last line of a cell).
df.head()

# Features are every column except 'target'; labels are the 'target' column
# as a plain array.
x = df.loc[:, df.columns != 'target']
y = df.loc[:, 'target'].values

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the split.
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.2,random_state=0)


from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then reuse it for the test split.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)


# Forest of 100 trees with a fixed seed.
rf = RandomForestClassifier(n_estimators=100,
                            random_state=0)
rf.fit(x_train, y_train)

# NOTE(review): this slice drops the first column and keeps every later one,
# whereas x above was built from all columns except 'target' -- verify that
# these names actually line up with the columns the model was trained on.
fn=features = list(df.columns[1:])
# This is the source of the TypeError in the traceback: cn is the raw target
# column, so plot_tree picks a numpy.int64 out of it and tries to append it to
# the node label string.
cn=df.target


# Single 4x4 inch axes rendered at 800 dpi.
fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=800)
# Draw only the first tree of the fitted forest; the trailing semicolon
# suppresses the notebook's echo of the return value.
tree.plot_tree(rf.estimators_[0],
               feature_names = fn, 
               class_names=cn,
               filled = False);
# Write the rendered figure to a PNG in the working directory.
fig.savefig('rf_individualtree.png')


I am following this structure for visualizing my random forest graph

: https://i.sstatic.net/MkH71.png

So when I run the highlighted code, I get the error: TypeError: can only concatenate str (not "numpy.int64") to str

I'm using this dataset from Kaggle: https://www.kaggle.com/ronitf/heart-disease-uci

I would appreciate it if you could help me.

Upvotes: 3

Views: 2147

Answers (1)

SergioR
SergioR

Reputation: 1456

The class_names parameter of plot_tree requires a list of strings, but in your code cn holds integers (numpy.int64, to be precise). All you need to do is convert those values to strings and the problem is solved.

#some code before
# Feature names must match the columns the model was trained on, i.e. every
# column except 'target'. Slicing with df.columns[1:] would instead drop the
# first feature and keep 'target', shifting every label in the plot.
fn = features = [col for col in df.columns if col != 'target']

# class_names must hold one string per class, in ascending class order,
# because plot_tree indexes it by class position. Converting the whole target
# column would give one entry per data row, so the labels would come from the
# first rows rather than from the classes. rf.classes_ holds the sorted
# distinct target values seen during fit.
cn = [str(label) for label in rf.classes_]

fig, axes = plt.subplots(nrows = 1,ncols = 1,figsize = (4,4), dpi=800)
# Trailing semicolon suppresses the notebook's echo of the return value.
tree.plot_tree(rf.estimators_[0],
               feature_names=fn,
               class_names=cn,
               filled=False);
#some code after

Upvotes: 2

Related Questions