Link to the previous post: https://statinfer.com/204-4-7-problem-of-overfitting/
Solution
# Prune the tree by constraining its growth: cap the depth and the number of
# leaves, and require a minimum sample count for every split and every leaf.
tree_bias = tree.DecisionTreeClassifier(
    criterion='gini',
    splitter='best',
    max_depth=10,
    min_samples_split=30,
    min_samples_leaf=30,
    max_leaf_nodes=20,
)
tree_bias.fit(X_train, y_train)

# Accuracy of the pruned tree on the training data
tree_bias.score(X_train, y_train)
# Let's prune the tree much further and deliberately oversimplify the model:
# a single split (max_depth=1, max_leaf_nodes=2) with large minimum sample
# counts per split and per leaf.
tree_bias1 = tree.DecisionTreeClassifier(
    criterion='gini',
    splitter='random',
    max_depth=1,
    min_samples_split=100,
    min_samples_leaf=100,
    max_leaf_nodes=2,
    # splitter='random' picks its split at random; pin the seed so the
    # accuracies reported below are the same on every run.
    random_state=0,
)
tree_bias1.fit(X_train, y_train)

# Training accuracy of the oversimplified model
tree_bias1.score(X_train, y_train)

# Validation accuracy on the held-out test data
tree_bias1.score(X_test, y_test)