Link to the previous post: https://statinfer.com/204-6-1-introduction-to-svm/
In this practice session, we will apply everything we discussed about simple classifiers in the previous post.
Practice : Simple Classifiers
- Dataset: Fraud Transaction/Transactions_sample.csv
- Draw a classification graph that shows all the classes
- Build a logistic regression classifier
- Draw the classifier on the data plot
Solution
In [1]:
# Read the fraud-transactions sample CSV into a DataFrame and preview its first six rows.
import pandas as pd

Transactions_sample = pd.read_csv(
    "datasets/Fraud Transaction/Transactions_sample.csv"
)
Transactions_sample.head(n=6)
Out[1]:
In [2]:
# Display the column labels of the loaded transactions DataFrame.
Transactions_sample.columns
Out[2]:
In [3]:
#The clasification graph distinguishing the two classes with colors or shapes.
import matplotlib.pyplot as plt
%matplotlib inline
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(Transactions_sample.Total_Amount[Transactions_sample.Fraud_id==0],Transactions_sample.Tr_Count_week[Transactions_sample.Fraud_id==0], s=10, c='b', marker="o", label='Fraud_id=0')
ax1.scatter(Transactions_sample.Total_Amount[Transactions_sample.Fraud_id==1],Transactions_sample.Tr_Count_week[Transactions_sample.Fraud_id==1], s=10, c='r', marker="+", label='Fraud_id=1')
plt.legend(loc='upper left');
plt.show()
In [4]:
# Logistic regression model1: model Fraud_id as a function of Total_Amount and
# Tr_Count_week, fit it, and show the fit summary as the cell output.
import statsmodels.formula.api as sm

model1 = sm.logit(
    data=Transactions_sample,
    formula='Fraud_id ~ Total_Amount+Tr_Count_week',
)
fitted1 = model1.fit()
fitted1.summary()
Out[4]:
In [5]:
# Slope and intercept of the logistic-regression decision boundary.
#
# At the 0.5 probability threshold the fitted logit is zero:
#     b0 + b1*Total_Amount + b2*Tr_Count_week = 0
# Solving for Tr_Count_week (the y axis of the plot) gives the line
#     Tr_Count_week = (-b0/b2) + (-b1/b2) * Total_Amount
#
# The estimated coefficients live in `fitted1.params`; `normalized_cov_params`
# is the parameter covariance matrix and must not be used for the boundary.
coef = fitted1.params
print(coef)

# Look the coefficients up by name rather than by position so the result does
# not depend on the order of terms in the formula.
slope1 = coef["Total_Amount"] / (-coef["Tr_Count_week"])
intercept1 = coef["Intercept"] / (-coef["Tr_Count_week"])
In [8]:
# Scatter plot of both classes with the logistic-regression decision line
# (slope1, intercept1) drawn on top.
import matplotlib.pyplot as plt

fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(Transactions_sample.Total_Amount[Transactions_sample.Fraud_id==0],Transactions_sample.Tr_Count_week[Transactions_sample.Fraud_id==0], s=30, c='b', marker="o", label='Fraud_id 0')
ax1.scatter(Transactions_sample.Total_Amount[Transactions_sample.Fraud_id==1],Transactions_sample.Tr_Count_week[Transactions_sample.Fraud_id==1], s=30, c='r', marker="+", label='Fraud_id 1')

# Pin the view to the observed data range. Series.min()/.max() skip NaN values,
# unlike the builtin min()/max().
plt.xlim(Transactions_sample.Total_Amount.min(), Transactions_sample.Total_Amount.max())
plt.ylim(Transactions_sample.Tr_Count_week.min(), Transactions_sample.Tr_Count_week.max())
plt.legend(loc='upper left');

# Draw the boundary across the whole visible x range instead of starting at a
# hard-coded x=0, so the line always spans the plot from edge to edge.
x_min, x_max = ax1.get_xlim()
ax1.plot([x_min, x_max], [x_min*slope1+intercept1, x_max*slope1+intercept1])
plt.show()
In [9]:
# Accuracy of the model: turn predicted probabilities into class labels at a
# fixed threshold, build the confusion matrix, and derive accuracy and error.
import numpy as np
from sklearn.metrics import confusion_matrix as cm

# Predicted fraud probabilities for every row of the training sample.
predicted_values=fitted1.predict(Transactions_sample[["Total_Amount", "Tr_Count_week"]])
# Show the first ten predictions (a [1:10] slice would skip the first row).
print('Predicted Values: ', predicted_values[:10])

# Probabilities strictly above the threshold are labelled 1.0, the rest 0.0.
threshold=0.5
predicted_class=np.where(predicted_values>threshold, 1.0, 0.0)
print('Predicted Class: ', predicted_class)

# Rows are actual classes, columns are predicted classes. Passing labels keeps
# the matrix 2x2 even if one class is absent from the data or the predictions.
ConfusionMatrix = cm(Transactions_sample['Fraud_id'], predicted_class, labels=[0, 1])
print('Confusion Matrix: ', ConfusionMatrix)

# Correct predictions sit on the diagonal; divide by the total number of rows.
accuracy=np.trace(ConfusionMatrix)/ConfusionMatrix.sum()
print('Accuracy: ', accuracy)
error=1-accuracy
print('Error: ', error)