1. Pre-processing & EDA
# Print a summary of the transaction table: column names, non-null counts and dtypes.
data_transaction.info()
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 customerEmail 623 non-null object
1 transactionId 623 non-null object
2 orderId 623 non-null object
3 paymentMethodId 623 non-null object
4 paymentMethodRegistrationFailure 623 non-null int64
5 paymentMethodType 623 non-null object
6 paymentMethodProvider 623 non-null object
7 transactionAmount 623 non-null int64
8 transactionFailed 623 non-null int64
9 orderState 623 non-null object
dtypes: int64(3), object(7)
# Print a summary of the customer table: column names, non-null counts and dtypes.
data_customer.info()
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 customerEmail 168 non-null object
1 customerPhone 168 non-null object
2 customerDevice 168 non-null object
3 customerIPAddress 168 non-null object
4 customerBillingAddress 168 non-null object
5 No_Transactions 168 non-null int64
6 No_Orders 168 non-null int64
7 No_Payments 168 non-null int64
8 Fraud 168 non-null bool
dtypes: bool(1), int64(3), object(5)
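- Check for missing values using "missingno" (about 10% of the cells are randomly masked to NaN below to show what the plot looks like when NA values exist)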
import matplotlib.pyplot as plt
import missingno as msno
import numpy as np
# Work on a copy of the transaction table, then blank out every cell whose
# uniform random draw is below 0.1 so the nullity plot has gaps to display.
data_nan = data_transaction.copy()
data_nan = data_nan.mask(np.random.random(data_nan.shape) < 0.1)

# Draw the missingno nullity matrix and add a title via pyplot.
msno.matrix(df=data_nan, figsize=(10, 6), color=(0.1, 0.6, 0.8))
plt.title('Transaction data when NA exists', fontsize=20)