本文共 23944 字,大约阅读时间需要 79 分钟。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import seaborn as sns
import matplotlib.gridspec as gridspec

# Load the credit-card transaction CSV into a DataFrame.
data = pd.read_csv('../dataset/creditcard.csv')

# .iloc selects by integer position, counting from 0: positions 0..28 form
# the feature matrix and position 30 (the last column, Class) the labels.
feature_columns = data.iloc[:, 0:29]
label_column = data.iloc[:, 30]
x_train = np.array(feature_columns)
y_train = np.array(label_column)
# df.head(n) returns the first n rows of a DataFrame; with no argument the
# first five rows are shown.
preview_rows = data.head()
print(preview_rows)
Time V1 V2 V3 V4 V5 V6 V7 \0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 V8 V9 V10 V11 V12 V13 V14 \0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 V15 V16 V17 V18 V19 V20 V21 \0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 3 -0.631418 -1.059647 -0.684093 1.965775 -1.232622 -0.208038 -0.108300 4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 V22 V23 V24 V25 V26 V27 V28 \0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 Amount Class 0 149.62 0 1 2.69 0 2 378.66 0 3 123.50 0 4 69.99 0
# Descriptive statistics summarising the central tendency, dispersion and
# shape of each column's distribution (NaN values are excluded).
summary_stats = data.describe()
print(summary_stats)
Time V1 V2 V3 V4 \count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 94813.859575 1.759072e-12 -8.251146e-13 -9.655448e-13 8.321385e-13 std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 V5 V6 V7 V8 V9 \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 1.649983e-13 4.248434e-13 -3.054696e-13 8.777981e-14 -1.179757e-12 std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 V10 V11 V12 V13 V14 \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 7.092627e-13 1.874974e-12 1.053347e-12 7.127607e-13 -1.474787e-13 std 1.088850e+00 1.020713e+00 9.992014e-01 9.952742e-01 9.585956e-01 min -2.458826e+01 -4.797473e+00 -1.868371e+01 -5.791881e+00 -1.921433e+01 25% -5.354257e-01 -7.624942e-01 -4.055715e-01 -6.485393e-01 -4.255740e-01 50% -9.291738e-02 -3.275735e-02 1.400326e-01 -1.356806e-02 5.060132e-02 75% 4.539234e-01 7.395934e-01 6.182380e-01 6.625050e-01 4.931498e-01 max 2.374514e+01 1.201891e+01 7.848392e+00 7.126883e+00 1.052677e+01 V15 V16 V17 V18 V19 \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean -5.231430e-13 -2.282231e-13 -6.425412e-13 4.950748e-13 7.057401e-13 std 9.153160e-01 8.762529e-01 8.493371e-01 8.381762e-01 8.140405e-01 min 
-4.498945e+00 -1.412985e+01 -2.516280e+01 -9.498746e+00 -7.213527e+00 25% -5.828843e-01 -4.680368e-01 -4.837483e-01 -4.988498e-01 -4.562989e-01 50% 4.807155e-02 6.641332e-02 -6.567575e-02 -3.636312e-03 3.734823e-03 75% 6.488208e-01 5.232963e-01 3.996750e-01 5.008067e-01 4.589494e-01 max 8.877742e+00 1.731511e+01 9.253526e+00 5.041069e+00 5.591971e+00 V20 V21 V22 V23 V24 \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 1.766109e-12 -3.405785e-13 -5.723165e-13 -9.725860e-13 1.464148e-12 std 7.709250e-01 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 min -5.449772e+01 -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 25% -2.117214e-01 -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 50% -6.248109e-02 -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 75% 1.330408e-01 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 max 3.942090e+01 2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 V25 V26 V27 V28 Amount \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 mean -6.987110e-13 -5.617884e-13 3.332082e-12 -3.518875e-12 88.349619 std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 25691.160000 Class count 284807.000000 mean 0.001727 std 0.041527 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000
# Count the missing values in every column.
missing_per_column = data.isnull().sum()
print(missing_per_column)
Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64
# Time dimension: summary statistics of the Time column for each class.
# `data.Class == value` selects the records whose Class equals that value
# (1 = fraud, 0 = normal).
for label, class_value in (("Fraud", 1), ("Normal", 0)):
    print(label)
    print(data.Time[data.Class == class_value].describe())
    print()
Fraud
count       492.000000
mean      80746.806911
std       47835.365138
min         406.000000
25%       41241.500000
50%       75568.500000
75%      128483.000000
max      170348.000000
Name: Time, dtype: float64

Normal
count    284315.000000
mean      94838.202258
std       47484.015786
min           0.000000
25%       54230.000000
50%       84711.000000
75%      139333.000000
max      172792.000000
Name: Time, dtype: float64
# Histograms of transaction time, fraud on top and normal below, sharing
# the x axis so the two distributions can be compared directly.
bins = 50
fraud_times = data.Time[data.Class == 1]
normal_times = data.Time[data.Class == 0]

f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 4))

ax1.hist(fraud_times, bins=bins)
ax1.set_title('Fraud')

ax2.hist(normal_times, bins=bins)
ax2.set_title('Normal')

# pyplot-level label calls act on the current axes.
plt.xlabel('Time (in Seconds)')
plt.ylabel('Number of Transactions')
plt.show()
图
# Amount: summary statistics of the transaction amount for each class.
fraud_amounts = data.Amount[data.Class == 1]
normal_amounts = data.Amount[data.Class == 0]

print("Fraud")
print(fraud_amounts.describe())
print()
print("Normal")
print(normal_amounts.describe())
Fraud
count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64

Normal
count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64
# Histograms of transaction amount, fraud on top and normal below, sharing
# the x axis.
bins = 30
fraud_amounts_hist = data.Amount[data.Class == 1]
normal_amounts_hist = data.Amount[data.Class == 0]

f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 4))

ax1.hist(fraud_amounts_hist, bins=bins)
ax1.set_title('Fraud')

ax2.hist(normal_amounts_hist, bins=bins)
ax2.set_title('Normal')

# pyplot-level calls act on the current axes; the y axes are not shared,
# so the log scale is not propagated to the other subplot.
plt.xlabel('Amount ($)')
plt.ylabel('Number of Transactions')
plt.yscale('log')
plt.show()
# Flag transactions whose amount is larger than any fraudulent transaction
# seen in the data. The cut-off is derived from the data itself instead of
# being copied by hand from the describe() output (2125.87 for this dataset),
# so it stays correct if the dataset changes.
max_fraud_amount = data.Amount[data.Class == 1].max()
data['Amount_max_fraud'] = 1
data.loc[data.Amount <= max_fraud_amount, 'Amount_max_fraud'] = 0

# Scatter plots of amount against time, fraud on top and normal below,
# sharing the x axis.
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(12, 6))

ax1.scatter(data.Time[data.Class == 1], data.Amount[data.Class == 1])
ax1.set_title('Fraud')

ax2.scatter(data.Time[data.Class == 0], data.Amount[data.Class == 0])
ax2.set_title('Normal')

plt.xlabel('Time (in Seconds)')
plt.ylabel('Amount')
plt.show()
# Analyse the anonymised features: for each one, overlay the distribution of
# fraudulent and normal transactions to see how their shapes differ.

# Select only the anonymised features (column positions 1..28).
v_features = data.iloc[:, 1:29].columns

plt.figure(figsize=(12, 28 * 4))
gs = gridspec.GridSpec(28, 1)

is_fraud = data.Class == 1
is_normal = data.Class == 0

# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot / displot are the suggested replacements) - confirm the
# seaborn version in use before upgrading.
for i, cn in enumerate(v_features):
    ax = plt.subplot(gs[i])
    feature_values = data[cn]
    sns.distplot(feature_values[is_fraud], bins=50)
    sns.distplot(feature_values[is_normal], bins=50)
    ax.set_xlabel('')
    ax.set_title('histogram of feature: ' + str(cn))

plt.show()
# Drop all of the features that have very similar distributions between the
# two types of transactions.
data = data.drop(
    ['V28', 'V27', 'V26', 'V25', 'V24', 'V23', 'V22', 'V20', 'V15', 'V13', 'V8'],
    axis=1,
)

# Based on the plots above, binary indicator features are created to mark the
# value ranges where fraudulent transactions are more common. Each entry is
# (source column, comparison, threshold); the indicator column is named
# '<source>_' and is 1 where `value <comparison> threshold` holds, else 0.
# The list order fixes the order in which the new columns are appended.
fraud_thresholds = [
    ('V1', '<', -3),
    ('V2', '>', 2.5),
    ('V3', '<', -4),
    ('V4', '>', 2.5),
    ('V5', '<', -4.5),
    ('V6', '<', -2.5),
    ('V7', '<', -3),
    ('V9', '<', -2),
    ('V10', '<', -2.5),
    ('V11', '>', 2),
    ('V12', '<', -2),
    ('V14', '<', -2.5),
    ('V16', '<', -2),
    ('V17', '<', -2),
    ('V18', '<', -2),
    ('V19', '>', 1.5),
    ('V21', '>', 0.6),
]

for source_column, comparison, threshold in fraud_thresholds:
    source_values = data[source_column]
    # Vectorised comparison instead of a per-row Python lambda.
    if comparison == '<':
        in_fraud_range = source_values < threshold
    else:
        in_fraud_range = source_values > threshold
    data[source_column + '_'] = in_fraud_range.astype('int64')

print('每个单一属性的欺诈记录与整车记录的差异统计:')
print(data.describe())
print(data.sum())
每个单一属性的欺诈记录与整车记录的差异统计: Time V1 V2 V3 V4 \count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 94813.859575 1.759072e-12 -8.251146e-13 -9.655448e-13 8.321385e-13 std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 V5 V6 V7 V9 V10 \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 1.649983e-13 4.248434e-13 -3.054696e-13 -1.179757e-12 7.092627e-13 std 1.380247e+00 1.332271e+00 1.237094e+00 1.098632e+00 1.088850e+00 min -1.137433e+02 -2.616051e+01 -4.355724e+01 -1.343407e+01 -2.458826e+01 25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -6.430976e-01 -5.354257e-01 50% -5.433583e-02 -2.741871e-01 4.010308e-02 -5.142873e-02 -9.291738e-02 75% 6.119264e-01 3.985649e-01 5.704361e-01 5.971390e-01 4.539234e-01 max 3.480167e+01 7.330163e+01 1.205895e+02 1.559499e+01 2.374514e+01 V11 V12 V14 V16 V17 \count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 mean 1.874974e-12 1.053347e-12 -1.474787e-13 -2.282231e-13 -6.425412e-13 std 1.020713e+00 9.992014e-01 9.585956e-01 8.762529e-01 8.493371e-01 min -4.797473e+00 -1.868371e+01 -1.921433e+01 -1.412985e+01 -2.516280e+01 25% -7.624942e-01 -4.055715e-01 -4.255740e-01 -4.680368e-01 -4.837483e-01 50% -3.275735e-02 1.400326e-01 5.060132e-02 6.641332e-02 -6.567575e-02 75% 7.395934e-01 6.182380e-01 4.931498e-01 5.232963e-01 3.996750e-01 max 1.201891e+01 7.848392e+00 1.052677e+01 1.731511e+01 9.253526e+00 V18 V19 V21 Amount Class \count 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 284807.000000 mean 4.950748e-13 7.057401e-13 -3.405785e-13 88.349619 0.001727 std 8.381762e-01 8.140405e-01 7.345240e-01 
250.120109 0.041527 min -9.498746e+00 -7.213527e+00 -3.483038e+01 0.000000 0.000000 25% -4.988498e-01 -4.562989e-01 -2.283949e-01 5.600000 0.000000 50% -3.636312e-03 3.734823e-03 -2.945017e-02 22.000000 0.000000 75% 5.008067e-01 4.589494e-01 1.863772e-01 77.165000 0.000000 max 5.041069e+00 5.591971e+00 2.720284e+01 25691.160000 1.000000 Amount_max_fraud V1_ V2_ V3_ \count 284807.000000 284807.000000 284807.000000 284807.000000 mean 0.002117 0.047042 0.024771 0.009838 std 0.045965 0.211730 0.155427 0.098699 min 0.000000 0.000000 0.000000 0.000000 25% 0.000000 0.000000 0.000000 0.000000 50% 0.000000 0.000000 0.000000 0.000000 75% 0.000000 0.000000 0.000000 0.000000 max 1.000000 1.000000 1.000000 1.000000 V4_ V5_ V6_ V7_ \count 284807.000000 284807.000000 284807.000000 284807.000000 mean 0.052794 0.004579 0.006274 0.010059 std 0.223622 0.067510 0.078963 0.099791 min 0.000000 0.000000 0.000000 0.000000 25% 0.000000 0.000000 0.000000 0.000000 50% 0.000000 0.000000 0.000000 0.000000 75% 0.000000 0.000000 0.000000 0.000000 max 1.000000 1.000000 1.000000 1.000000 V9_ V10_ V11_ V12_ \count 284807.000000 284807.000000 284807.000000 284807.000000 mean 0.031530 0.005049 0.018244 0.048408 std 0.174746 0.070877 0.133833 0.214628 min 0.000000 0.000000 0.000000 0.000000 25% 0.000000 0.000000 0.000000 0.000000 50% 0.000000 0.000000 0.000000 0.000000 75% 0.000000 0.000000 0.000000 0.000000 max 1.000000 1.000000 1.000000 1.000000 V14_ V16_ V17_ V18_ \count 284807.000000 284807.000000 284807.000000 284807.000000 mean 0.013697 0.021165 0.002173 0.013943 std 0.116230 0.143935 0.046569 0.117254 min 0.000000 0.000000 0.000000 0.000000 25% 0.000000 0.000000 0.000000 0.000000 50% 0.000000 0.000000 0.000000 0.000000 75% 0.000000 0.000000 0.000000 0.000000 max 1.000000 1.000000 1.000000 1.000000 V19_ V21_ count 284807.000000 284807.000000 mean 0.032952 0.041958 std 0.178512 0.200494 min 0.000000 0.000000 25% 0.000000 0.000000 50% 0.000000 0.000000 75% 0.000000 0.000000 max 1.000000 1.000000 
Time                2.700365e+10
V1                  5.009022e-07
V2                 -2.350312e-07
V3                 -2.744665e-07
V4                  2.368500e-07
V5                  4.533991e-08
V6                  1.209676e-07
V7                 -8.687127e-08
V9                 -3.359903e-07
V10                 2.020664e-07
V11                 5.340173e-07
V12                 3.000407e-07
V14                -4.247506e-08
V16                -6.495627e-08
V17                -1.830887e-07
V18                 1.412354e-07
V19                 2.010940e-07
V21                -9.702072e-08
Amount              2.516259e+07
Class               4.920000e+02
Amount_max_fraud    6.030000e+02
V1_                 1.339800e+04
V2_                 7.055000e+03
V3_                 2.802000e+03
V4_                 1.503600e+04
V5_                 1.304000e+03
V6_                 1.787000e+03
V7_                 2.865000e+03
V9_                 8.980000e+03
V10_                1.438000e+03
V11_                5.196000e+03
V12_                1.378700e+04
V14_                3.901000e+03
V16_                6.028000e+03
V17_                6.190000e+02
V18_                3.971000e+03
V19_                9.385000e+03
V21_                1.195000e+04
dtype: float64
# Add a 'Normal' column that is the complement of Class:
# 1 for non-fraudulent rows, 0 for fraudulent ones.
normal_rows = data.Class == 0
fraud_rows = data.Class == 1
data.loc[normal_rows, 'Normal'] = 1
data.loc[fraud_rows, 'Normal'] = 0

# Rename 'Class' to 'Fraud' so the two target columns read symmetrically.
data = data.rename(columns={'Class': 'Fraud'})

# 492 fraudulent transactions, 284,315 normal transactions:
# 0.172% of transactions were fraud.
print('欺诈记录的占比:')
normal_counts = data.Normal.value_counts()
print(normal_counts)
print()
fraud_counts = data.Fraud.value_counts()
print(fraud_counts)

# Raise the column display limit so head() shows every column.
pd.options.display.max_columns = 101
print(data.head())
欺诈记录的占比:1.0 2843150.0 492Name: Normal, dtype: int640 2843151 492Name: Fraud, dtype: int64 Time V1 V2 V3 V4 V5 V6 V7 \0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 V9 V10 V11 V12 V14 V16 V17 \0 0.363787 0.090794 -0.551600 -0.617801 -0.311169 -0.470401 0.207971 1 -0.255425 -0.166974 1.612727 1.065235 -0.143772 0.463917 -0.114805 2 -1.514654 0.207643 0.624501 0.066084 -0.165946 -2.890083 1.109969 3 -1.387024 -0.054952 -0.226487 0.178228 -0.287924 -1.059647 -0.684093 4 0.817739 0.753074 -0.822843 0.538196 -1.119670 -0.451449 -0.237033 V18 V19 V21 Amount Fraud Amount_max_fraud V1_ V2_ \0 0.025791 0.403993 -0.018307 149.62 0 0 0 0 1 -0.183361 -0.145783 -0.225775 2.69 0 0 0 0 2 -0.121359 -2.261857 0.247998 378.66 0 0 0 0 3 1.965775 -1.232622 -0.108300 123.50 0 0 0 0 4 -0.038195 0.803487 -0.009431 69.99 0 0 0 0 V3_ V4_ V5_ V6_ V7_ V9_ V10_ V11_ V12_ V14_ V16_ V17_ V18_ \0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 V19_ V21_ Normal 0 0 0 1.0 1 0 0 1.0 2 0 0 1.0 3 0 0 1.0 4 0 0 1.0
# Separate frames holding only the fraudulent and only the normal rows.
Fraud = data[data.Fraud == 1]
Normal = data[data.Normal == 1]

# The training set is a random 80% of the fraud rows plus a random 80% of
# the normal rows; the number of sampled fraud rows is kept for later use.
train_fraud = Fraud.sample(frac=0.8)
count_Frauds = len(train_fraud)
train_normal = Normal.sample(frac=0.8)
X_train = pd.concat([train_fraud, train_normal], axis=0)

# The test set is every transaction whose index is not in the training set.
in_train = data.index.isin(X_train.index)
X_test = data.loc[~in_train]

# Shuffle both frames so that training is done in a random order.
X_train = shuffle(X_train)
X_test = shuffle(X_test)

# Targets are the two columns Fraud and Normal, side by side.
y_train = pd.concat([X_train.Fraud, X_train.Normal], axis=1)
y_test = pd.concat([X_test.Fraud, X_test.Normal], axis=1)

# Remove the target columns from the feature frames.
target_columns = ['Fraud', 'Normal']
X_train = X_train.drop(target_columns, axis=1)
X_test = X_test.drop(target_columns, axis=1)

# Sanity check: features and targets must have matching lengths.
print()
print('切割[学习、校验]处理后的记录数量:')
for split_part in (X_train, y_train, X_test, y_test):
    print(len(split_part))
切割[学习、校验]处理后的记录数量:
227846
227846
56961
56961
# Due to the imbalance in the data, `ratio` acts as a weighting factor for
# the fraud target. It is the number of training transactions divided by the
# number of fraudulent training transactions, i.e.
#     (# of fraud in training set) * ratio = (# of training transactions)
ratio = len(X_train) / count_Frauds

print()
print('数据的占比:', ratio)

# Scale the Fraud target column by the ratio in both splits. Explicit column
# assignment is used rather than in-place arithmetic on an attribute.
y_train['Fraud'] = y_train['Fraud'] * ratio
y_test['Fraud'] = y_test['Fraud'] * ratio

print('训练数据的数量:\n', y_train.Fraud)
# Fixed: this line previously printed y_train.Fraud a second time, so the
# test targets were never shown.
print('测试数据的数量:\n', y_test.Fraud)
数据的占比: 578.2893401015228训练数据的数量: 22023 0.0185560 0.0112703 0.0165996 0.0245243 0.0238885 0.039966 0.0112043 0.0171013 0.0255567 0.0283619 0.0203942 0.082908 0.0245906 0.0225464 0.013679 0.0107609 0.0140858 0.0156028 0.0158914 0.072341 0.0208184 0.0111027 0.0217998 0.0229747 0.0281186 0.0259994 0.0112170 0.0204651 0.0184758 0.0118430 0.015155 0.028982 0.0193685 0.0209645 0.0201038 0.0226108 0.0219122 0.0266437 0.045419 0.099879 0.0167812 0.0117954 0.020935 0.0238062 0.013355 0.071356 0.054123 0.095958 0.0280240 0.0271372 0.0259493 0.0149400 0.0231110 0.030784 0.0186483 0.074528 0.0187912 0.017719 0.042839 0.0Name: Fraud, Length: 227846, dtype: float64测试数据的数量: 22023 0.0185560 0.0112703 0.0165996 0.0245243 0.0238885 0.039966 0.0112043 0.0171013 0.0255567 0.0283619 0.0203942 0.082908 0.0245906 0.0225464 0.013679 0.0107609 0.0140858 0.0156028 0.0158914 0.072341 0.0208184 0.0111027 0.0217998 0.0229747 0.0281186 0.0259994 0.0112170 0.0204651 0.0184758 0.0118430 0.015155 0.028982 0.0193685 0.0209645 0.0201038 0.0226108 0.0219122 0.0266437 0.045419 0.099879 0.0167812 0.0117954 0.020935 0.0238062 0.013355 0.071356 0.054123 0.095958 0.0280240 0.0271372 0.0259493 0.0149400 0.0231110 0.030784 0.0186483 0.074528 0.0187912 0.017719 0.042839 0.0
转载地址:http://pgvab.baihongyu.com/