1. 程式人生 > jagbiam1000的專欄

jagbiam1000的專欄

# Build a class-balanced subset of `data` by random under-sampling: keep every
# row with Class == 1 and an equally sized random sample of rows with
# Class == 0. `data` (a DataFrame with a 'Class' column) and `np` (numpy) are
# expected to be defined earlier in the file.

# Features are every column except 'Class'; the label is the 'Class' column,
# kept as a one-column DataFrame.
X = data.loc[:, data.columns != 'Class']
y = data.loc[:, data.columns == 'Class']

# Number of Class == 1 rows and their index labels.
number_records_fraud = len(data[data.Class == 1])
fraud_indices = np.array(data[data.Class == 1].index)

# Index labels of the Class == 0 rows.
normal_indices = np.array(data[data.Class == 0].index)

# Draw, without replacement, as many Class == 0 labels as there are
# Class == 1 rows. np.random.choice already returns an ndarray, so no further
# conversion is needed.
random_normal_indices = np.random.choice(
    normal_indices, number_records_fraud, replace=False
)

# Combined index labels of the balanced sample.
under_sample_indices = np.concatenate([fraud_indices, random_normal_indices])

# These arrays hold index *labels* (they come from `.index`), so select rows
# with .loc. .iloc would interpret them as positions and silently pick the
# wrong rows whenever the index is not the default 0..n-1 range.
under_sample_data = data.loc[under_sample_indices, :]

# Feature / label split of the balanced sample, mirroring X and y above.
X_undersample = under_sample_data.loc[:, under_sample_data.columns != 'Class']
y_undersample = under_sample_data.loc[:, under_sample_data.columns == 'Class']

print(X_undersample)
print(y_undersample)

# Report the class balance of the resampled data as fractions of its size.
print("Percentage of normal transactions: ", len(under_sample_data[under_sample_data.Class == 0])/len(under_sample_data))
print("Percentage of fraud transactions: ", len(under_sample_data[under_sample_data.Class == 1])/len(under_sample_data))
print("Total number of transactions in resampled data: ", len(under_sample_data))