1. 程式人生 > 天池大賽o2o優惠券第一名程式碼解讀(3)

天池大賽o2o優惠券第一名程式碼解讀(3)

感謝大神!!!

# Extract merchant-level features.

# Dataset 3: keep only the columns the merchant features are built from.
merchant3 = feature3[['merchant_id', 'coupon_id', 'distance', 'date_received', 'date']]

# One row per distinct merchant_id; used as the left-hand frame of the
# feature merges. The non-inplace drop_duplicates() is assigned back so that
# no in-place mutation is attempted on a column slice of merchant3.
t = merchant3[['merchant_id']].drop_duplicates()

# Rows whose date is not the 'null' placeholder, i.e. the records counted as
# sales. .copy() yields an independent frame, so adding the counter column
# below does not write into a slice of merchant3.
t1 = merchant3[merchant3.date != 'null'][['merchant_id']].copy()
t1['total_sales'] = 1
# Number of sales per merchant.
t1 = t1.groupby('merchant_id').agg('sum').reset_index()

# Sales that were made with a coupon (the positive samples).
t2 = merchant3[(merchant3.date != 'null') & (merchant3.coupon_id != 'null')][['merchant_id']].copy()
t2['sales_use_coupon'] = 1
t2 = t2.groupby('merchant_id').agg('sum').reset_index()

# Total number of coupon records per merchant.
t3 = merchant3[merchant3.coupon_id != 'null'][['merchant_id']].copy()
t3['total_coupon'] = 1
t3 = t3.groupby('merchant_id').agg('sum').reset_index()

# Distances of the coupon sales, used for the per-merchant distance statistics.
t4 = merchant3[(merchant3.date != 'null') & (merchant3.coupon_id != 'null')][['merchant_id', 'distance']].copy()
# 'null' is mapped to -1 first so the column can be cast to int ...
t4 = t4.replace('null', -1)
t4['distance'] = t4['distance'].astype('int')
# ... and -1 is then turned into NaN so missing distances are left out of
# the aggregates below.
t4 = t4.replace(-1, np.nan)

# Minimum distance per merchant.
t5 = t4.groupby('merchant_id').agg('min').reset_index()
t5 = t5.rename(columns={'distance': 'merchant_min_distance'})

# Maximum distance per merchant.
t6 = t4.groupby('merchant_id').agg('max').reset_index()
t6 = t6.rename(columns={'distance': 'merchant_max_distance'})

# Mean distance per merchant.
t7 = t4.groupby('merchant_id').agg('mean').reset_index()
t7 = t7.rename(columns={'distance': 'merchant_mean_distance'})

# Median distance per merchant.
t8 = t4.groupby('merchant_id').agg('median').reset_index()
t8 = t8.rename(columns={'distance': 'merchant_median_distance'})

# Left-join every partial feature table onto the distinct merchant list, so
# merchants missing from a partial table get NaN in its columns.
merchant3_feature = pd.merge(t, t1, on='merchant_id', how='left')
merchant3_feature = pd.merge(merchant3_feature, t2, on='merchant_id', how='left')
merchant3_feature = pd.merge(merchant3_feature, t3, on='merchant_id', how='left')
merchant3_feature = pd.merge(merchant3_feature, t5, on='merchant_id', how='left')
merchant3_feature = pd.merge(merchant3_feature, t6, on='merchant_id', how='left')
merchant3_feature = pd.merge(merchant3_feature, t7, on='merchant_id', how='left')
merchant3_feature = pd.merge(merchant3_feature, t8, on='merchant_id', how='left')

# Replace NaN with 0 for merchants that have no coupon sales.
merchant3_feature['sales_use_coupon'] = merchant3_feature['sales_use_coupon'].replace(np.nan, 0)
# Coupon usage rate: coupon sales divided by total coupon records.
merchant3_feature['merchant_coupon_transfer_rate'] = (
    merchant3_feature['sales_use_coupon'].astype('float') / merchant3_feature['total_coupon']
)
# Share of all sales that used a coupon.
merchant3_feature['coupon_rate'] = (
    merchant3_feature['sales_use_coupon'].astype('float') / merchant3_feature['total_sales']
)
# total_coupon is filled last, after the rate that divides by it is computed.
merchant3_feature['total_coupon'] = merchant3_feature['total_coupon'].replace(np.nan, 0)

# Write the features without the index column.
merchant3_feature.to_csv('data/merchant3_feature.csv', index=False)