1. 程式人生 > 天池大賽o2o優惠券第一名程式碼解讀(2)

天池大賽o2o優惠券第一名程式碼解讀(2)

感謝!!!!

# Extract coupon-related features

def calc_discount_rate(s):
    """Turn a raw coupon discount field into a fractional rate.

    The argument is stringified and split on ':'.

    * No colon: the text is parsed as a float and returned unchanged.
    * With a colon: the first field is the spending threshold and the second
      the amount taken off; the result is ``1 - reduction / threshold``.

    Raises ValueError when a field cannot be parsed as a float and
    ZeroDivisionError when the threshold is zero.
    """
    fields = str(s).split(':')
    if len(fields) != 1:
        # Only the first two fields are used; any further ones are ignored.
        return 1.0 - float(fields[1]) / float(fields[0])
    return float(fields[0])
def get_discount_man(s):
    """Return the spending threshold of a "threshold:reduction" coupon.

    The argument is stringified and split on ':'. When there is no colon
    the string 'null' is returned; otherwise the first field is returned
    as an int.
    """
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return 'null'
    else:
        # The value must sit on the same line as `return`; a bare `return`
        # followed by the expression on the next line yields None.
        return int(s[0])


def get_discount_jian(s):
    """Return the reduction amount of a "threshold:reduction" coupon.

    The argument is stringified and split on ':'. When there is no colon
    the string 'null' is returned; otherwise the second field is returned
    as an int.
    """
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return 'null'
    else:
        return int(s[1])


def is_man_jian(s):
    """Return 1 if the discount field contains a ':' separator, else 0."""
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return 0
    else:
        return 1


# Dataset 3
# Convert the received date into calendar features.
print(dataset3)
# Day of the week on which the coupon was received.
# date.weekday() is 0-based (Monday == 0); the +1 makes the feature run 1..7.
dataset3['day_of_week'] = dataset3.date_received.astype('str').apply(
    lambda x: date(int(x[0:4]), int(x[4:6]), int(x[6:8])).weekday() + 1)
# Day of the month: characters 6-7 of the YYYYMMDD string.
dataset3['day_of_month'] = dataset3.date_received.astype('str').apply(
    lambda x: int(x[6:8]))
# Signed number of days from 2016-06-30 to the received date.
dataset3['days_distance'] = dataset3.date_received.astype('str').apply(
    lambda x: (date(int(x[0:4]), int(x[4:6]), int(x[6:8])) - date(2016, 6, 30)).days)
# Spending threshold that must be reached before the reduction applies.
dataset3['discount_man'] = dataset3.discount_rate.apply(get_discount_man)
# Amount taken off once the threshold is reached.
dataset3['discount_jian'] = dataset3.discount_rate.apply(get_discount_jian)
# 1 when the coupon is of the "threshold:reduction" kind, otherwise 0.
dataset3['is_man_jian'] = dataset3.discount_rate.apply(is_man_jian)
# Overwrite the raw field last: the three columns above parse the raw string.
dataset3['discount_rate'] = dataset3.discount_rate.apply(calc_discount_rate)
# Number of rows per coupon_id. The explicit copy keeps the column assignment
# from writing into a slice of dataset3.
d = dataset3[['coupon_id']].copy()
d['coupon_count'] = 1
d = d.groupby('coupon_id').agg('sum').reset_index()
dataset3 = pd.merge(dataset3, d, on='coupon_id', how='left')
print(dataset3)
# NOTE(review): index=None is kept from the original and looks intended to
# leave the row index out of the file - confirm against the pandas docs.
dataset3.to_csv('data/coupon3_feature.csv', index=None)

# Dataset 2: same features, with days_distance measured from 2016-05-14.
dataset2['day_of_week'] = dataset2.date_received.astype('str').apply(
    lambda x: date(int(x[0:4]), int(x[4:6]), int(x[6:8])).weekday() + 1)
dataset2['day_of_month'] = dataset2.date_received.astype('str').apply(
    lambda x: int(x[6:8]))
dataset2['days_distance'] = dataset2.date_received.astype('str').apply(
    lambda x: (date(int(x[0:4]), int(x[4:6]), int(x[6:8])) - date(2016, 5, 14)).days)
dataset2['discount_man'] = dataset2.discount_rate.apply(get_discount_man)
dataset2['discount_jian'] = dataset2.discount_rate.apply(get_discount_jian)
dataset2['is_man_jian'] = dataset2.discount_rate.apply(is_man_jian)
dataset2['discount_rate'] = dataset2.discount_rate.apply(calc_discount_rate)
d = dataset2[['coupon_id']].copy()
d['coupon_count'] = 1
d = d.groupby('coupon_id').agg('sum').reset_index()
dataset2 = pd.merge(dataset2, d, on='coupon_id', how='left')
dataset2.to_csv('data/coupon2_feature.csv', index=None)

# Dataset 1: same features, with days_distance measured from 2016-04-13.
# astype('str') is applied here as well, matching datasets 2 and 3, so the
# string slicing also works when date_received is not already a string column.
dataset1['day_of_week'] = dataset1.date_received.astype('str').apply(
    lambda x: date(int(x[0:4]), int(x[4:6]), int(x[6:8])).weekday() + 1)
dataset1['day_of_month'] = dataset1.date_received.astype('str').apply(
    lambda x: int(x[6:8]))
dataset1['days_distance'] = dataset1.date_received.astype('str').apply(
    lambda x: (date(int(x[0:4]), int(x[4:6]), int(x[6:8])) - date(2016, 4, 13)).days)
dataset1['discount_man'] = dataset1.discount_rate.apply(get_discount_man)
dataset1['discount_jian'] = dataset1.discount_rate.apply(get_discount_jian)
dataset1['is_man_jian'] = dataset1.discount_rate.apply(is_man_jian)
dataset1['discount_rate'] = dataset1.discount_rate.apply(calc_discount_rate)
d = dataset1[['coupon_id']].copy()
d['coupon_count'] = 1
d = d.groupby('coupon_id').agg('sum').reset_index()
dataset1 = pd.merge(dataset1, d, on='coupon_id', how='left')
dataset1.to_csv('data/coupon1_feature.csv', index=None)