1. 程式人生 > >Sql行轉列的兩個小例子

Sql行轉列的兩個小例子

SQL行轉列

所謂行轉列,就是針對某一個category型別的列(n×1的矩陣,實質上共有m個類別),將其多行轉換成為多列(新增m列)。

  1. 例1:模擬順豐SQL小例子
##順豐面試題(SQLite編碼實現題目)
import sqlite3
# Open the demo database and create the waybill table used by example 1.
# The path is specific to the author's machine; point it at any writable
# location when reproducing the example.
conn = sqlite3.connect('C:/Users/Administrator/Kaggle_in/SQLite/country_state.sqlite')
curs = conn.cursor()
curs.execute('create table waybill_constype(Consign_day int,waybill_no int,Class varchar(20),Userid varchar(20))')

# One tuple per shipment: (Consign_day, waybill_no, Class, Userid).
# Consign_day is stored as a yyyymmdd integer; three rows deliberately carry
# an empty-string Userid.  Row order matches the original insert order.
waybill_rows = [
    (20180401, 755200201, '檔案', 'A001'),
    (20180403, 755200202, '娛樂', 'A001'),
    (20180405, 755200203, '食品', 'A001'),
    (20180407, 755299911, '檔案', 'A001'),
    (20180401, 755200204, '娛樂', 'A003'),
    (20180401, 755200205, '檔案', ''),
    (20180401, 755200207, '食品', 'A002'),
    (20180411, 755288819, '檔案', 'A002'),
    (20180401, 755200247, '食品', 'A002'),
    (20180401, 755200241, '檔案', 'A003'),
    (20180403, 755200232, '娛樂', 'A003'),
    (20180405, 755200233, '食品', 'A003'),
    (20180507, 755299931, '檔案', 'A004'),
    (20180501, 755200234, '娛樂', 'A003'),
    (20180501, 755200235, '檔案', ''),
    (20180501, 755200237, '食品', 'A004'),
    (20180511, 755288829, '檔案', 'A002'),
    (20180501, 755200217, '食品', 'A002'),
    (20190401, 755200234, '娛樂', 'A003'),
    (20190401, 755200235, '檔案', ''),
    (20190401, 755200237, '食品', 'A004'),
    (20190411, 755288829, '檔案', 'A002'),
    (20190421, 755200217, '食品', 'A002'),
    # Second batch: introduces the two extra classes '衣物' and '託運'.
    (20180405, 755200233, '衣物', 'A003'),
    (20180507, 755299931, '託運', 'A004'),
    (20180501, 755200234, '衣物', 'A003'),
    (20180501, 755200235, '託運', 'A001'),
    (20180501, 755200237, '食品', 'A004'),
    (20180511, 755288829, '衣物', 'A002'),
    (20180501, 755200217, '託運', 'A002'),
    (20190401, 755200234, '衣物', 'A003'),
    (20190401, 755200235, '託運', 'A003'),
    (20190401, 755200237, '食品', 'A004'),
    (20190411, 755288829, '檔案', 'A002'),
    (20190421, 755200217, '託運', 'A002'),
]

# A single parameterized statement replaces 35 near-identical SQL strings:
# the values are bound by the driver instead of being spliced into SQL text.
curs.executemany(
    "insert into waybill_constype(Consign_day,waybill_no,Class,Userid) values(?,?,?,?)",
    waybill_rows,
)

# Dump the whole table to confirm what was loaded.
curs.execute("SELECT * FROM waybill_constype")
data2 = curs.fetchall()
print(data2)

結果:

[(20180401, 755200201, '檔案', 'A001'), (20180403, 755200202, '娛樂', 'A001'), 
(20180405, 755200203, '食品', 'A001'), (20180407, 755299911, '檔案', 'A001'),
(20180401, 755200204, '娛樂', 'A003'), (20180401, 755200205, '檔案', ''),
(20180401, 755200207, '食品', 'A002'), (20180411, 755288819, '檔案', 'A002'),
(20180401, 755200207, '食品', 'A002'), (20180411, 755288819, '檔案', 'A002'), 
(20180401, 755200247, '食品', 'A002'), (20180401, 755200241, '檔案', 'A003'),
(20180403, 755200232, '娛樂', 'A003'), (20180405, 755200233, '食品', 'A003'), 
(20180507, 755299931, '檔案', 'A004'), (20180501, 755200234, '娛樂', 'A003'), 
(20180501, 755200235, '檔案', ''), (20180501, 755200237, '食品', 'A004'), 
(20180511, 755288829, '檔案', 'A002'), (20180501, 755200217, '食品', 'A002'), 
(20190401, 755200234, '娛樂', 'A003'), (20190401, 755200235, '檔案', ''),
(20190401, 755200237, '食品', 'A004'), (20190411, 755288829, '檔案', 'A002'), 
(20190421, 755200217, '食品', 'A002'), (20180405, 755200233, '衣物', 'A003'), 
(20180507, 755299931, '託運', 'A004'), (20180501, 755200234, '衣物', 'A003'), 
(20180501, 755200235, '託運', 'A001'), (20180501, 755200237, '食品', 'A004'), 
(20180511, 755288829, '衣物', 'A002'), (20180501, 755200217, '託運', 'A002'), 
(20190401, 755200234, '衣物', 'A003'), (20190401, 755200235, '託運', 'A003'),
(20190401, 755200237, '食品', 'A004'), (20190411, 755288829, '檔案', 'A002'),
 (20190421, 755200217, '託運', 'A002')]
# 1.1.1 Count the collected shipments, first per member and then per
# member and item class.
per_user_sql = "SELECT Userid,count(Userid)  FROM waybill_constype group by Userid"
per_user_class_sql = "SELECT Userid,Class,count(Userid)  FROM waybill_constype group by Userid,Class"

# Both statements are run in turn; data3 ends up holding the rows of the
# last one (the per-member, per-class breakdown), which is what gets printed.
for statement in (per_user_sql, per_user_class_sql):
    curs.execute(statement)
    data3 = curs.fetchall()

print(data3)

結果

[('', '檔案', 3), ('A001', '娛樂', 1), ('A001', '託運', 1), ('A001', '檔案', 2), 
('A001', '食品', 1), ('A002', '託運', 2), ('A002', '檔案', 5), ('A002', '衣物', 1),
 ('A002', '食品', 5), ('A003', '娛樂', 4), ('A003', '託運', 1), ('A003', '檔案', 1),
  ('A003', '衣物', 3), ('A003', '食品', 1), ('A004', '託運', 1), ('A004', '檔案', 1),
   ('A004', '食品', 4)]
# 1.1.2 Per member and item class, count the shipments collected in
# April 2018.  Consign_day is a yyyymmdd integer, so the month is selected
# with a closed numeric range.
# The statement spans several lines, so it is written as a triple-quoted
# string: an ordinary "..." literal cannot contain raw line breaks.
curs.execute("""
    SELECT Userid, Class, count(Userid)
    FROM waybill_constype
    WHERE Consign_day BETWEEN 20180401 AND 20180430
    GROUP BY Userid, Class
""")
data3 = curs.fetchall()
print(data3)

結果

[('', '檔案', 1),
('A001', '娛樂', 1),
('A001', '檔案', 2),
('A001', '食品', 1),
('A002', '檔案', 2), 
('A002', '食品', 3), 
('A003', '娛樂', 2),
('A003', '檔案', 1),
('A003', '衣物', 1),
('A003', '食品', 1)]

=華麗的分割線==
想要將上面的結果做行轉列

# 1.1.4 Pivot rows into columns.
# 1.1.4.1 The inner query builds the long-format input ("target"): one row
#         per (Userid, Class) with the shipment count as `total`.
# 1.1.4.2 The outer query turns the Class column of that input from many
#         rows (n x 1) into many columns (1 x m, m = number of categories),
#         using one sum(CASE ...) expression per category.  There are five
#         categories here; with m = 100 you would write 100 such
#         expressions, and twelve when pivoting by month.
curs.execute("""
    SELECT Userid,
           sum(CASE WHEN Class = '娛樂' THEN total ELSE 0 END) AS "娛樂",
           sum(CASE WHEN Class = '檔案' THEN total ELSE 0 END) AS "檔案",
           sum(CASE WHEN Class = '食品' THEN total ELSE 0 END) AS "食品",
           sum(CASE WHEN Class = '衣物' THEN total ELSE 0 END) AS "衣物",
           sum(CASE WHEN Class = '託運' THEN total ELSE 0 END) AS "託運"
    FROM (
        SELECT Userid, Class, count(Userid) AS total
        FROM waybill_constype
        -- yyyymmdd / 100 drops the day, % 100 keeps the two-digit month.
        -- (% 10 would keep only the last digit and confuse 1 with 11,
        -- 2 with 12.)  The year is not filtered, so April of every year
        -- in the table is counted.
        WHERE Consign_day / 100 % 100 = 4
        GROUP BY Userid, Class
    )
    GROUP BY Userid
    -- GROUP BY alone does not promise an output order; ask for it.
    ORDER BY Userid
""")
data3 = curs.fetchall()
print(data3)

結果

Userid,娛樂,檔案,食品,衣物,託運
[('', 0, 2, 0, 0, 0), 
('A001', 1, 2, 1, 0, 0),
('A002', 0, 4, 4, 0, 1), 
('A003', 3, 1, 1, 2, 1), 
('A004', 0, 0, 2, 0, 0)]

問題:上述日期是整型變數,現在改為str和date型別做測試

# str.split() returns a list, so split the date string once and unpack the
# three parts instead of splitting it again for every index.
year, month, day = '2018-04-24'.split('-')
# Bare expressions only echo their value in an interactive session; print
# explicitly so the parts are also visible when run as a script.
print(year, month, day)


# Exploring how values in a DATE-declared column can be filtered by month.
curs.execute('create table LMS_TJPC_NORMAL1(zldwdm varchar(20),dl_bm date,gqmj double(10))')
curs.execute("insert into LMS_TJPC_NORMAL1(zldwdm,dl_bm,gqmj) values('220882001001','2018-04-20',240)")
curs.execute("insert into LMS_TJPC_NORMAL1(zldwdm,dl_bm,gqmj) values('220882001002','2018-04-12',140)")
curs.execute("insert into LMS_TJPC_NORMAL1(zldwdm,dl_bm,gqmj) values('220882001003','2018-05-20',200)")

# Snapshot of the table before the dates are rewritten.
curs.execute("select * from LMS_TJPC_NORMAL1")
data4 = curs.fetchall()

# NOTE: there is no WHERE clause, so every row (including the May one) is
# given this same date-and-time value.
curs.execute("update LMS_TJPC_NORMAL1 set dl_bm='2018-04-20 20:40:40'")

# Month filter, variant 1: strftime('%m', ...) yields the zero-padded month
# as TEXT.  It must be compared with the string '04'; SQLite never treats a
# TEXT value as equal to the integer 4, so `== 4` matches no rows.
curs.execute("select * from LMS_TJPC_NORMAL1 where strftime('%m',dl_bm)='04'")
data4 = curs.fetchall()

# Month filter, variant 2: format the value as 'YYYY.MM.DD HH:MM:SS' and cut
# the month out with substr().  A negative length returns the characters
# *before* the start position, so (8, -2) yields characters 6-7, the month.
curs.execute("select * from LMS_TJPC_NORMAL1 where substr(strftime('%Y.%m.%d %H:%M:%S',dl_bm),8,-2)=='04'")
data4 = curs.fetchall()
  1. 例2:模擬將月份做行轉列的SQL小例子

# Table for example 2: one row per (zldwdm, dl_bm) measurement, with the
# value to be aggregated in gqmj.
curs.execute('create table LMS_TJPC_NORMAL(zldwdm varchar(20),dl_bm varchar(3),gqmj double(10))')

# (zldwdm, dl_bm, gqmj) rows, in the original insert order.  Several rows
# share the same (zldwdm, dl_bm) pair so that sum() has something to add up.
normal_rows = [
    ('220882001001', '01', 240),
    ('220882001001', '01', 100.58),
    ('220882001001', '02', 7.96),
    ('220882001001', '03', 25),
    ('220882001001', '03', 2.65),
    ('220882001001', '04', 1.65),
    ('220882001001', '10', 29.34),
    ('220882001001', '11', 12.61),
    ('220882001001', '12', 6.72),
    ('220882001001', '20', 754.25),
    ('220882001001', '01', 345.53),
    ('220882001002', '02', 7.96),
    ('220882001002', '03', 25),
    ('220882001002', '03', 2.65),
    ('220882001002', '04', 1.65),
    ('220882001002', '10', 29.34),
    ('220882001002', '11', 12.61),
    ('220882001002', '12', 6.72),
    ('220882001002', '20', 754.25),
    ('220882001002', '01', 345.53),
]

# One parameterized statement instead of twenty hand-written SQL strings.
curs.executemany(
    "insert into LMS_TJPC_NORMAL(zldwdm,dl_bm,gqmj) values(?,?,?)",
    normal_rows,
)

# substr(X, start, length): the third argument is a length, not an end
# position.  Taking 2 characters from position 11 gives the last two
# characters of the 12-character code, which is what the example groups on.
curs.execute("""
    SELECT substr(zldwdm, 11, 2) AS zldwdm, dl_bm AS dlbm, sum(gqmj) AS mj
    FROM LMS_TJPC_NORMAL
    GROUP BY substr(zldwdm, 11, 2), dl_bm
""")
data4 = curs.fetchall()
print(data4)

結果

[('01', '01', 686.110), ('01', '02', 7.96), ('01', '03', 27.65), ('01', '04', 1.65),
 ('01', '10', 29.34), ('01', '11', 12.61), ('01', '12', 6.72), ('01', '20', 754.25), 
 ('02', '01', 345.53), ('02', '02', 7.96), ('02', '03', 27.65), ('02', '04', 1.65), 
 ('02', '10', 29.34), ('02', '11', 12.61), ('02', '12', 6.72), ('02', '20', 754.25)]

#### Pivot (rows to columns) starts here ####

# Turn the dl_bm values from many rows into many columns.  The inner query
# sums gqmj per (zldwdm, dl_bm code); the outer query spreads those sums
# into one column per code and adds the overall total per zldwdm.
# Column aliases are double-quoted identifiers (a single-quoted alias is a
# string literal that SQLite only tolerates for compatibility), and the
# output order is requested explicitly rather than left to GROUP BY.
curs.execute("""
    SELECT zldwdm,
           sum(mj) AS total,
           sum(CASE WHEN dlbm = '01' THEN mj ELSE 0 END) AS "01",
           sum(CASE WHEN dlbm = '02' THEN mj ELSE 0 END) AS "02",
           sum(CASE WHEN dlbm = '03' THEN mj ELSE 0 END) AS "03",
           sum(CASE WHEN dlbm = '04' THEN mj ELSE 0 END) AS "04",
           sum(CASE WHEN dlbm = '20' THEN mj ELSE 0 END) AS "20",
           sum(CASE WHEN dlbm = '10' THEN mj ELSE 0 END) AS "10",
           sum(CASE WHEN dlbm = '11' THEN mj ELSE 0 END) AS "11",
           sum(CASE WHEN dlbm = '12' THEN mj ELSE 0 END) AS "12"
    FROM (
        SELECT zldwdm, substr(dl_bm, 1, 2) AS dlbm, sum(gqmj) AS mj
        FROM LMS_TJPC_NORMAL
        GROUP BY zldwdm, substr(dl_bm, 1, 2)
    )
    GROUP BY zldwdm
    ORDER BY zldwdm
""")
data4 = curs.fetchall()
print(data4)

結果

編號,total,01,02,03,04,20,10,11,12
[('220882001001', 1526.29, 686.1099999999999, 7.96, 27.65, 1.65, 754.25, 29.34, 12.61, 6.72), 
('220882001002', 1185.71, 345.53, 7.96, 27.65, 1.65, 754.25, 29.34, 12.61, 6.72)]