1. 程式人生 > >R_Studio(關聯)Apriori演算法尋找頻繁項集的方法

R_Studio(關聯)Apriori演算法尋找頻繁項集的方法

 

 

  使用Apriori演算法尋找頻繁項集

 

  

 

# Load the arules package, installing it first only when it is missing
# (avoids re-downloading the package on every run).
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)

# Read the comma-separated basket-format transaction file. The path is built
# explicitly instead of calling setwd(), so the session's working directory
# is left untouched.
data_dir <- "D:\\data"
Gary <- read.transactions(
  file.path(data_dir, "Garytxt.txt"),
  format = "basket",
  sep = ","
)

# Keep only the transactions that contain at least 2 items.
GarySize <- size(Gary)
Gary_u <- Gary[GarySize > 1]

# Summarise the filtered transactions.
summary(Gary_u)

# Mine association rules with minimum support 0.4 and minimum confidence 0.5;
# minlen = 2 filters out rules whose lhs is empty.
# Note: Gary_u is overwritten here and holds the rules from this point on.
Gary_u <- apriori(
  Gary_u,
  parameter = list(support = 0.4, confidence = 0.5, minlen = 2)
)

# Alternative: restrict the rhs to a single item and allow all other items
# on the lhs only.
# NOTE(review): the item labels in this data set look upper-case ("A", "B",
# ...), so "a" may need to be "A" -- confirm before enabling.
# Gary_u <- apriori(
#   Gary_u,
#   parameter = list(support = 0.4, confidence = 0.5),
#   appearance = list(rhs = c("a"), default = "lhs")
# )

# Inspect the mined rules.
inspect(Gary_u)
Gary.R

 

 

實現過程

 

  匯入arules包

  資料預處理

# Load the arules package, installing it first only when it is missing
# (avoids re-downloading the package on every run).
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)

# Read the comma-separated basket-format transaction file. The path is built
# explicitly instead of calling setwd(), so the session's working directory
# is left untouched.
data_dir <- "D:\\data"
Gary <- read.transactions(
  file.path(data_dir, "Garytxt.txt"),
  format = "basket",
  sep = ","
)

# Keep only the transactions that contain at least 2 items.
GarySize <- size(Gary)
Gary_u <- Gary[GarySize > 1]

 

  檢視Gary_u中的資料

> summary(Gary_u)
transactions as itemMatrix in sparse format with
 6 rows (elements/itemsets/transactions) and
 5 columns (items) and a density of 0.7666667 

most frequent items:
      B       E       A       C       D (Other) 
      6       5       4       4       4       0 

element (itemset/transaction) length distribution:
sizes
3 4 5 
2 3 1 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  3.000   3.250   4.000   3.833   4.000   5.000 

includes extended item information - examples:
  labels
1      A
2      B
3      C

 

  生成關聯規則

 

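> #支援度0.4,置信度0.5,過濾掉lhs為空的規則
> Gary_u=apriori(Gary_u,parameter=list(support=0.4,confidence=0.5,minlen=2))
> #Gary_u=apriori(Gary_u,parameter=list(support=0.4,confidence=0.5),appearance=list(rhs=c("a"),default="lhs"))  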
> 
> 
> #檢視部分規則
> inspect(Gary_u)
     lhs        rhs support   confidence lift  count
[1]  {C}     => {E} 0.5000000 0.7500000  0.900 3    
[2]  {E}     => {C} 0.5000000 0.6000000  0.900 3    
[3]  {C}     => {B} 0.6666667 1.0000000  1.000 4    
[4]  {B}     => {C} 0.6666667 0.6666667  1.000 4    
[5]  {D}     => {A} 0.5000000 0.7500000  1.125 3    
[6]  {A}     => {D} 0.5000000 0.7500000  1.125 3    
[7]  {D}     => {E} 0.5000000 0.7500000  0.900 3    
[8]  {E}     => {D} 0.5000000 0.6000000  0.900 3    
[9]  {D}     => {B} 0.6666667 1.0000000  1.000 4    
[10] {B}     => {D} 0.6666667 0.6666667  1.000 4    
[11] {A}     => {E} 0.6666667 1.0000000  1.200 4    
[12] {E}     => {A} 0.6666667 0.8000000  1.200 4    
[13] {A}     => {B} 0.6666667 1.0000000  1.000 4    
[14] {B}     => {A} 0.6666667 0.6666667  1.000 4    
[15] {E}     => {B} 0.8333333 1.0000000  1.000 5    
[16] {B}     => {E} 0.8333333 0.8333333  1.000 5    
[17] {C,E}   => {B} 0.5000000 1.0000000  1.000 3    
[18] {B,C}   => {E} 0.5000000 0.7500000  0.900 3    
[19] {B,E}   => {C} 0.5000000 0.6000000  0.900 3    
[20] {A,D}   => {E} 0.5000000 1.0000000  1.200 3    
[21] {D,E}   => {A} 0.5000000 1.0000000  1.500 3    
[22] {A,E}   => {D} 0.5000000 0.7500000  1.125 3    
[23] {A,D}   => {B} 0.5000000 1.0000000  1.000 3    
[24] {B,D}   => {A} 0.5000000 0.7500000  1.125 3    
[25] {A,B}   => {D} 0.5000000 0.7500000  1.125 3    
[26] {D,E}   => {B} 0.5000000 1.0000000  1.000 3    
[27] {B,D}   => {E} 0.5000000 0.7500000  0.900 3    
[28] {B,E}   => {D} 0.5000000 0.6000000  0.900 3    
[29] {A,E}   => {B} 0.6666667 1.0000000  1.000 4    
[30] {A,B}   => {E} 0.6666667 1.0000000  1.200 4    
[31] {B,E}   => {A} 0.6666667 0.8000000  1.200 4    
[32] {A,D,E} => {B} 0.5000000 1.0000000  1.000 3    
[33] {A,B,D} => {E} 0.5000000 1.0000000  1.200 3    
[34] {B,D,E} => {A} 0.5000000 1.0000000  1.500 3    
[35] {A,B,E} => {D} 0.5000000 0.7500000  1.125 3