1. 程式人生 > >特徵選擇之卡方統計 Chi-Square

特徵選擇之卡方統計 Chi-Square

%%%   The last column of dataset holds the class label; k is the number of features to select.

function result=chiAttributeEva(dataset,k)
% CHIATTRIBUTEEVA  Rank features by their chi-square statistic w.r.t. the class.
%
%   result = chiAttributeEva(dataset, k)
%
%   dataset : numeric matrix, one sample per row. Every column except the
%             last is a (discrete-valued) feature; the last column is the
%             class label.
%   k       : number of features to select. Values larger than the number
%             of features are clamped to the number of features.
%
%   result  : min(k, number of features)-by-2 matrix. Column 1 is the
%             feature (column) index in dataset, column 2 is its chi-square
%             score. Rows are ordered by descending score, so the most
%             class-dependent features come first.
%
%   For each feature the statistic is sum((O_jk - E_jk)^2 / E_jk) over all
%   (feature value j, class k) cells, where O_jk is the observed count and
%   E_jk = O_j * O_k / N is the count expected under independence
%   (N = number of samples).

% Nothing to rank when there is no data at all.
if isempty(dataset)
    result=zeros(0,2);
    return;
end

labels=dataset(:,end);
classes=unique(labels);
num_classes=length(classes);
num_samples=size(dataset,1);      % N in the expected-count formula
n=size(dataset,2)-1;              % number of feature columns

% Per-class totals O_k do not depend on the feature, so compute them once.
class_counts=zeros(num_classes,1);
for m=1:num_classes
    class_counts(m)=sum(labels==classes(m));
end

% Preallocate [feature index, chi-square score]; stays 0-by-2 when n==0.
character_order=zeros(n,2);
for i=1:n
    character=dataset(:,i);
    character_value=unique(character);
    chi_i=0;
    for j=1:length(character_value)
        value_mask=(character==character_value(j));
        o_j=sum(value_mask);      % samples taking this feature value
        if o_j==0
            % Only happens for NaN values (NaN==NaN is false); skipping
            % avoids a 0/0 that would turn the whole score into NaN.
            continue;
        end
        value_labels=labels(value_mask);
        for m=1:num_classes
            o_jk=sum(value_labels==classes(m));        % observed count
            e_jk=o_j*class_counts(m)/num_samples;      % expected count
            chi_i=chi_i+(o_jk-e_jk)^2/e_jk;
        end
    end
    character_order(i,:)=[i,chi_i];
end

% Highest chi-square first: those are the features most dependent on the class.
character_order=sortrows(character_order,-2);
result=character_order(1:min(k,n),:);
end