1. 程式人生 > >java實現兩個向量的相關係數演算法

java實現兩個向量的相關係數演算法

有兩個向量V1和V2(以「位置:值」表示):V1={1:3,2:2,3:1,5:0},V2={1:3,3:1,4:2,5:0}

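以表格的形式展現:

| 位置 | 1 | 2 | 3 | 4 | 5 |
|------|---|---|---|---|---|
| V1   | 3 | 2 | 1 | 0 | 0 |
| V2   | 3 | 0 | 1 | 2 | 0 |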


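將向量V1和V2帶入相關係數公式並展開,結果為:

$$ r = \frac{n\sum_i x_i y_i - n^2\,\bar{x}\,\bar{y}}{\sqrt{n\sum_i x_i^2 - n^2\,\bar{x}^2}\;\sqrt{n\sum_i y_i^2 - n^2\,\bar{y}^2}} $$

其中 $x_i$、$y_i$ 分別為V1、V2在位置 $i$ 上的值,$\bar{x}$、$\bar{y}$ 為V1、V2的平均值,$n$ 的取法見下文。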


n值(n = 4):從表格可以看出,向量V1和V2的第五位元素都是0,因此該位置可忽略。向量V1的第四位元素值為0,但向量V2的第四位元素有值,因此位置4不能忽略;同理,V2的第二位元素值為0,但V1的第二位元素有值,因此位置2也不能忽略。所以 n = 4。

V1的平均值:(3+2+1)/4(將V1向量非零的值累加,然後除以n值)

V2的平均值:(3+1+2)/4(將V2向量非零的值累加,然後除以n值)

V1*V2:3*3+2*0+1*1+0*2(將V1和V2對應位置上的值相乘,然後將結果累加)

實現程式碼:

package fuse.hang;

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;

/**
 * Small demo that prints the correlation coefficient of two sparse vectors,
 * together with the intermediate values used to compute it.
 */
public class Correlation {

	/**
	 * Demo entry point: builds two sample vectors and prints their
	 * correlation coefficient.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		// Build vectors V1 and V2. The explicit zero entries are kept so that
		// every position of the sample data is visible here.
		Vector v1 = new RandomAccessSparseVector(1000);
		v1.set(1, 3);
		v1.set(2, 2);
		v1.set(3, 1);
		v1.set(4, 0);
		v1.set(5, 0);
		Vector v2 = new RandomAccessSparseVector(1000);
		v2.set(1, 3);
		v2.set(2, 0);
		v2.set(3, 1);
		v2.set(4, 2);
		v2.set(5, 0);

		correlation(v1, v2);
	}

	/**
	 * Prints the correlation coefficient of {@code v1} and {@code v2} to
	 * standard output, preceded by the intermediate values (dot product,
	 * element count, averages and sums of squares).
	 *
	 * <p>The element count {@code n} is the number of positions at which at
	 * least one of the two vectors is non-zero; positions where both are zero
	 * are ignored. Both averages are the vector's element sum divided by that
	 * same {@code n}.
	 *
	 * <p>The printed coefficient is
	 * {@code (n*dot - n^2*avg1*avg2) / (sqrt(n*sq1 - n^2*avg1^2) * sqrt(n*sq2 - n^2*avg2^2))}.
	 * When the denominator is zero (for example both vectors are all zeros, or
	 * {@code n} is zero) the printed value is not a finite number.
	 *
	 * @param v1 first vector; if {@code null} the method returns without printing
	 * @param v2 second vector; if {@code null} the method returns without printing
	 */
	public static void correlation(Vector v1, Vector v2) {
		if (v1 == null || v2 == null) {
			return;
		}

		double dot = v1.dot(v2);
		System.out.println("dot : " + dot);

		double v1SquareSum = 0;
		double commonCount = 0;
		for (Element e : v1.nonZeroes()) {
			double value = e.get();
			v1SquareSum += value * value;
			// A position is shared when BOTH vectors are non-zero there.
			// Testing "!= 0" rather than "> 0" so that negative entries are
			// counted as well; otherwise n would be overstated.
			if (value != 0 && v2.get(e.index()) != 0) {
				commonCount++;
			}
		}

		double v2SquareSum = 0;
		for (Element e : v2.nonZeroes()) {
			double value = e.get();
			v2SquareSum += value * value;
		}

		// n = |nonzero(v1) UNION nonzero(v2)| = |nonzero(v1)| + |nonzero(v2)| - |shared positions|
		double sumCount = v1.getNumNonZeroElements() + v2.getNumNonZeroElements() - commonCount;
		System.out.println("sumCount: " + sumCount);

		double averageV1 = v1.zSum() / sumCount;
		System.out.println("averageV1: " + averageV1);
		double averageV2 = v2.zSum() / sumCount;
		System.out.println("averageV2: " + averageV2);

		System.out.println("v1SquareSum: " + v1SquareSum);
		System.out.println("v2SquareSum: " + v2SquareSum);

		// Expanded form of the correlation coefficient, split into named terms.
		double numerator = sumCount * dot - sumCount * sumCount * averageV1 * averageV2;
		double v1Spread = Math.sqrt(sumCount * v1SquareSum - sumCount * sumCount * averageV1 * averageV1);
		double v2Spread = Math.sqrt(sumCount * v2SquareSum - sumCount * sumCount * averageV2 * averageV2);
		System.out.println("相關係數值:" + numerator / (v1Spread * v2Spread));
	}

}