利用Python計算資料的Pearson相關係數
阿新 • • 發佈:2019-01-06
步驟一:讀取資料
# _*_ coding: utf-8 _*_
import pandas as pd
import numpy as np
# Load the input CSV and use its 'user_id' column as the row index.
# NOTE(review): "D:data1.csv" has no separator after the drive letter, so on
# Windows it is resolved relative to the current directory of drive D: --
# confirm this is intended (a backslash may have been lost in publishing).
df = pd.read_csv("D:data1.csv",index_col='user_id')
步驟二:異常資料處理(如無需預處理,可跳過資料預處理的程式),再進行Pearson相關係數計算
運用箱型圖(四分位距,IQR)分析,在每35筆資料的滑動視窗內選出異常值,並以其前後相鄰兩筆資料的平均值替換
N_USERS = 1454         # user_ids 1..N_USERS are processed
N_POINTS = 609         # samples expected per user (used when reshaping)
WINDOW = 35            # sliding-window length for the box-plot statistics
LOWER_RANK = 8         # position in the sorted window used as the lower quartile
UPPER_RANK = 25        # position in the sorted window used as the upper quartile
# Exclusive upper bound for the window end.  The last window therefore covers
# indices 572..606, i.e. samples 607 and 608 are never tested as outliers.
WINDOW_END_STOP = 608


def _neighbour_mean(power, q):
    """Return the mean of the samples adjacent to index q.

    At either end of the series only one neighbour exists, so that single
    neighbour is returned instead of wrapping around to the other end
    (a plain power[q - 1] with q == 0 would read the last sample).
    """
    if q == 0:
        return power[1]
    if q == len(power) - 1:
        return power[q - 1]
    return (power[q - 1] + power[q + 1]) / 2


def data_Process(df):
    """Replace box-plot outliers in every user's series by the neighbour mean.

    For each user_id in 1..N_USERS the rows ``df.loc[user_id]`` are converted
    to an array and the values at column position 1 are cleaned.
    NOTE(review): assumes every user_id has several rows (N_POINTS of them)
    and that column position 1 holds the numeric series -- confirm against
    the CSV.

    A window of WINDOW samples slides over the series.  In each window the
    integer-truncated values are sorted; the entries at LOWER_RANK and
    UPPER_RANK act as quartiles, and any sample of the window outside
    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] is replaced by the mean of its two
    neighbours.  Replacements are written into the series being scanned, so
    later windows are computed from already-cleaned values.

    Parameters:
        df: DataFrame indexed by user_id.

    Returns:
        A list with one 1-D float array per user, in ascending user_id order.

    Raises:
        KeyError: if a user_id in 1..N_USERS is missing from the index.
        IndexError: if a user's series is too short for the windows.
    """
    load_a = []
    for user_id in range(1, N_USERS + 1):
        rows = np.array(df.loc[user_id])
        # Work on a float copy: the averages below are fractional, and a
        # numeric dtype is needed for the correlation computed afterwards.
        power = rows[:, 1].astype(float)
        for m in range(WINDOW, WINDOW_END_STOP):
            start = m - WINDOW
            ranked = sorted(int(value) for value in power[start:m])
            load_u = ranked[UPPER_RANK]
            load_l = ranked[LOWER_RANK]
            iqr = load_u - load_l
            ud = load_u + 1.5 * iqr
            ld = load_l - 1.5 * iqr
            # Thresholds stay fixed for the whole window even though samples
            # inside it may be overwritten while it is scanned.
            for q in range(start, m):
                if power[q] > ud or power[q] < ld:
                    power[q] = _neighbour_mean(power, q)
        load_a.append(power)
    return load_a


list_re = data_Process(df)
load_ar = np.array(list_re).reshape(N_USERS, N_POINTS)
# Put each series to be correlated into its own column.
load_art = load_ar.T

# Column labels A1..A1454: one per series entering the correlation.
index1 = 'A' + pd.Series(np.arange(1, N_USERS + 1)).astype(str)
# Row labels B1..B609: one per sample of a series.
index2 = 'B' + pd.Series(np.arange(1, N_POINTS + 1)).astype(str)

# Pearson correlation between every pair of columns.
load_pcp = pd.DataFrame(load_art, index=index2, columns=index1)
load_pc = load_pcp.corr()
print(load_pc)
輸出效果部分截圖如下: