Python pandas 單條 染色體 位置 區間 SNP 資料 提取 haploview
阿新 • • 發佈:2019-01-13
# Extract the SNPs that lie within a window around one position from a
# chromosome map workbook, then pull the matching genotype columns out of
# the companion SNP workbook.  Two tab-separated, header-less files are
# written: the marker table (name + position) and the genotype table.
import pandas as pd

# Input workbooks and output files (paths kept exactly as in the original).
MAP_XLSX = 'C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_map.xlsx'
SNP_XLSX = 'C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_snp.xlsx'
MAP_OUT = "C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_test_map.csv"
SNP_OUT = "C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_test_snp.csv"

# Centre of the window and the distance kept on either side of it.
POSITION = 315320300
FLANK = 500000

# Label of the map column that is compared against the window bounds.
POSITION_COLUMN = '4'
# Map columns dropped before the marker table is written.
DROPPED_MAP_COLUMNS = ['1', '3']
# Number of leading columns of the SNP workbook copied through unchanged.
LEADING_COLUMNS = 6


def snps_in_window(map_table, pos, flank=FLANK, column=POSITION_COLUMN):
    """Return the rows of *map_table* strictly inside ``pos +/- flank``.

    Args:
        map_table: DataFrame holding the marker map.
        pos: Centre of the window.
        flank: Distance kept on each side of *pos*.
        column: Label of the column compared against the bounds.

    Returns:
        The matching rows, in their original order.  Both bounds are
        exclusive: a value equal to ``pos - flank`` or ``pos + flank`` is
        not selected.
    """
    values = map_table[column]
    inside = (values > (pos - flank)) & (values < (pos + flank))
    return map_table[inside]


def genotype_columns(snp_table, snp_names, leading=LEADING_COLUMNS):
    """Return the leading columns plus two columns per requested SNP.

    Each SNP is looked up under two labels, ``name`` and ``name + '.1'``;
    the second label is how the duplicated column name shows up after the
    workbook has been loaded.

    Args:
        snp_table: DataFrame loaded from the SNP workbook.
        snp_names: SNP names, in the order the columns should be emitted.
        leading: Number of leading columns copied through unchanged.

    Returns:
        A DataFrame with the first *leading* columns followed by the
        ``name`` / ``name.1`` pair of every entry of *snp_names*.

    Raises:
        KeyError: If a requested label is missing from the columns that
            follow the leading ones.
    """
    wanted = []
    for name in snp_names:
        wanted.append(name)
        wanted.append(f"{name}.1")
    fixed_part = snp_table.iloc[:, :leading]
    genotype_part = snp_table.iloc[:, leading:]
    # One selection and one concat instead of two concats per SNP, which
    # re-copied the growing frame on every iteration.
    return pd.concat([fixed_part, genotype_part[wanted]], axis=1)


def main():
    """Run the extraction for POSITION and write both output files."""
    map_table = pd.read_excel(MAP_XLSX)
    print(type(map_table))

    # Keep only the SNPs around the requested position.
    window = snps_in_window(map_table, POSITION)

    # Output the marker table without the dropped columns.
    marker_table = window.drop(DROPPED_MAP_COLUMNS, axis=1)
    marker_table.to_csv(MAP_OUT, sep='\t', index=False, header=False)

    # The SNP names are taken from the second column of the window.
    snp_names = window.iloc[:, 1].tolist()

    # Pull the genotype columns of those SNPs and output them.
    snp_table = pd.read_excel(SNP_XLSX)
    selected = genotype_columns(snp_table, snp_names)
    selected.to_csv(SNP_OUT, sep='\t', index=False, header=False)


if __name__ == "__main__":
    main()