1. 程式人生 > >Python pandas 單條 染色體 位置 區間 SNP 資料 提取 haploview

Python pandas 單條 染色體 位置 區間 SNP 資料 提取 haploview

import pandas as pd

def select_snp_window(map_df, pos, flank=500000, pos_col='4'):
    """Return the rows of *map_df* whose position lies strictly inside the window.

    Args:
        map_df: DataFrame holding one marker per row.
        pos: Centre of the window (same units as the position column).
        flank: Half-width of the window; rows are kept when
            ``pos - flank < position < pos + flank`` (both bounds exclusive).
        pos_col: Label of the column that stores the marker position.

    Returns:
        A DataFrame with the matching rows, in their original order and with
        their original index labels.

    Raises:
        KeyError: If *pos_col* is not a column of *map_df*.
    """
    positions = map_df[pos_col]
    # One combined mask instead of two chained filters; rows with a missing
    # position compare False on both sides and are dropped either way.
    inside = (positions > (pos - flank)) & (positions < (pos + flank))
    return map_df[inside]


def extract_genotypes(ped_df, snp_names, n_leading=6):
    """Return the leading columns of *ped_df* plus both columns of each SNP.

    Each SNP occupies two columns that share one header in the spreadsheet, so
    after loading they are labelled ``name`` and ``name + '.1'``
    (pandas de-duplicates repeated headers this way when reading).

    Args:
        ped_df: DataFrame whose first *n_leading* columns are kept as-is and
            whose remaining columns are per-SNP columns.
        snp_names: Iterable of SNP names (strings); output columns follow
            this order.
        n_leading: Number of leading columns copied through unchanged.

    Returns:
        A DataFrame: the leading columns followed by ``name``, ``name.1`` for
        every requested SNP.

    Raises:
        KeyError: If a requested SNP column is absent from the SNP columns.
    """
    leading = ped_df.iloc[:, :n_leading]
    snp_columns = ped_df.iloc[:, n_leading:]

    wanted = []
    for name in snp_names:
        wanted.extend((name, name + '.1'))

    # Select everything at once and concatenate a single time; growing the
    # frame column-by-column inside a loop re-copies it on every iteration.
    return pd.concat([leading, snp_columns[wanted]], axis=1)


if __name__ == "__main__":
    # Load the marker map for chromosome 1.
    # NOTE(review): columns '1'..'4' look like a PLINK-style map
    # (chromosome, SNP name, genetic distance, position) - confirm.
    sheet1 = pd.read_excel('C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_map.xlsx')
    print(type(sheet1))

    # Given the position of the SNP of interest, pull out the markers located
    # within 500 000 units on either side of it.
    pos = 315320300
    data11 = select_snp_window(sheet1, pos)

    # output map.txt: drop columns '1' and '3', keeping the remaining columns;
    # written tab-separated without header or index.
    data12 = data11.drop(['1', '3'], axis=1)
    data12.to_csv("C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_test_map.csv", sep = '\t', index = False, header = False)

    # Collect the SNP names (second column of the map) as a plain list.
    data13 = data11.iloc[:, 1].tolist()

    # Pull the matching columns out of the ped table by SNP name.
    # NOTE(review): the first 6 columns are presumably the standard ped
    # sample fields - confirm against the spreadsheet.
    sheet2 = pd.read_excel('C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_snp.xlsx')
    data20 = extract_genotypes(sheet2, data13)

    # output snp.txt: tab-separated, no header or index.
    data20.to_csv("C:\\Users\\windows10\\Desktop\\Python練習\\文字流\\chrom 1\\chrom1_test_snp.csv", sep = '\t', index = False, header = False)