1. 程式人生 > python學習筆記之讀寫excel檔案

python學習筆記之讀寫excel檔案

python 處理excel資料的兩種方式:

  • 首選pandas庫裡pandas.read_excel函式,相對比較簡單
  • 其次使用xlrd庫,感覺沒有pandas好用
  • 如何寫excel,後續更新
#coding=utf-8 
"""
Created on Wed Nov 28 18:39:17 2018
@author: **
"""
import xlrd
import xlwt
import pandas
import numpy as np

def pandas_parse_xls(filename, imgname_col_index=None, sub_index=None):
    """Read image names and per-row score standard deviations from a workbook.

    Reference:
    http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html

    Args:
        filename: Path of the Excel workbook passed to ``pandas.read_excel``.
        imgname_col_index: List of zero-based column indices of the image-name
            field.  ``None`` (the default) means ``[1]``, the second column.
        sub_index: List of zero-based column indices of the subjective-score
            area.  ``None`` (the default) means columns 3..14, i.e. the 4th
            through the 15th column.

    Returns:
        A tuple ``(imgname_list, std_array)``:
        ``imgname_list`` is a list with one ``str`` per data row when a single
        name column is selected; ``std_array`` is a one-dimensional ndarray
        holding the sample standard deviation (``ddof=1``) of each row of the
        score area.
    """
    # Defaults are created per call; a list literal in the signature would be
    # shared between calls.
    if imgname_col_index is None:
        imgname_col_index = [1]
    if sub_index is None:
        sub_index = list(range(3, 15))

    # ``usecols`` replaces the ``parse_cols`` keyword, which pandas removed.
    # Cast to 'str' so text cells come back as the native string type.
    name_array = pandas.read_excel(
        filename, usecols=imgname_col_index).astype('str').values

    if name_array.ndim == 2 and name_array.shape[1] == 1:
        # Flatten along the column axis only: a blanket squeeze would turn a
        # sheet with a single data row into a bare string instead of a list.
        imgname_list = name_array[:, 0].tolist()
    else:
        imgname_list = np.squeeze(name_array).tolist()

    # Numeric score area -> 2-D ndarray -> one sample std-dev per row.
    score_array = pandas.read_excel(filename, usecols=sub_index).values
    std_array = np.std(score_array, axis=1, ddof=1)

    return imgname_list, std_array


def _to_native_str(value):
    """Return *value* as the running interpreter's native ``str`` type."""
    if isinstance(value, str):
        return value
    if str is bytes and hasattr(value, 'encode'):
        # Python 2 only: ``unicode`` text is encoded to a UTF-8 byte ``str``.
        return value.encode('utf-8')
    return str(value)


def xlrd_parse_xls(path_xls, name_col=1, score_col=15, first_row=1):
    """Read a name column and a score column from the first sheet with xlrd.

    Reference: https://xlrd.readthedocs.io/en/latest/api.html

    Useful ``xlrd.sheet.Sheet`` accessors (all indices are zero-based):
        ``cell(rowx, colx)``        -- Cell object at the given row/column.
        ``cell_value(rowx, colx)``  -- value of the cell at the given position.
        ``col_values(colx, start)`` -- slice of the values in one column.
        ``row_values(rowx, start)`` -- slice of the values in one row.

    NOTE(review): xlrd 2.0 and later only read the legacy ``.xls`` format, so
    an ``.xlsx`` path needs an older xlrd -- confirm the installed version.

    Args:
        path_xls: Path of the workbook to open.
        name_col: Zero-based index of the name column (default 1).
        score_col: Zero-based index of the score column (default 15).
        first_row: Zero-based index of the first row to read; the default 1
            skips row 0.

    Returns:
        A tuple ``(name_list, score_list)`` of two lists with the values of
        the two columns; names are converted to the native ``str`` type.
    """
    workbook = xlrd.open_workbook(path_xls)
    table = workbook.sheet_by_index(0)  # same sheet as workbook.sheets()[0]

    # On Python 3 calling .encode('utf-8') would produce bytes rather than
    # str, so the conversion is delegated to a version-aware helper.
    name_list = [_to_native_str(item)
                 for item in table.col_values(name_col, first_row)]
    score_list = table.col_values(score_col, first_row)

    # The same data can be read cell by cell with table.cell_value(row, col)
    # for row in range(first_row, table.nrows).
    return name_list, score_list


def write_xls(dest_xls):
    """Create a one-sheet workbook with a sample cell and save it.

    Args:
        dest_xls: Path the workbook is written to.
    """
    work_book = xlwt.Workbook(encoding='ascii')
    work_sheet = work_book.add_sheet('sheet1')
    work_sheet.write(0, 0, label='Row 0, Column 0 Value')
    # Save to the requested destination instead of a fixed file name.
    work_book.save(dest_xls)


if __name__ == '__main__':
    pandas_parse_xls('MOS.xlsx')
    xlrd_parse_xls('MOS.xlsx')

參考地址