Python 通過讀取 Excel 的表結構資料,形成建表語句
阿新 • • 發佈:2018-11-02
Excel 中需要包含:第一列為欄位名稱,第二列為欄位型別,第三列為欄位長度,第四列為是否為主鍵(以 Y/N 區分)
import os

# Source type code -> column type written into the generated DDL.
# NOTE(review): the codes look like SAP dictionary types (the input folder is
# named "sap-table"); several numeric-looking codes are deliberately kept as
# 'varchar' because that is what the original mapping did -- confirm.
_TYPE_MAP = {
    'CHAR': 'varchar',
    'CLNT': 'varchar',
    'NUMC': 'numeric',
    'DATS': 'timestamp',
    'QUAN': 'varchar',
    'CUKY': 'varchar',
    'CURR': 'varchar',
    'DEC': 'varchar',
    'INT4': 'varchar',
    'TIMS': 'varchar',
}

# Folders scanned for .xlsx workbooks when the file is run as a script.
paths = [r'C:/Users/zhudong/Desktop/sap-table/']


def convert_type(data_type):
    """Map a source type code to the column type used in the generated DDL.

    Codes listed in ``_TYPE_MAP`` are translated; any other value is
    returned unchanged.
    """
    return _TYPE_MAP.get(data_type, data_type)


def _has_length(length):
    """Return True when *length* holds a usable type modifier."""
    if length is None:
        return False
    if isinstance(length, str):
        return bool(length.strip())
    return True


def _format_length(length):
    """Render *length* for use inside ``type(...)``.

    Spreadsheet readers hand numeric cells back as floats, so a length of
    10 arrives as ``10.0``; emit it as ``10`` to keep the SQL valid.
    """
    if isinstance(length, float) and length.is_integer():
        return str(int(length))
    return str(length)


def _column_definition(field):
    """Build the ``name type[(length)]`` fragment for one field."""
    definition = field['column_name'] + ' ' + field['type']
    if _has_length(field['length']):
        definition += '(' + _format_length(field['length']) + ')'
    return definition


def _is_primary_key(flag):
    """Return True when the Y/N primary-key flag is 'Y' (case/space tolerant)."""
    return str(flag).strip().upper() == 'Y'


def _create_table_sql(table_name, column_defs, primary_key):
    """Assemble one ``create table`` statement.

    With key columns the statement ends in ``primary key(...)`` and
    ``distributed by (...)``. Without any, both clauses would be empty and
    therefore invalid, so the key clause is dropped and the table is
    declared ``distributed randomly``.
    """
    body = list(column_defs)
    if primary_key:
        key_list = ','.join(primary_key)
        body.append('primary key(%s)' % key_list)
        distribution = 'distributed by (%s)' % key_list
    else:
        distribution = 'distributed randomly'
    return 'create table %s (\n%s\n)\n%s;' % (
        table_name, ',\n'.join(body), distribution)


def postgres_create(fields):
    """Print the staging and EDW ``create table`` statements for one table.

    Args:
        fields: list of dicts with the keys ``column_name``, ``table_name``,
            ``type``, ``length`` and ``primary_key``. The table name is taken
            from the first entry.

    The staging table (``dw_stg.stg_cus_dim_<table>``) gets the extra columns
    ``modify_date_etl`` and ``load_dt``; the EDW table
    (``dw_edw.edw_cus_dim_<table>``) gets ``load_dt`` only. Nothing is printed
    for an empty *fields* list.
    """
    if not fields:
        # An empty sheet has no table to describe.
        return

    table_name = fields[0]['table_name']
    column_defs = [_column_definition(field) for field in fields]
    primary_key = [
        field['column_name']
        for field in fields
        if _is_primary_key(field['primary_key'])
    ]

    stg_columns = column_defs + [
        'modify_date_etl timestamp default now()',
        'load_dt timestamp default now()',
    ]
    edw_columns = column_defs + ['load_dt timestamp default now()']

    print(_create_table_sql(
        'dw_stg.stg_cus_dim_' + table_name, stg_columns, primary_key))
    print(_create_table_sql(
        'dw_edw.edw_cus_dim_' + table_name, edw_columns, primary_key))


def _field_from_row(row, table_name):
    """Turn one spreadsheet row (name, type, length, Y/N key) into a field dict."""
    return {
        'column_name': row[0].lower(),
        'table_name': table_name.lower(),
        'type': convert_type(row[1]),
        'length': row[2],
        'primary_key': row[3],
    }


def main():
    """Print DDL for every sheet of every .xlsx workbook found in ``paths``.

    Each sheet is treated as one table whose name is the sheet name.
    """
    # Imported here so the SQL helpers stay importable without xlrd installed.
    # NOTE: xlrd 2.0+ reads only .xls files; opening .xlsx needs xlrd < 2.0.
    import xlrd

    for path in paths:
        for filename in os.listdir(path):
            if not filename.endswith('.xlsx'):
                continue
            workbook = xlrd.open_workbook(os.path.join(path, filename))
            for index, sheet_name in enumerate(workbook.sheet_names()):
                sheet = workbook.sheet_by_index(index)
                fields = [
                    _field_from_row(sheet.row_values(row_index), sheet_name)
                    for row_index in range(sheet.nrows)
                ]
                postgres_create(fields)


if __name__ == '__main__':
    main()