1. 程式人生 > python遞迴解析JSON轉換為excel輸出

python遞迴解析JSON轉換為excel輸出

參考了此部落格的內容,在此基礎上再對資料進行處理:

程式碼如下:

import json
import pandas as pd

def dict_generator(indict, pre=None):
    """
    Recursively flatten a parsed JSON value into ``[key, ..., value]`` lists.

    Every yielded item is a list whose last element is a leaf value and whose
    preceding elements are the dict keys leading to it.  List and tuple
    members are flattened under the key of the containing list (no index is
    added to the path).  Empty containers found as dict values are reported
    with the placeholder strings '{}', '[]' and '()'.

    Args:
        indict: The value to flatten (normally a dict from ``json.loads``).
        pre: Key path accumulated so far; ``None`` means an empty path.

    Yields:
        Lists of the form ``pre + [key, ..., value]``.
    """
    pre = pre[:] if pre else []
    if isinstance(indict, dict):
        for key, value in indict.items():
            if isinstance(value, dict):
                if not value:
                    yield pre + [key, '{}']
                else:
                    yield from dict_generator(value, pre + [key])
            elif isinstance(value, (list, tuple)):
                if not value:
                    yield pre + [key, '[]' if isinstance(value, list) else '()']
                else:
                    for member in value:
                        yield from dict_generator(member, pre + [key])
            else:
                yield pre + [key, value]
    else:
        # A non-dict reached here is a leaf (e.g. a scalar inside a list).
        # Keep the key path so the result has the same [path..., value] shape
        # as every other yielded item instead of a bare value.
        yield pre + [indict]

def get_all_record_list(read_file_name):
    """
    Load a JSON file and split its flattened key/value pairs into records.

    The file content is parsed with ``json.loads`` and flattened with
    ``dict_generator``.  Each flattened item is treated as ``[path..., value]``;
    the path parts are joined with '.' to form a column name.  A new record
    starts every time the key 'hits.hits._index' is seen, so anything
    flattened before the first such key becomes a record of its own.
    If a column name repeats inside one record, the last value wins.

    Args:
        read_file_name: Path of the JSON text file to read.

    Returns:
        A tuple ``(all_record_list, columns_set)``: the list of record dicts
        (column name -> value) and the set of every column name seen.
    """
    KEY_INDEX_NAME = 'hits.hits._index'

    all_record_list = []
    record_dict = {}
    columns_set = set()

    # Context manager so the file handle is closed even if parsing fails.
    with open(read_file_name, 'r') as fh:
        sValue = json.loads(fh.read())

    for line in dict_generator(sValue):
        key = '.'.join(line[0:-1])
        value = line[-1]
        columns_set.add(key)
        # Close the previous record BEFORE storing the new value; storing
        # first would overwrite the finished record's index with the index
        # belonging to the record that is just starting.
        if key == KEY_INDEX_NAME and record_dict:
            all_record_list.append(record_dict)
            record_dict = {}
        record_dict[key] = value
    # The last record has no following boundary key, so flush it here.
    all_record_list.append(record_dict)
    return all_record_list, columns_set


def list_convert_df(all_record_list, columns_set):
    """
    Build a DataFrame from record dicts, filling missing columns with ''.

    Args:
        all_record_list: Iterable of dicts mapping column name -> value.
        columns_set: Collection of all column names to include.  Its
            iteration order determines the column order of the result.

    Returns:
        A ``pandas.DataFrame`` with one row per record and one column per
        name in ``columns_set``; values absent from a record are ''.
    """
    # Freeze the column order once so rows and header are guaranteed to line
    # up, and pass a list because recent pandas versions are believed to
    # reject a set for ``columns`` (sets are unordered).
    columns = list(columns_set)

    combin_list = [
        [record.get(column, '') for column in columns]
        for record in all_record_list
    ]

    df = pd.DataFrame(combin_list, columns=columns)
    print("write over")
    return df

def change_id_to_first(df):
    """
    Return a copy of *df* with the 'hits.hits._id' column moved to the front.

    The input DataFrame is not modified: ``drop`` returns a new frame and the
    id column is re-inserted into that new frame at position 0.

    Args:
        df: DataFrame that contains a 'hits.hits._id' column.

    Returns:
        A new DataFrame with 'hits.hits._id' as the first column and the
        remaining columns in their original order.

    Raises:
        KeyError: If *df* has no 'hits.hits._id' column.
    """
    KEY_ID_NAME = 'hits.hits._id'
    df_id = df[KEY_ID_NAME]
    df = df.drop(KEY_ID_NAME, axis=1)
    df.insert(0, KEY_ID_NAME, df_id)
    return df


    
    
if __name__ == "__main__":
    # Input file with the JSON text to convert (presumably an Elasticsearch
    # search response, judging by the 'hits.hits.*' keys -- confirm).
    read_file_name = 'file/esdata20181030.txt'
    # NOTE(review): not used below; the output path is given directly
    # in the to_excel call.
    write_file_name = 'file/wirte20181030.csv'

    # Flatten the JSON into records, tabulate them, then move the id column
    # to the front.
    all_record_list, columns_set = get_all_record_list(read_file_name)
    df = change_id_to_first(list_convert_df(all_record_list, columns_set))

    df.to_excel('file/excel_to_python.xlsx', sheet_name='mysheet')

    # Alternative CSV export:
    #df.to_csv('file/excel_to_python.csv',index=False)