1. 程式人生 > 如何優雅地實現 pandas DataFrame 和 Spark DataFrame 的相互轉換

如何優雅地實現 pandas DataFrame 和 Spark DataFrame 的相互轉換

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun  8 16:27:57 2018
@author: luogan
"""

import pandas as pd
from pyspark.sql import SparkSession
# Get the existing SparkSession, or create one, for the app named "dataFrame".
spark = (
    SparkSession.builder
    .appName("dataFrame")
    .getOrCreate()
)
# Build the sample data.


# Sample pandas DataFrame with two columns, 'a' and 'b'.
ll3 = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])

# Plain-Python view of the frame: a list of row lists plus the column names.
cc = ll3.values.tolist()
dd = list(ll3.columns)

# pandas.DataFrame -> Spark DataFrame, built from the rows and column names.
# NOTE(review): spark.createDataFrame(ll3) should also accept the pandas
# DataFrame directly -- confirm against the installed PySpark version.
spark_df = spark.createDataFrame(cc, dd)

# show() prints the table itself and returns None, so this line prints the
# table first and then "spark.dataFram= None".
print('spark.dataFram=', spark_df.show())

# Spark DataFrame -> pandas.DataFrame.
pandas_df = spark_df.toPandas()
print('pandas.DataFrame=', pandas_df)
+---+---+
|  a|  b|
+---+---+
|  1|  2|
|  3|  4|
+---+---+

spark.dataFram= None
pandas.DataFrame=    a  b
0  1  2
1  3  4