spark dataframe筆記 -- 對dataframe一列值保留4位小數
阿新 • • 發佈:2018-12-10
指令碼如下:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Round the values of one DataFrame column to 4 decimal places with PySpark.

Builds a single-column DataFrame (``Rank``) from three floats, prints it,
then prints a second DataFrame whose ``RankPoint`` column holds the same
values rounded to 4 decimal places with ``pyspark.sql.functions.bround``.

@author:
@contact:
@time:
"""
from __future__ import print_function

import os
import sys
import time

from pyspark.sql import Row
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# Python 2 only: force the process-wide default string encoding to UTF-8.
# ``reload`` is not a builtin on Python 3 (and ``setdefaultencoding`` does not
# exist there), so running this unconditionally raises NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Hard-coded location of the local Spark installation; adjust per machine.
os.environ['SPARK_HOME'] = "E:/data_page/spark-2.0.2-bin-hadoop2.7"

spark = SparkSession.builder.appName("indexOrder").getOrCreate()
try:
    sc = spark.sparkContext

    # One Row per float, giving a DataFrame with a single column named "Rank".
    df = (
        sc.parallelize([1.335454331, 2.22254542, 3.444553545])
        .map(lambda x: Row(Rank=x))
        .toDF()
    )
    df.show()

    # NOTE: per the PySpark docs, bround uses HALF_EVEN (banker's) rounding;
    # use F.round instead if HALF_UP rounding is wanted.
    df1 = df.select(F.bround("Rank", scale=4).alias('RankPoint'))
    df1.show()

    time.sleep(1)
finally:
    # Always release the Spark session, even if a step above failed.
    spark.stop()
結果如下:
+-----------+
|       Rank|
+-----------+
|1.335454331|
| 2.22254542|
|3.444553545|
+-----------+
+---------+
|RankPoint|
+---------+
|   1.3355|
|   2.2225|
|   3.4446|
+---------+