
Several ways to create a Dataset

The overloads below come from Spark SQL's SparkSession class (shown as decompiled Java, which is why names such as SparkSession$.MODULE$ and SparkSession$$anonfun$1 appear). They fall into three groups: schemas derived by reflection from Scala case classes or JavaBeans, an explicit StructType supplied alongside Row data, and typed createDataset variants driven by an Encoder.

    // Scala-reflection path: the element type A is a case class (Product); its
    // schema is derived from the TypeTag, so the caller never supplies one.
    public <A extends Product> Dataset<Row> createDataFrame(RDD<A> rdd, TypeTags.TypeTag<A> evidence$2) {
        SparkSession$.MODULE$.setActiveSession(this);
        StructType schema = (StructType) ScalaReflection$.MODULE$.schemaFor(evidence$2).dataType();
        Seq attributeSeq = schema.toAttributes();
        RDD rowRDD = RDDConversions$.MODULE$.productToRowRdd(rdd,
            (Seq) schema.map(new SparkSession$$anonfun$1(this), Seq$.MODULE$.canBuildFrom()));
        // Distributed input: the result is backed by a LogicalRDD plan node.
        return Dataset$.MODULE$.ofRows(this, new LogicalRDD(attributeSeq, rowRDD, this));
    }

    // Same reflection-derived schema, but for a local Seq: the data is embedded
    // in the plan as a LocalRelation instead of wrapping an RDD.
    @Experimental
    public <A extends Product> Dataset<Row> createDataFrame(Seq<A> data, TypeTags.TypeTag<A> evidence$3) {
        SparkSession$.MODULE$.setActiveSession(this);
        StructType schema = (StructType) ScalaReflection$.MODULE$.schemaFor(evidence$3).dataType();
        Seq attributeSeq = schema.toAttributes();
        return Dataset$.MODULE$.ofRows(this, LocalRelation$.MODULE$.fromProduct(attributeSeq, data));
    }
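These two overloads are what a case-class-based call reaches. A minimal sketch of driving them from Scala (the Person case class and the local[*] master are assumptions for the demo, not part of the source above):

    import org.apache.spark.sql.SparkSession

    // Hypothetical demo type; any case class (a Product) works here.
    case class Person(name: String, age: Int)

    object ProductDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()

        // Local Seq of case classes -> LocalRelation under the hood.
        val dfLocal = spark.createDataFrame(Seq(Person("Ann", 30), Person("Bob", 25)))

        // RDD of case classes -> LogicalRDD; the schema still comes from reflection.
        val rdd = spark.sparkContext.parallelize(Seq(Person("Cid", 41)))
        val dfDist = spark.createDataFrame(rdd)

        dfLocal.show()
        dfDist.show()
        spark.stop()
      }
    }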
    // Explicit-schema path: the caller provides rows plus a StructType, so no
    // reflection is involved; rows must match the schema positionally.
    @DeveloperApi
    public Dataset<Row> createDataFrame(RDD<Row> rowRDD, StructType schema) {
        return createDataFrame(rowRDD, schema, true);
    }

    @DeveloperApi
    public Dataset<Row> createDataFrame(JavaRDD<Row> rowRDD, StructType schema) {
        return createDataFrame(rowRDD.rdd(), schema);
    }

    @DeveloperApi
    public Dataset<Row> createDataFrame(List<Row> rows, StructType schema) {
        return Dataset$.MODULE$.ofRows(this, LocalRelation$.MODULE$.fromExternalRows(
            schema.toAttributes(),
            (Seq) JavaConverters$.MODULE$.asScalaBufferConverter(rows).asScala()));
    }
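Calling the explicit-schema overload looks like this; a minimal sketch, assuming the same local session as above (the field names and types are illustrative):

    import org.apache.spark.sql.{Row, SparkSession}
    import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

    object RowSchemaDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()

        // The schema is owned by the caller; each Row must line up with it.
        val schema = StructType(Seq(
          StructField("name", StringType, nullable = false),
          StructField("age", IntegerType, nullable = false)))

        val rowRDD = spark.sparkContext.parallelize(Seq(Row("Ann", 30), Row("Bob", 25)))
        val df = spark.createDataFrame(rowRDD, schema)
        df.printSchema()
        spark.stop()
      }
    }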
    // JavaBean path: the schema is inferred from the bean's getters via
    // getSchema(beanClass); this is the Java-side mirror of the case-class path.
    public Dataset<Row> createDataFrame(RDD<?> rdd, Class<?> beanClass) {
        Seq attributeSeq = getSchema(beanClass);
        String className = beanClass.getName();
        RDD rowRdd = rdd.mapPartitions(new SparkSession$$anonfun$2(this, attributeSeq, className),
            rdd.mapPartitions$default$2(), ClassTag$.MODULE$.apply(InternalRow.class));
        return Dataset$.MODULE$.ofRows(this, new LogicalRDD(attributeSeq, rowRdd, this));
    }

    public Dataset<Row> createDataFrame(JavaRDD<?> rdd, Class<?> beanClass) {
        return createDataFrame(rdd.rdd(), beanClass);
    }

    public Dataset<Row> createDataFrame(List<?> data, Class<?> beanClass) {
        Seq attrSeq = getSchema(beanClass);
        BeanInfo beanInfo = Introspector.getBeanInfo(beanClass);
        Iterator rows = SQLContext$.MODULE$.beansToRows(
            ((IterableLike) JavaConverters$.MODULE$.asScalaBufferConverter(data).asScala()).iterator(),
            beanInfo, attrSeq);
        return Dataset$.MODULE$.ofRows(this, new LocalRelation(attrSeq, rows.toSeq()));
    }

    // Wraps an existing data-source BaseRelation in a LogicalRelation plan node.
    public Dataset<Row> baseRelationToDataFrame(BaseRelation baseRelation) {
        return Dataset$.MODULE$.ofRows(this, new LogicalRelation(baseRelation,
            LogicalRelation$.MODULE$.apply$default$2(), LogicalRelation$.MODULE$.apply$default$3()));
    }

    // Encoder path: createDataset[T] resolves an ExpressionEncoder for T and
    // uses it to convert each element to Spark's InternalRow representation.
    @Experimental
    public <T> Dataset<T> createDataset(Seq<T> data, Encoder<T> evidence$4) {
        ExpressionEncoder enc = org.apache.spark.sql.catalyst.encoders.package$.MODULE$.encoderFor(evidence$4);
        Seq attributes = enc.schema().toAttributes();
        Seq encoded = (Seq) data.map(new SparkSession$$anonfun$3(this, enc), Seq$.MODULE$.canBuildFrom());
        LocalRelation plan = new LocalRelation(attributes, encoded);
        return Dataset$.MODULE$.apply(this, plan, evidence$4);
    }

    @Experimental
    public <T> Dataset<T> createDataset(RDD<T> data, Encoder<T> evidence$5) {
        ExpressionEncoder enc = org.apache.spark.sql.catalyst.encoders.package$.MODULE$.encoderFor(evidence$5);
        Seq attributes = enc.schema().toAttributes();
        RDD encoded = data.map(new SparkSession$$anonfun$4(this, enc), ClassTag$.MODULE$.apply(InternalRow.class));
        LogicalRDD plan = new LogicalRDD(attributes, encoded, this);
        return Dataset$.MODULE$.apply(this, plan, evidence$5);
    }

    @Experimental
    public <T> Dataset<T> createDataset(List<T> data, Encoder<T> evidence$6) {
        return createDataset((Seq) JavaConverters$.MODULE$.asScalaBufferConverter(data).asScala(), evidence$6);
    }
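Finally, the createDataset overloads keep the element type T instead of erasing it to Row; the required Encoder usually comes from import spark.implicits._. A minimal sketch, with session setup as above (the explicit Encoders.STRING argument on the java.util.List overload is only there to show where the encoder parameter goes):

    import org.apache.spark.sql.{Encoders, SparkSession}

    object DatasetDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
        import spark.implicits._ // supplies the implicit Encoder[T] arguments

        // createDataset(Seq[T]) -> LocalRelation, typed as Dataset[Int].
        val dsLocal = spark.createDataset(Seq(1, 2, 3))

        // createDataset(RDD[T]) -> LogicalRDD; the encoder runs per element.
        val dsDist = spark.createDataset(spark.sparkContext.parallelize(Seq("a", "b")))

        // The java.util.List overload, passing the encoder explicitly.
        val dsJava = spark.createDataset(java.util.Arrays.asList("x", "y"))(Encoders.STRING)

        dsLocal.show(); dsDist.show(); dsJava.show()
        spark.stop()
      }
    }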