1. 程式人生 > >Spark Mlib(七)用spark實現LogisticRegression

Spark MLlib(七)用 Spark 實現 LogisticRegression

Logistic 迴歸又稱 logistic 迴歸分析，是一種廣義的線性迴歸分析模型，常用於資料探勘、疾病自動診斷、經濟預測等領域。以下是 Spark 中該演算法的實現方式，原地址為 http://spark.apache.org/docs/latest/mllib-linear-methods.html#classification

package alg

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils

/**
 * Example job: trains a logistic regression model with L-BFGS on a
 * LibSVM-format data set, prints its accuracy on a held-out split and
 * saves the trained model.
 */
object logisticRegression {

  /**
   * Runs the whole pipeline on a local Spark master.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)

    // Always release the SparkContext, even when a step below throws.
    try {
      // 1. Load the data (LibSVM text format).
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

      // 2. Split into training (60%) and test (40%) sets; the seed is fixed at 11.
      val splitData = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      // The training set is cached before being handed to the optimizer.
      val training = splitData(0).cache()
      val test = splitData(1)

      // 3. Train the model.
      // NOTE(review): numClasses is hard-coded to 10; confirm it matches the
      // label range of the input file.
      val model = new LogisticRegressionWithLBFGS()
        .setNumClasses(10)
        .run(training)

      // 4. Evaluate on the test set: pair each prediction with its true label.
      val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
        val prediction = model.predict(features)
        (prediction, label)
      }
      val metrics = new MulticlassMetrics(predictionAndLabels)
      val accuracy = metrics.accuracy
      println(s"Accuracy=$accuracy")

      // 5. Save the trained model.
      model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
    } finally {
      sc.stop()
    }
  }
}