1. 程式人生 > Spark 多項式邏輯回歸__多分類

Spark 多項式邏輯回歸__多分類

ring red 不包含 ray str 使用 5.5 ont take

package Spark_MLlib

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{BinaryLogisticRegressionSummary, LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

/**
 * Multinomial (multi-class) logistic regression demo built on a Spark ML Pipeline.
 *
 * Reads a comma-separated text file whose rows hold four numeric feature values followed by
 * a string class label, trains a multinomial logistic regression on a random 70% split,
 * prints the predictions for the remaining 30%, and reports accuracy plus the fitted
 * model's parameters.
 *
 * NOTE(review): `data_schema` is not declared in this file. It is presumably a case class
 * with the fields `features` (ML vector) and `label` (String) declared elsewhere in the
 * `Spark_MLlib` package -- the column names "features" and "label" used below depend on
 * it. Confirm before compiling this file on its own.
 */
object 多項式邏輯回歸__多分類 {

  val spark: SparkSession = SparkSession.builder().master("local").getOrCreate()

  // Brings the implicit encoders into scope so that an RDD can be converted with `.toDF()`.
  import spark.implicits._

  /**
   * Program entry point: builds, fits and evaluates the pipeline.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    try {
      val df = spark.sparkContext
        .textFile("file:///home/soyo/桌面/spark編程測試數據/soyo.txt")
        // Skip blank lines (e.g. a trailing newline); they would otherwise fail at x(0)/x(4).
        .filter(_.trim.nonEmpty)
        .map(_.split(","))
        .map { x =>
          data_schema(
            Vectors.dense(x(0).toDouble, x(1).toDouble, x(2).toDouble, x(3).toDouble),
            x(4)
          )
        }
        .toDF()
      // df.show(150)

      // Both indexers are fitted on the full data set, before the train/test split.
      val labelIndexer = new StringIndexer()
        .setInputCol("label")
        .setOutputCol("indexedLabel")
        .fit(df)

      // Builds a category index inside the feature vector.
      val featureIndexer = new VectorIndexer()
        .setInputCol("features")
        .setOutputCol("indexedFeatures")
        .fit(df)

      // No seed is passed, so the split differs from run to run.
      val Array(trainData, testData) = df.randomSplit(Array(0.7, 0.3))

      // Elastic-net mixing parameter 0.8. setFamily("multinomial") forces multinomial
      // logistic regression; when the family is left unset Spark uses "auto", which
      // selects binomial or multinomial from the number of classes.
      val lr = new LogisticRegression()
        .setLabelCol("indexedLabel")
        .setFeaturesCol("indexedFeatures")
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(0.8)
        .setFamily("multinomial")

      // Maps the numeric prediction back to the original string label.
      val labelConverter = new IndexToString()
        .setInputCol("prediction")
        .setOutputCol("predictionLabel")
        .setLabels(labelIndexer.labels)

      val lrPipeline = new Pipeline()
        .setStages(Array(labelIndexer, featureIndexer, lr, labelConverter))
      val lrPipelineModel = lrPipeline.fit(trainData)
      val lrPrediction = lrPipelineModel.transform(testData)
      lrPrediction.show(150)
      // lrPrediction.take(100).foreach(println)

      // Model evaluation. The evaluator's default metric is F1, so "accuracy" must be
      // requested explicitly for the value printed as accuracy (and the error rate derived
      // from it) to be what the messages claim.
      val evaluator = new MulticlassClassificationEvaluator()
        .setLabelCol("indexedLabel")
        .setPredictionCol("prediction")
        .setMetricName("accuracy")
      val lrAccuracy = evaluator.evaluate(lrPrediction)
      println(s"準確率為: $lrAccuracy")
      val lrError = 1 - lrAccuracy
      println(s"錯誤率為: $lrError")

      // Stage 2 is the fitted logistic regression (stage order: labelIndexer,
      // featureIndexer, lr, labelConverter).
      lrPipelineModel.stages(2) match {
        case lrModel: LogisticRegressionModel =>
          // NOTE(review): the two messages below say 二項 (binomial) although the model
          // is multinomial; the text is kept as-is so the program output stays unchanged.
          println(s"二項邏輯回歸模型系數矩陣: ${lrModel.coefficientMatrix}")
          println(s"二項邏輯回歸模型的截距向量: ${lrModel.interceptVector}")
          println(s"類的數量(標簽可以使用的值): ${lrModel.numClasses}")
          println(s"模型所接受的特征的數量: ${lrModel.numFeatures}")
          // The original author notes that the multinomial model carries no training
          // summary; hasSummary reports whether one is attached (may depend on the
          // Spark version).
          println(lrModel.hasSummary)
        case other =>
          throw new IllegalStateException(
            s"Expected a LogisticRegressionModel at pipeline stage 2, found ${other.getClass.getName}"
          )
      }
    } finally {
      // Release the local Spark context even when the job fails.
      spark.stop()
    }
  }
}

結果:

+-----------------+-----+------------+-----------------+--------------------+--------------------+----------+---------------+
| features|label|indexedLabel| indexedFeatures| rawPrediction| probability|prediction|predictionLabel|
+-----------------+-----+------------+-----------------+--------------------+--------------------+----------+---------------+
|[4.4,3.2,1.3,0.2]|soyo1| 1.0|[4.4,3.2,1.3,0.2]|[0.06313829278191...|[0.23858281707128...| 1.0| soyo1|
|[4.6,3.4,1.4,0.3]|soyo1| 1.0|[4.6,3.4,1.4,0.3]|[0.06313829278191...|[0.23750012598226...| 1.0| soyo1|
|[4.7,3.2,1.6,0.2]|soyo1| 1.0|[4.7,3.2,1.6,0.2]|[0.06313829278191...|[0.24710416166321...| 1.0| soyo1|
|[4.8,3.4,1.6,0.2]|soyo1| 1.0|[4.8,3.4,1.6,0.2]|[0.06313829278191...|[0.23716995683018...| 1.0| soyo1|
|[4.8,3.4,1.9,0.2]|soyo1| 1.0|[4.8,3.4,1.9,0.2]|[0.06313829278191...|[0.24567798276462...| 1.0| soyo1|
|[4.9,2.4,3.3,1.0]|soyo2| 0.0|[4.9,2.4,3.3,1.0]|[0.06313829278191...|[0.38071131817453...| 0.0| soyo2|
|[5.0,3.2,1.2,0.2]|soyo1| 1.0|[5.0,3.2,1.2,0.2]|[0.06313829278191...|[0.23576075216827...| 1.0| soyo1|
|[5.0,3.5,1.3,0.3]|soyo1| 1.0|[5.0,3.5,1.3,0.3]|[0.06313829278191...|[0.22978111243935...| 1.0| soyo1|
|[5.2,4.1,1.5,0.1]|soyo1| 1.0|[5.2,4.1,1.5,0.1]|[0.06313829278191...|[0.19523110424215...| 1.0| soyo1|
|[5.4,3.9,1.3,0.4]|soyo1| 1.0|[5.4,3.9,1.3,0.4]|[0.06313829278191...|[0.21630436073381...| 1.0| soyo1|
|[5.5,2.4,3.8,1.1]|soyo2| 0.0|[5.5,2.4,3.8,1.1]|[0.06313829278191...|[0.39807479409636...| 0.0| soyo2|
|[5.5,2.5,4.0,1.3]|soyo2| 0.0|[5.5,2.5,4.0,1.3]|[0.06313829278191...|[0.40810357240132...| 0.0| soyo2|
|[5.6,2.8,4.9,2.0]|soyo3| 2.0|[5.6,2.8,4.9,2.0]|[0.06313829278191...|[0.44454733071968...| 0.0| soyo2|
|[5.7,2.9,4.2,1.3]|soyo2| 0.0|[5.7,2.9,4.2,1.3]|[0.06313829278191...|[0.39634982244233...| 0.0| soyo2|
|[5.8,2.6,4.0,1.2]|soyo2| 0.0|[5.8,2.6,4.0,1.2]|[0.06313829278191...|[0.39930520027794...| 0.0| soyo2|
|[5.8,2.7,4.1,1.0]|soyo2| 0.0|[5.8,2.7,4.1,1.0]|[0.06313829278191...|[0.38762610877473...| 0.0| soyo2|
|[5.8,2.7,5.1,1.9]|soyo3| 2.0|[5.8,2.7,5.1,1.9]|[0.06313829278191...|[0.44792417666537...| 0.0| soyo2|
|[5.9,3.0,5.1,1.8]|soyo3| 2.0|[5.9,3.0,5.1,1.8]|[0.06313829278191...|[0.43418725338764...| 0.0| soyo2|
|[6.0,2.2,4.0,1.0]|soyo2| 0.0|[6.0,2.2,4.0,1.0]|[0.06313829278191...|[0.40634099537710...| 0.0| soyo2|
|[6.0,2.7,5.1,1.6]|soyo2| 0.0|[6.0,2.7,5.1,1.6]|[0.06313829278191...|[0.43688076686419...| 0.0| soyo2|
|[6.0,3.4,4.5,1.6]|soyo2| 0.0|[6.0,3.4,4.5,1.6]|[0.06313829278191...|[0.39704954911011...| 0.0| soyo2|
|[6.2,2.2,4.5,1.5]|soyo2| 0.0|[6.2,2.2,4.5,1.5]|[0.06313829278191...|[0.43847273913421...| 0.0| soyo2|
|[6.2,2.8,4.8,1.8]|soyo3| 2.0|[6.2,2.8,4.8,1.8]|[0.06313829278191...|[0.43518321759857...| 0.0| soyo2|
|[6.3,2.7,4.9,1.8]|soyo3| 2.0|[6.3,2.7,4.9,1.8]|[0.06313829278191...|[0.44055947195014...| 0.0| soyo2|
|[6.3,2.9,5.6,1.8]|soyo3| 2.0|[6.3,2.9,5.6,1.8]|[0.06313829278191...|[0.44715759200377...| 0.0| soyo2|
|[6.3,3.4,5.6,2.4]|soyo3| 2.0|[6.3,3.4,5.6,2.4]|[0.06313829278191...|[0.45196576310313...| 0.0| soyo2|
|[6.4,2.8,5.6,2.1]|soyo3| 2.0|[6.4,2.8,5.6,2.1]|[0.06313829278191...|[0.46017875340546...| 0.0| soyo2|
|[6.4,2.8,5.6,2.2]|soyo3| 2.0|[6.4,2.8,5.6,2.2]|[0.06313829278191...|[0.46321910727428...| 0.0| soyo2|
|[6.4,3.1,5.5,1.8]|soyo3| 2.0|[6.4,3.1,5.5,1.8]|[0.06313829278191...|[0.43862320280893...| 0.0| soyo2|
|[6.4,3.2,4.5,1.5]|soyo2| 0.0|[6.4,3.2,4.5,1.5]|[0.06313829278191...|[0.40056786531830...| 0.0| soyo2|
|[6.5,3.0,5.5,1.8]|soyo3| 2.0|[6.5,3.0,5.5,1.8]|[0.06313829278191...|[0.44199581778961...| 0.0| soyo2|
|[6.6,2.9,4.6,1.3]|soyo2| 0.0|[6.6,2.9,4.6,1.3]|[0.06313829278191...|[0.40579282648595...| 0.0| soyo2|
|[6.7,2.5,5.8,1.8]|soyo3| 2.0|[6.7,2.5,5.8,1.8]|[0.06313829278191...|[0.46287803722998...| 0.0| soyo2|
|[6.7,3.0,5.2,2.3]|soyo3| 2.0|[6.7,3.0,5.2,2.3]|[0.06313829278191...|[0.45387841693477...| 0.0| soyo2|
|[6.7,3.1,4.7,1.5]|soyo2| 0.0|[6.7,3.1,4.7,1.5]|[0.06313829278191...|[0.40924150360290...| 0.0| soyo2|
|[6.7,3.3,5.7,2.5]|soyo3| 2.0|[6.7,3.3,5.7,2.5]|[0.06313829278191...|[0.45972648058424...| 0.0| soyo2|
|[6.8,3.0,5.5,2.1]|soyo3| 2.0|[6.8,3.0,5.5,2.1]|[0.06313829278191...|[0.45251276088924...| 0.0| soyo2|
|[6.8,3.2,5.9,2.3]|soyo3| 2.0|[6.8,3.2,5.9,2.3]|[0.06313829278191...|[0.45975331380088...| 0.0| soyo2|
|[6.9,3.2,5.7,2.3]|soyo3| 2.0|[6.9,3.2,5.7,2.3]|[0.06313829278191...|[0.45642868507279...| 0.0| soyo2|
|[7.2,3.0,5.8,1.6]|soyo3| 2.0|[7.2,3.0,5.8,1.6]|[0.06313829278191...|[0.44031726493318...| 0.0| soyo2|
|[7.2,3.2,6.0,1.8]|soyo3| 2.0|[7.2,3.2,6.0,1.8]|[0.06313829278191...|[0.44483171938259...| 0.0| soyo2|
|[7.6,3.0,6.6,2.1]|soyo3| 2.0|[7.6,3.0,6.6,2.1]|[0.06313829278191...|[0.47047723863543...| 0.0| soyo2|
|[7.7,3.0,6.1,2.3]|soyo3| 2.0|[7.7,3.0,6.1,2.3]|[0.06313829278191...|[0.46845272424381...| 0.0| soyo2|
|[7.7,3.8,6.7,2.2]|soyo3| 2.0|[7.7,3.8,6.7,2.2]|[0.06313829278191...|[0.45233124776236...| 0.0| soyo2|
+-----------------+-----+------------+-----------------+--------------------+--------------------+----------+---------------+

準確率為: 0.36458333333333337
錯誤率為: 0.6354166666666666
二項邏輯回歸模型系數矩陣: 3 x 4 CSCMatrix
(1,1) 0.35559564188466614
(1,2) -0.203185158868005
(1,3) -0.43876460704959996
(2,3) 0.0283914830858408
二項邏輯回歸模型的截距向量: [0.06313829278191783,0.1708622138778958,-0.23400050665981365]
類的數量(標簽可以使用的值): 3
模型所接受的特征的數量: 4
false

Spark 多項式邏輯回歸__多分類