支援向量機(Support Vector Machine,SVM)是由 Corinna Cortes 和 Vapnik 等人於 1995 年首先提出的,它在解決小樣本、非線性及高維模式識別問題中表現出許多特有的優勢,並能夠推廣應用到函式擬合等其他機器學習問題中。



package org.apache.spark.mllib.learning.regression

import java.text.SimpleDateFormat
import java.util.Date

import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by xubo on 2016/5/23.
 * SVM
 *
 * Example driver that trains a linear SVM with `SVMWithSGD` on a LIBSVM-format
 * data set, evaluates it with the area under the ROC curve, prints the learned
 * weights and a sample of (score, label) pairs, then saves the model and loads
 * it back.
 */
object SVMFromSparkLearning {

  /**
   * Runs the whole example on a local 4-thread Spark master.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // The runtime class name of a Scala object ends in '$'; drop it for the app name.
    val appName = this.getClass.getSimpleName.filter(_ != '$')
    val conf = new SparkConf().setMaster("local[4]").setAppName(appName)
    val sc = new SparkContext(conf)

    // Stop the context even when loading, training or saving fails.
    try {
      // Load training data in LIBSVM format.
      val data = MLUtils.loadLibSVMFile(sc, "file/data/mllib/input/regression/sample_libsvm_data.txt")

      // Split data into training (60%) and test (40%); the fixed seed keeps the split repeatable.
      val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      val training = splits(0).cache()
      val test = splits(1)

      // Run training algorithm to build the model.
      val numIterations = 100
      val model = SVMWithSGD.train(training, numIterations)

      // Clear the default threshold so that predict yields raw scores.
      model.clearThreshold()

      // Compute raw scores on the test set, paired with the true labels.
      val scoreAndLabels = test.map { point =>
        (model.predict(point.features), point.label)
      }

      // Get evaluation metrics.
      val metrics = new BinaryClassificationMetrics(scoreAndLabels)
      val auROC = metrics.areaUnderROC()

      println(s"Area under ROC = $auROC")
      println(model.weights)
      println(s"model.weights.size${model.weights.size}")
      scoreAndLabels.take(10).foreach(println)

      // Save and load model; the timestamp gives every run its own output directory.
      val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
      val path = s"file/data/mllib/output/regression/sample_libsvm_data$iString/result"
      model.save(sc, path)
      val sameModel = SVMModel.load(sc, path)
      println(sameModel.weights)
    } finally {
      sc.stop()
    }
  }
}


Area under ROC = 1.0
