Spark MLlib(六)用Spark實現貝葉斯分類器
阿新 • • 發佈:2018-11-13
貝葉斯分類器是各種分類器中分類錯誤概率最小或者在預先給定代價的情況下平均風險最小的分類器。下面是spark官網(http://spark.apache.org/docs/latest/mllib-naive-bayes.html)給出的例子
package alg
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
/**
 * Example driver: trains and evaluates a multinomial Naive Bayes classifier
 * with Spark MLlib, then saves the model and loads it back.
 *
 * Steps performed by [[naiveBayes.main]]:
 *  1. load a LIBSVM-formatted data set,
 *  2. randomly split it 60% / 40% into training and test sets,
 *  3. train the model on the training set,
 *  4. print the fraction of test points whose prediction equals the label,
 *  5. save the model and load it again from the same path.
 */
object naiveBayes {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)
    // Always stop the context, even when the job fails (missing input file, save error, ...),
    // so the local Spark runtime is released instead of leaking.
    try {
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
      val Array(training, test) = data.randomSplit(Array(0.6, 0.4))

      val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

      // Pair each test point's predicted label with its true label.
      val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
      val correct = predictionAndLabel.filter { case (predicted, label) => predicted == label }.count()
      // Multiply by 1.0 first so the division is done in floating point.
      val accuracy = 1.0 * correct / test.count()
      println(s"accuracy:$accuracy")

      // Save and load model.
      model.save(sc, "target/tmp/myNaiveBayesModel")
      val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    } finally {
      sc.stop()
    }
  }
}