使用jpmml-sparkml-executable生成PMML模型文件

原创
2018/11/20 18:36
阅读数 2.7K
加载依赖的jar包
通过 ./spark-shell --jars ./jpmml-sparkml-executable-1.2.13.jar 启动 spark-shell,然后依次执行以下代码:
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.feature.RFormula
import org.jpmml.sparkml.PMMLBuilder
import java.io.File

// Load the training data from CSV: comma-separated, first row is the header,
// and column types are inferred from the data rather than declared up front.
val df = spark.read.format("csv").options(Map(
  "sep" -> ",",
  "inferSchema" -> "true",
  "header" -> "true"
)).load("/user/spark/security/Wholesale_customers_data.csv")
  
  
// Classifier that forms the final pipeline stage.
val lr = new LogisticRegression()
// R-style formula: "target" is the label, every other column is a predictor.
val formula = new RFormula().setFormula("target ~ .")

// Stage order matters: the formula stage runs first, then the classifier.
val pipeline = new Pipeline().setStages(Array[PipelineStage](formula, lr))
  
// Fit every pipeline stage on the loaded DataFrame.
val pipelineModel = pipeline.fit(df)

// The builder is given the schema of the DataFrame the model was fitted on.
val schema = df.schema
// NOTE: despite its name, `pmml` holds the PMMLBuilder, not a PMML document.
val pmml = new PMMLBuilder(schema, pipelineModel)

// Build the PMML into the given destination file; `file` is whatever buildFile returns.
val file = pmml.buildFile(new File("/data/data2/tmp/logit_pipeline.pmml"))

展开阅读全文
加载中
点击加入讨论🔥(1) 发布并加入讨论🔥
打赏
1 评论
0 收藏
0
分享
返回顶部
顶部