Load the required dependency jar (jpmml-sparkml).
Start the Spark shell with the jar on the classpath: ./spark-shell --jars ./jpmml-sparkml-executable-1.2.13.jar
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.feature.RFormula
import org.jpmml.sparkml.PMMLBuilder
import java.io.File
// Read the wholesale-customers CSV into a DataFrame.
// Header row supplies column names; schema types are inferred from the data.
val df = (spark.read
  .format("csv")
  .options(Map("sep" -> ",", "inferSchema" -> "true", "header" -> "true"))
  .load("/user/spark/security/Wholesale_customers_data.csv"))
// Build a two-stage ML pipeline:
//   1. RFormula "target ~ ." — use every column except `target` as features,
//      with `target` as the label (R-style model formula).
//   2. LogisticRegression on the assembled features.
val formula = new RFormula().setFormula("target ~ .")
val lr = new LogisticRegression()
val stages = Array[PipelineStage](formula, lr)
val pipeline = new Pipeline().setStages(stages)

// PMMLBuilder needs the schema of the *raw* input DataFrame (pre-transformation),
// so capture it before fitting.
val schema = df.schema
val pipelineModel = pipeline.fit(df)

// Export the fitted pipeline as a PMML document for use outside Spark.
val pmml = new PMMLBuilder(schema, pipelineModel)
val file = pmml.buildFile(new File("/data/data2/tmp/logit_pipeline.pmml"))