方法1:在 foreachRDD 之前(即在得到 stream 之后)先进行过滤
我取的是type值为null(不是"null")的数据
// Method 1: filter on the DStream itself, before foreachRDD.
// Keeps only records whose JSON "type" field is a real JSON null
// (getString returns null for it) -- NOT the 4-character string "null".
val sv= stream.map(_.value())
.filter(jd => {
// Truncate everything after the last '}' -- presumably the Kafka payload
// can carry a trailing suffix after the JSON object; TODO confirm.
val json1 = jd.substring(0, jd.lastIndexOf("}")+1)
JSON.parseObject(json1).getString("type")==null
})
// NOTE(review): this foreachRDD is opened but never closed in this snippet;
// it is an incomplete fragment showing where method 1's per-batch work would go.
sv.foreachRDD(rdd => {
方法2:在 foreachRDD 内部进行过滤
// Method 2: filter inside foreachRDD, after converting the batch RDD to a Dataset.
// Keeps only records whose JSON "type" field is a real JSON null (fastjson's
// getString returns null for it); a quoted "null" would be the string "null" instead.
stream.foreachRDD(rdd => { // rdd is the underlying KafkaRDD for this micro-batch
  val ds: Dataset[String] = spark.createDataset(rdd.map(_.value()))

  // Capture this batch's Kafka offset ranges up front so they can be committed
  // once the batch has been durably written (at-least-once delivery).
  val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  // Coupon records are identified by a JSON-null "type" field.
  val jsondata = ds.filter(JSON.parseObject(_).getString("type") == null)

  jsondata.map(j => {
    val json = JSON.parseObject(j.toString)
    val coupon_batch_id = json.getString("coupon_batch_id")
    val prom_id         = json.getString("prom_id")
    val coupon_gt       = json.getString("coupon_gt")
    val coupon_minus    = json.getString("coupon_minus")
    // NOTE(review): key "prom_prom_msg" looks like a possible typo for
    // "prom_msg" -- confirm against the producer's schema before changing.
    val prom_msg        = json.getString("prom_prom_msg")
    val prom_key        = json.getString("prom_key")
    val url             = json.getString("url")
    val title           = json.getString("title")
    val img_url         = json.getString("img_url")
    val brand_name      = json.getString("brand_name")
    val cat1_id         = json.getString("root_cat_id")
    val cat2_id         = json.getString("cat_id")
    val cat3_id         = json.getString("cat3_id")
    val cat1_name       = json.getString("cat_name_1")
    val cat2_name       = json.getString("cat_name_2")
    val cat3_name       = json.getString("cat_name_3")
    val price           = json.getString("platform_price")
    val vip_price       = json.getString("vip_price")
    // In the payload, `"coupon_deal_type": null` (unquoted) parses to a real
    // null, so the test is `== null`; only a quoted "null" would need
    // equals("null"). cp_type: 0 = no promo message and no coupon threshold,
    // 1 = at least one of them present.
    val cp_type = if (prom_msg == null && coupon_gt == null) "0" else "1"
    (coupon_batch_id + "-" + prom_id, coupon_gt + "-" + coupon_minus, prom_msg, prom_key,
      url, title, img_url, brand_name, price, vip_price,
      cat1_id, cat2_id, cat3_id, cat1_name, cat2_name, cat3_name, cp_type)
  }).toDF("cp_id", "copn_msg", "prom_msg", "prom_key", "url", "title", "img_url",
      "brand_name", "price", "vip_price", "cat1_id", "cat2_id", "cat3_id",
      "cat1_name", "cat2_name", "cat3_name", "cp_type")
    .createOrReplaceTempView("cpdata")

  val result = spark.sql("select * from cpdata")
  result.show() // debug output; drop in production

  val props = new Properties()
  // FIX: config key was "jdba_user" -- a typo; every other key uses the
  // "jdbc_" prefix, and the misspelled key would fail at runtime.
  props.setProperty("user", load.getString("jdbc_user"))
  props.setProperty("password", load.getString("jdbc_password"))
  result.write.mode(SaveMode.Append)
    .jdbc(load.getString("jdbc_url"), load.getString("jdbc_tableName"), props)

  // FIX: offsetRanges was computed but never committed, so offsets were lost on
  // restart and the batch would be reprocessed. Commit only AFTER the JDBC write
  // succeeds. NOTE(review): assumes `stream` is a direct Kafka DStream
  // (kafka010) -- confirm CanCommitOffsets is imported/applicable.
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
})
// Start the streaming context and block the driver until it is stopped
// (or fails). FIX: removed a stray markdown code-fence (```) that was fused
// onto the awaitTermination() line and would not compile.
ssc.start()
ssc.awaitTermination()