📅  最后修改于: 2022-03-11 14:47:18.904000             🧑  作者: Mango
# Build a six-row demo DataFrame with columns ("id", "hour") and print it.
# NOTE(review): `spark` is not defined in this snippet; presumably an active
# SparkSession provided by the surrounding environment -- confirm.
x = list(enumerate([18.0, 19.0, 8.0, 5.0, 2.2, 4.0]))
d = spark.createDataFrame(x, schema=["id", "hour"])
d.show()
from pyspark.ml.feature import Bucketizer
# Bucket boundaries handed to Bucketizer: 0, 1, 2, 3, 4, then +infinity as
# the final edge.
splits = [*range(5), float("Inf")]

# Bucketizer reads the "BATHSTOTAL" column and writes its result to "baths".
buck = Bucketizer(
    splits=splits,
    inputCol="BATHSTOTAL",
    outputCol="baths",
)

# NOTE(review): `df` is not created anywhere in the visible code; presumably
# an existing DataFrame with a "BATHSTOTAL" column -- confirm against caller.
df = buck.transform(df)

# Print the source column next to the new bucketed column.
df.select("BATHSTOTAL", "baths").show()