📅  最后修改于: 2022-03-11 14:45:47.487000             🧑  作者: Mango
# Use the Spark CSV datasource with options specifying:
# - First line of file is a header
# - Automatically infer the schema of the data
data = spark.read.format("csv")
.option("header", "true")
.option("inferSchema", "true")
.load("/databricks-datasets/samples/population-vs-price/data_geo.csv")
data.cache() # Cache data for faster reuse