whylabs · manickam91 · May 13, 2022
diff --git a/docs/integration-spark.md b/docs/integration-spark.md
@@ -73,7 +73,7 @@ from whyspark import new_profiling_session
 raw_df = spark.read.option("header", "true").csv("/databricks-datasets/timeseries/Fires/Fire_Department_Calls_for_Service.csv")
 df = raw_df.withColumn("call_date", to_timestamp(col("Call Date"), "MM/dd/YYYY"))
 
-profiles = new_profiling_session(newProfilingSession("profilingSession"), name="fire_station_calls", time_colum="call_date") \
+profiles = df.new_profiling_session(newProfilingSession("profilingSession"), name="fire_station_calls", time_column="call_date") \
                  .groupBy("City", "Priority") \
                  .aggProfiles()
 pdf = profiles.toPandas() # you get a Pandas dataset profile of whylogs
@@ -85,4 +85,4 @@ You can then extract and analyze individual profiles:
 from whylogs import DatasetProfile
 prof = DatasetProfile.parse_delimited(pdf['why_profile'][0])[0]
 # prof is a whylogs DatasetProfile that can be analyzed using utilities such as whylogs.viz
-```
+```