-
Notifications
You must be signed in to change notification settings - Fork 216
Open
Description
I am testing the latest master
(d3ed432) with Spark Connect 4.0.1 and found a serialization issue.
The class com.google.cloud.spark.bigquery.v2.context.ArrowInputPartitionContext
does not define a fixed serialVersionUID. As a result, the JVM computes a default one that differs
between the driver and executor classloaders, and serialization fails when running in cluster mode. In local mode (single instance), it works correctly.
This is the error I am seeing in the logs:
25/09/26 14:30:47 INFO SessionHolder: Session with userId: tomas and sessionId: 11c0d5f8-e024-4d11-80e1-3460893dd721 accessed,time 1758897047664 ms.
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1295)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3207)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3141)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3130)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:50)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1009)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2484)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2505)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2524)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:544)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:497)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:58)
at org.apache.spark.sql.classic.Dataset.collectFromPlan(Dataset.scala:2244)
at org.apache.spark.sql.classic.Dataset.$anonfun$head$1(Dataset.scala:1379)
at org.apache.spark.sql.classic.Dataset.$anonfun$withAction$2(Dataset.scala:2234)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
at org.apache.spark.sql.classic.Dataset.$anonfun$withAction$1(Dataset.scala:2232)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:272)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:125)
at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:186)
at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:102)
at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:295)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:124)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:237)
at org.apache.spark.sql.classic.Dataset.withAction(Dataset.scala:2232)
at org.apache.spark.sql.classic.Dataset.head(Dataset.scala:1379)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2810)
at org.apache.spark.sql.classic.Dataset.getRows(Dataset.scala:339)
at org.apache.spark.sql.classic.Dataset.showString(Dataset.scala:375)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.transformShowString(SparkConnectPlanner.scala:307)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.$anonfun$transformRelation$1(SparkConnectPlanner.scala:150)
at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$usePlanCache$3(SessionHolder.scala:477)
at scala.Option.getOrElse(Option.scala:201)
at org.apache.spark.sql.connect.service.SessionHolder.usePlanCache(SessionHolder.scala:476)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.transformRelation(SparkConnectPlanner.scala:147)
at org.apache.spark.sql.connect.execution.SparkConnectPlanExecution.handlePlan(SparkConnectPlanExecution.scala:74)
at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.handlePlan(ExecuteThreadRunner.scala:314)
at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1(ExecuteThreadRunner.scala:225)
at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1$adapted(ExecuteThreadRunner.scala:196)
at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$2(SessionHolder.scala:341)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$1(SessionHolder.scala:341)
at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:186)
at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:102)
at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
at org.apache.spark.sql.connect.service.SessionHolder.withSession(SessionHolder.scala:340)
at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.executeInternal(ExecuteThreadRunner.scala:196)
at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.org$apache$spark$sql$connect$execution$ExecuteThreadRunner$$execute(ExecuteThreadRunner.scala:125)
at org.apache.spark.sql.connect.execution.ExecuteThreadRunner$ExecutionThread.run(ExecuteThreadRunner.scala:347)
Caused by: java.io.InvalidClassException: com.google.cloud.spark.bigquery.v2.context.ArrowInputPartitionContext; local class incompatible: stream classdesc serialVersionUID = -6698485693266257704, local class serialVersionUID = -7814378108998539748
at java.base/java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:598)
at java.base/java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2078)
at java.base/java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1927)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2252)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1762)
at java.base/java.io.ObjectInputStream$FieldValues.<init>(ObjectInputStream.java:2618)
at java.base/java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2469)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2284)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1762)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:540)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:498)
at scala.collection.generic.DefaultSerializationProxy.readObject(DefaultSerializationProxy.scala:58)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52)
at java.base/java.lang.reflect.Method.invoke(Method.java:580)
at java.base/java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1102)
at java.base/java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2444)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2284)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1762)
at java.base/java.io.ObjectInputStream$FieldValues.<init>(ObjectInputStream.java:2618)
at java.base/java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2469)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2284)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1762)
at java.base/java.io.ObjectInputStream$FieldValues.<init>(ObjectInputStream.java:2618)
at java.base/java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2469)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2284)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1762)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:540)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:498)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:88)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:136)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:602)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1583)
Metadata
Metadata
Assignees
Labels
No labels