Details
Type: Bug
Resolution: Fixed
Priority: Major
Sprint: SDK46: Limit Err, CB++, Spark
Description
Environment: Scala 2.12 and Spark 3.1.1
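The snippet below assumes a SparkSession that is already connected to the cluster, roughly as in the following sketch; the connection string, credentials, and bucket name are placeholders and not taken from the original report.

    import org.apache.spark.sql.SparkSession

    // Hypothetical connection setup for the repro; adjust the connection
    // string, credentials, and bucket to the target cluster.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("couchbase-analytics-repro")
      .config("spark.couchbase.connectionString", "couchbase://127.0.0.1")
      .config("spark.couchbase.username", "Administrator")
      .config("spark.couchbase.password", "password")
      .config("spark.couchbase.implicitBucket", "travel-sample")
      .getOrCreate()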
    import com.couchbase.spark.analytics.AnalyticsOptions

    // Read the inventory.hotel analytics collection into a DataFrame
    val hotels = spark.read.format("couchbase.analytics")
      .option(AnalyticsOptions.Scope, "inventory")
      .option(AnalyticsOptions.Dataset, "hotel")
      .load()

    hotels.show()
This code errors out with an error similar to the one previously reported for the Query SQL source:
Job aborted due to stage failure.
Caused by: NoSuchMethodError: org.apache.spark.sql.catalyst.json.CreateJacksonParser$.string(Lcom/fasterxml/jackson/core/JsonFactory;Ljava/lang/String;)Lcom/fasterxml/jackson/core/JsonParser;
  at org.apache.spark.sql.CouchbaseJsonUtils$.$anonfun$createParser$1(CouchbaseJsonUtils.scala:41)
  at org.apache.spark.sql.catalyst.json.JacksonParser.$anonfun$parse$1(JacksonParser.scala:463)
  at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2906)
  at org.apache.spark.sql.catalyst.json.JacksonParser.parse(JacksonParser.scala:463)
  at com.couchbase.spark.analytics.AnalyticsPartitionReader.$anonfun$rows$2(AnalyticsPartitionReader.scala:53)
  at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
  at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
  at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
  at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
  at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
  at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
  at com.couchbase.spark.analytics.AnalyticsPartitionReader.rows$lzycompute(AnalyticsPartitionReader.scala:51)
  at com.couchbase.spark.analytics.AnalyticsPartitionReader.rows(AnalyticsPartitionReader.scala:48)
  at com.couchbase.spark.analytics.AnalyticsPartitionReader.rowIterator$lzycompute(AnalyticsPartitionReader.scala:60)
  at com.couchbase.spark.analytics.AnalyticsPartitionReader.rowIterator(AnalyticsPartitionReader.scala:60)
  at com.couchbase.spark.analytics.AnalyticsPartitionReader.next(AnalyticsPartitionReader.scala:63)
  at org.apache.spark.sql.execution.datasources.v2.PartitionIterator.hasNext(DataSourceRDD.scala:79)
  at org.apache.spark.sql.execution.datasources.v2.MetricsIterator.hasNext(DataSourceRDD.scala:112)
  at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
  at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:757)
  at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
  at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:178)
  at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
  at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
  at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
  at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
  at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
  at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
  at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
  at org.apache.spark.scheduler.Task.run(Task.scala:91)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:812)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1643)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:815)
  at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
  at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:671)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2765)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2712)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2706)
  at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
  at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2706)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1255)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1255)
  at scala.Option.foreach(Option.scala:407)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1255)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2973)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2914)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2902)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1028)
  at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2446)
  at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:289)
  at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:299)
  at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:82)
  at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:88)
  at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:75)
  at org.apache.spark.sql.execution.collect.InternalRowFormat$.collect(cachedSparkResults.scala:62)
  at org.apache.spark.sql.execution.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:512)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:511)
  at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:399)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:59)
  at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3018)
  at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3810)
  at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2742)
  at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3802)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:126)
  at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:267)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:104)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:852)
  at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:217)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3800)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2742)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2949)
  at org.apache.spark.sql.Dataset.getRows(Dataset.scala:306)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:343)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:839)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:798)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:807)
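For comparison, the previously reported Query failure comes from a read along the following lines; this is only a sketch assuming the connector's QueryOptions naming, and the collection name is illustrative rather than taken from that report.

    import com.couchbase.spark.query.QueryOptions

    // Illustrative Query data source read that reportedly hits the same
    // Jackson NoSuchMethodError in this environment.
    val airlines = spark.read.format("couchbase.query")
      .option(QueryOptions.Scope, "inventory")
      .option(QueryOptions.Collection, "airline")
      .load()

    airlines.show()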