mongo-spark-connector 解决 Mongo长精度 0.0引起的Bug Decimal scale (12) cannot be greater than precision (1).

王高邈
2023-12-01

User class threw exception: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, njtest-cdh5-dn02.nj, executor 2): org.apache.spark.sql.AnalysisException: Decimal scale (12) cannot be greater than precision (1).;
at org.apache.spark.sql.types.DecimalType.<init>(DecimalType.scala:45)
at org.apache.spark.sql.types.DecimalType$.apply(DecimalType.scala:42)
at org.apache.spark.sql.types.DataTypes.createDecimalType(DataTypes.java:123)
at com.mongodb.spark.sql.MongoInferSchema$.com$mongodb$spark$sql$MongoInferSchema$$getDataType(MongoInferSchema.scala:248)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$com$mongodb$spark$sql$MongoInferSchema$$getSchemaFromDocument$1.apply(MongoInferSchema.scala:114)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$com$mongodb$spark$sql$MongoInferSchema$$getSchemaFromDocument$1.apply(MongoInferSchema.scala:114)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at com.mongodb.spark.sql.MongoInferSchema$.com$mongodb$spark$sql$MongoInferSchema$$getSchemaFromDocument(MongoInferSchema.scala:114)
at com.mongodb.spark.sql.MongoInferSchema$.com$mongodb$spark$sql$MongoInferSchema$$getDataType(MongoInferSchema.scala:231)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$getCompatibleArraySchema$1.apply(MongoInferSchema.scala:278)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$getCompatibleArraySchema$1.apply(MongoInferSchema.scala:274)
at scala.collection.IterableLike$class.takeWhile(IterableLike.scala:160)
at scala.collection.AbstractIterable.takeWhile(Iterable.scala:54)
at com.mongodb.spark.sql.MongoInferSchema$.getCompatibleArraySchema(MongoInferSchema.scala:274)
at com.mongodb.spark.sql.MongoInferSchema$.getSchemaFromArray(MongoInferSchema.scala:257)
at com.mongodb.spark.sql.MongoInferSchema$.com$mongodb$spark$sql$MongoInferSchema$$getDataType(MongoInferSchema.scala:227)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$com$mongodb$spark$sql$MongoInferSchema$$getSchemaFromDocument$1.apply(MongoInferSchema.scala:114)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$com$mongodb$spark$sql$MongoInferSchema$$getSchemaFromDocument$1.apply(MongoInferSchema.scala:114)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at com.mongodb.spark.sql.MongoInferSchema$.com$mongodb$spark$sql$MongoInferSchema$$getSchemaFromDocument(MongoInferSchema.scala:114)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$2.apply(MongoInferSchema.scala:78)
at com.mongodb.spark.sql.MongoInferSchema$$anonfun$2.apply(MongoInferSchema.scala:78)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.aggregate(TraversableOnce.scala:214)
at scala.collection.AbstractIterator.aggregate(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$24.apply(RDD.scala:1135)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$24.apply(RDD.scala:1135)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$25.apply(RDD.scala:1136)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$25.apply(RDD.scala:1136)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:796)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:796)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:

File-download-link修改好的jar 已经上传,请自行下载!!!

 类似资料: