Here is the relevant part of my error output:
[INFO] org.apache.hadoop.conf.Configuration.deprecation(1129) mapred.tip.id is deprecated. Instead, use mapreduce.task.id
[INFO] org.apache.hadoop.conf.Configuration.deprecation(1129) mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
[INFO] org.apache.hadoop.conf.Configuration.deprecation(1129) mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
[INFO] org.apache.hadoop.conf.Configuration.deprecation(1129) mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
[INFO] org.apache.hadoop.conf.Configuration.deprecation(1129) mapred.job.id is deprecated. Instead, use mapreduce.job.id
[ERROR] org.apache.spark.executor.Executor(96) Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.UnsatisfiedLinkError: org.apache.hadoop.util.NativeCrc32.nativeComputeChunkedSums(IILjava/nio/ByteBuffer;ILjava/nio/ByteBuffer;IILjava/lang/String;JZ)V
    at org.apache.hadoop.util.NativeCrc32.nativeComputeChunkedSums(Native Method)
    at org.apache.hadoop.util.NativeCrc32.verifyChunkedSums(NativeCrc32.java:59)
    at org.apache.hadoop.util.DataChecksum.verifyChunkedSums(DataChecksum.java:301)
    at org.apache.hadoop.hdfs.RemoteBlockReader2.readNextPacket(RemoteBlockReader2.java:216)
    at org.apache.hadoop.hdfs.RemoteBlockReader2.read(RemoteBlockReader2.java:146)
    at org.apache.hadoop.hdfs.DFSInputStream$ByteArrayStrategy.doRead(DFSInputStream.java:734)
    at org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:790)
    at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:848)
    at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:889)
    at java.io.DataInputStream.read(DataInputStream.java:149)
    at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
    at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
    at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
    at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
    at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:208)
    at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:246)
    at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:48)
    at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:248)
    at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:216)
    at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1626)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1099)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1099)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
[ERROR] org.apache.spark.util.SparkUncaughtExceptionHandler(96) Uncaught exception in thread Thread[Executor task launch worker-0,5,main]
java.lang.UnsatisfiedLinkError: org.apache.hadoop.util.NativeCrc32.nativeComputeChunkedSums(IILjava/nio/ByteBuffer;ILjava/nio/ByteBuffer;IILjava/lang/String;JZ)V
    (the stack trace here is identical to the one above, so it is omitted)
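The UnsatisfiedLinkError means the JVM did find a hadoop.dll, but that DLL does not export the nativeComputeChunkedSums signature that the hadoop-* jars on the classpath expect, i.e. the native library and the jars come from different Hadoop versions. A quick way to see where the DLL is actually being picked up from is a throwaway check like the sketch below (NativeCodeLoader is Hadoop's own native loader; the object name and the rest are mine, it only prints the search paths):

import org.apache.hadoop.util.NativeCodeLoader

// Throwaway diagnostic (NativeLibCheck is just a name I picked): prints whether Hadoop
// managed to load a native library and which directories the JVM searches for hadoop.dll.
object NativeLibCheck {
  def main(args: Array[String]): Unit = {
    println(s"hadoop native code loaded: ${NativeCodeLoader.isNativeCodeLoaded()}")
    // hadoop.dll is resolved via java.library.path (which on Windows usually includes PATH);
    // an older hadoop.dll found earlier on these paths is what breaks the native CRC call.
    println(s"java.library.path = ${System.getProperty("java.library.path")}")
    println(s"PATH              = ${System.getenv("PATH")}")
    println(s"HADOOP_HOME       = ${System.getenv("HADOOP_HOME")}")
  }
}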
I found this JIRA issue: https://issues.apache.org/jira/browse/HADOOP-11064
In the end the fix was: when developing on Windows you need hadoop.dll, winutils.exe, and the other native helpers, and upgrading them to the same version as the Hadoop used by the project resolved the problem.
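For the record, here is roughly what my local setup looks like after the fix. This is only a sketch: the D:\hadoop-2.6.0 path, the version number, and the hdfs:// URL are placeholders for my environment, not something to copy literally. The only thing that matters is that bin\winutils.exe and bin\hadoop.dll in that directory come from the same Hadoop release as the hadoop-* jars the project depends on.

import org.apache.spark.{SparkConf, SparkContext}

object WordCountOnWindows {
  def main(args: Array[String]): Unit = {
    // Hadoop looks for %hadoop.home.dir%\bin\winutils.exe; setting the HADOOP_HOME
    // environment variable works the same way.
    System.setProperty("hadoop.home.dir", "D:\\hadoop-2.6.0")

    // hadoop.dll is NOT found through hadoop.home.dir but through java.library.path / PATH,
    // so start the JVM with -Djava.library.path=D:\hadoop-2.6.0\bin (or put that directory
    // on PATH) so that the matching DLL is the one that actually gets loaded.

    val conf = new SparkConf().setAppName("native-crc-check").setMaster("local[*]")
    val sc   = new SparkContext(conf)
    // count() is the same action that failed with the UnsatisfiedLinkError in the log above.
    println(sc.textFile("hdfs://namenode:9000/tmp/input.txt").count())
    sc.stop()
  }
}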