spark读取hbase空指针异常,跪求大神指导
spark版本:1.2.1
hbase版本:0.98
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
object HBaseTest {
  /**
   * Counts the rows of the HBase table "student" via Spark's newAPIHadoopRDD.
   *
   * Note on the reported NullPointerException: the pasted log shows the ZooKeeper
   * session is established successfully, followed by
   * "ClusterId read in ZooKeeper is null" — i.e. the client connected to ZooKeeper
   * but found no HBase data under the parent znode it was looking at. That is the
   * classic symptom of a `zookeeper.znode.parent` mismatch between client and
   * cluster (and leads directly to the NPE in ZooKeeperWatcher.getMetaReplicaNodes).
   */
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("HBaseTest")
    val sc = new SparkContext(sparkConf)
    val conf = HBaseConfiguration.create()
    // hbase-site.xml is on the classpath, but the connection settings are also set
    // explicitly here so the job does not depend on which config file wins.
    conf.set("hbase.zookeeper.quorum", "node1,node2,node3")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.master", "node1:60000")
    // Fix for "ClusterId read in ZooKeeper is null": point the client at the same
    // parent znode the cluster uses. "/hbase" is the HBase default —
    // NOTE(review): confirm against the server-side hbase-site.xml (some distros
    // use e.g. "/hbase-unsecure").
    conf.set("zookeeper.znode.parent", "/hbase")
    conf.set(TableInputFormat.INPUT_TABLE, "student") // table "student" must already exist
    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    val count = hBaseRDD.count()
    // Typo fixed: "Coun" -> "Count".
    println("Users RDD Count: " + count)
    sc.stop()
  }
}
提交方式如下:
./bin/spark-submit --class HBaseTest --master local[2] --num-executors 3 --driver-memory 512m --executor-memory 512m --executor-cores 1 /usr/local/share/testhbase.jar
出现以下异常。注意日志里 ZooKeeper 会话其实已经建立成功(Session establishment complete),真正的线索是 "ClusterId read in ZooKeeper is null"——说明客户端在 ZooKeeper 的父节点(zookeeper.znode.parent)下没有读到 HBase 的数据,很可能是客户端与集群的 znode 父路径不一致:
15/07/20 20:59:45 INFO RecoverableZooKeeper: Process identifier=hconnection-0x633e79e7 connecting to ZooKeeper ensemble=node2:2181,node1:2181,node3:2181
15/07/20 20:59:45 INFO ClientCnxn: Opening socket connection to server node2.fd.h3c.com/192.38.18.102:2181. Will not attempt to authenticate using SASL (unknown error)
15/07/20 20:59:45 INFO ClientCnxn: Socket connection established to node2.fd.h3c.com/192.38.18.102:2181, initiating session
15/07/20 20:59:45 INFO ClientCnxn: Session establishment complete on server node2.fd.h3c.com/192.38.18.102:2181, sessionid = 0x24e9fc3f322001a, negotiated timeout = 40000
15/07/20 20:59:45 INFO ZooKeeperRegistry: ClusterId read in ZooKeeper is null
15/07/20 20:59:45 INFO RegionSizeCalculator: Calculating region sizes for table "student".
Error: application failed with exception
java.lang.RuntimeException: java.lang.NullPointerException
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:209)
at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:288)
at org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:268)
at org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:140)
at org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:135)
at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:802)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:200)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:85)
at org.apache.hadoop.hbase.client.MetaScanner.allTableRegions(MetaScanner.java:310)
at org.apache.hadoop.hbase.client.HTable.getRegionLocations(HTable.java:666)
at org.apache.hadoop.hbase.util.RegionSizeCalculator.<init>(RegionSizeCalculator.java:79)
at org.apache.hadoop.hbase.util.RegionSizeCalculator.<init>(RegionSizeCalculator.java:64)
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:160)
at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:98)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:220)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:218)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:218)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1335)
at org.apache.spark.rdd.RDD.count(RDD.scala:925)
at HBaseTest$.main(HBaseTest.scala:27)
at HBaseTest.main(HBaseTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:367)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:77)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NullPointerException
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.getMetaReplicaNodes(ZooKeeperWatcher.java:269)
at org.apache.hadoop.hbase.zookeeper.MetaRegionTracker.blockUntilAvailable(MetaRegionTracker.java:241)
at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getMetaRegionLocation(ZooKeeperRegistry.java:62)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateMeta(ConnectionManager.java:1213)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1174)
at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.getRegionLocations(RpcRetryingCallerWithReadReplicas.java:294)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:130)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:55)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:201)
... 28 more