spark读取hbase空指针异常,跪求大神指导

dingyuanpu 2015-07-20 10:17:18
spark版本:1.2.1
hbase版本:0.98

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object HBaseTest {
def main(args: Array[String]) {
val sparkConf = new SparkConf().setAppName("HBaseTest")
val sc = new SparkContext(sparkConf)
val conf = HBaseConfiguration.create()

//hbase-site.xml已加入到工程,并且打包到了jar包中,同样的配置,本地写了个程序能成功读取HBase,用spark就不好使了
conf.set("hbase.zookeeper.quorum", "node1,node2,node3")
conf.set("hbase.zookeeper.property.clientPort", "2181")
conf.set("hbase.master", "node1:60000")

conf.set(TableInputFormat.INPUT_TABLE, "student") //hbase中已创建student表

val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
classOf[org.apache.hadoop.hbase.client.Result])

val count = hBaseRDD.count()
println("Users RDD Coun: " + count)
sc.stop()
}
}

提交方式如下:
./bin/spark-submit --class HBaseTest --master local[2] --num-executors 3 --driver-memory 512m --executor-memory 512m --executor-cores 1 /usr/local/share/testhbase.jar


出现以下异常,貌似没连上Hbase
15/07/20 20:59:45 INFO RecoverableZooKeeper: Process identifier=hconnection-0x633e79e7 connecting to ZooKeeper ensemble=node2:2181,node1:2181,node3:2181
15/07/20 20:59:45 INFO ClientCnxn: Opening socket connection to server node2.fd.h3c.com/192.38.18.102:2181. Will not attempt to authenticate using SASL (unknown error)
15/07/20 20:59:45 INFO ClientCnxn: Socket connection established to node2.fd.h3c.com/192.38.18.102:2181, initiating session
15/07/20 20:59:45 INFO ClientCnxn: Session establishment complete on server node2.fd.h3c.com/192.38.18.102:2181, sessionid = 0x24e9fc3f322001a, negotiated timeout = 40000
15/07/20 20:59:45 INFO ZooKeeperRegistry: ClusterId read in ZooKeeper is null
15/07/20 20:59:45 INFO RegionSizeCalculator: Calculating region sizes for table "student".
Error: application failed with exception
java.lang.RuntimeException: java.lang.NullPointerException
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:209)
at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:288)
at org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:268)
at org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:140)
at org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:135)
at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:802)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:200)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:85)
at org.apache.hadoop.hbase.client.MetaScanner.allTableRegions(MetaScanner.java:310)
at org.apache.hadoop.hbase.client.HTable.getRegionLocations(HTable.java:666)
at org.apache.hadoop.hbase.util.RegionSizeCalculator.<init>(RegionSizeCalculator.java:79)
at org.apache.hadoop.hbase.util.RegionSizeCalculator.<init>(RegionSizeCalculator.java:64)
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:160)
at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:98)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:220)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:218)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:218)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1335)
at org.apache.spark.rdd.RDD.count(RDD.scala:925)
at HBaseTest$.main(HBaseTest.scala:27)
at HBaseTest.main(HBaseTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:367)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:77)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NullPointerException
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.getMetaReplicaNodes(ZooKeeperWatcher.java:269)
at org.apache.hadoop.hbase.zookeeper.MetaRegionTracker.blockUntilAvailable(MetaRegionTracker.java:241)
at org.apache.hadoop.hbase.client.ZooKeeperRegistry.getMetaRegionLocation(ZooKeeperRegistry.java:62)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateMeta(ConnectionManager.java:1213)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1174)
at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.getRegionLocations(RpcRetryingCallerWithReadReplicas.java:294)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:130)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:55)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:201)
... 28 more
...全文
1458 4 打赏 收藏 转发到动态 举报
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
ybao123 2018-09-27
  • 打赏
  • 举报
回复
楼主解决了吗,我在本地用集群操作java新建hbase表就可以运行,但是切换到公司集群就报了上面的错误,没有头绪
  • 打赏
  • 举报
回复
出现这个异常很可能是因为HBase里面配置的zookeeper.znode.parent节点信息和Zookeeper里面的保存HBase节点信息的节点名称不一致(可以用zooCli来进行查看)。
c00700703 2015-08-22
  • 打赏
  • 举报
回复
楼主问题解决了吗,我也遇到类似的问题,找不到原因。
pandaSLP 2015-08-05
  • 打赏
  • 举报
回复
楼主问题解决了吗,我也遇到类似的问题,找不到原因。

1,261

社区成员

发帖
与我相关
我的任务
社区描述
Spark由Scala写成,是UC Berkeley AMP lab所开源的类Hadoop MapReduce的通用的并行计算框架,Spark基于MapReduce算法实现的分布式计算。
社区管理员
  • Spark
  • shiter
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧