How to read from HBase using Spark.
`TableInputFormat` exposes the following attributes for bounding a scan:
SCAN_ROW_START — the first row key included in the scan (inclusive)
SCAN_ROW_STOP — the row key at which the scan stops (exclusive)
They are set on the Hadoop configuration, for example:
conf.set(TableInputFormat.SCAN_ROW_START, "startrowkey")
conf.set(TableInputFormat.SCAN_ROW_STOP, "stoprowkey")
// Build the HBase client configuration for the scan.
val scanConf = HBaseConfiguration.create()
scanConf.set("hbase.zookeeper.quorum", "master4,slave41,slave42")
scanConf.set("hbase.master", "master4:60000")
scanConf.set("hbase.zookeeper.property.clientPort", "2181")
scanConf.set("hbase.defaults.for.version.skip", "true")
// Which table to read and which slice of it.
scanConf.set(TableInputFormat.INPUT_TABLE, "test")
scanConf.set(TableInputFormat.SCAN_ROW_START, "1")
// NOTE: the stop row key is EXCLUSIVE — rows equal to "2" are not returned.
scanConf.set(TableInputFormat.SCAN_ROW_STOP, "2")
scanConf.set(TableInputFormat.SCAN_COLUMNS, "cf:1Column")

// Materialize the scan as an RDD of (row key, Result) pairs.
val rowsRdd = sc.newAPIHadoopRDD(
  scanConf,
  classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result]
)

// Trigger an action so the scan actually runs; also gives us the row count.
val rowCount = rowsRdd.count()

// Keep only the row keys, decoded from bytes to String.
val rowKeysRdd = rowsRdd.map { case (_, result) => Bytes.toString(result.getRow) }

rowKeysRdd.collect().foreach(println)
// Example output: row keys beginning with the start key "1"
// (the exclusive stop key "2" is never reached):
1_0
1_1
1_2