# Version/platform limitation: SparkSQL cannot read the real-time ORC partitioned table, so pyhs2 is used to read it instead.
import pyhs2
# Connect to the Hive client (HiveServer2)
def get_hiveserver_connect():
    """Open and return a pyhs2 connection to the local HiveServer2.

    Uses Kerberos authentication against the ``default`` database.
    The caller is responsible for closing the returned connection.
    """
    conn_params = {
        "host": "localhost",
        "port": 8000,
        "authMechanism": "KERBEROS",
        "user": "xxxx",
        "database": "default",
    }
    return pyhs2.connect(**conn_params)
# Execute a statement and return the result as List[Dict]
def get_hive_result(consql):
    """Execute *consql* against Hive and return the result set.

    Returns a list of dicts, one per row, mapping column name -> value,
    e.g. ``[{'uv': 200, 'pv': 100}, ...]``.

    The connection is always closed, even if execution fails
    (the original version leaked it on any exception).
    """
    con = get_hiveserver_connect()
    try:
        cur = con.cursor()
        cur.execute(consql)
        # Column names come from the result-set schema metadata.
        columns = [field['columnName'] for field in cur.getSchema()]
        # Map each fetched row onto the column names.
        return [dict(zip(columns, row)) for row in cur.fetch()]
    finally:
        con.close()
# Example usage. Guarded so importing this module does not fire a query.
# Expected shape: [{'uv': 200, 'pv': 100}, {'uv': 220, 'pv': 110}]
if __name__ == "__main__":
    consql = "select pv,uv from test1"
    print(get_hive_result(consql))