-----------------
Apache Gora是一个开源的ORM框架,主要为大数据提供内存数据模型与数据的持久化。目前Gora支持对于列数据、key-value数据、文档数据与RDBMS数据的存储,还支持使用Apache Hadoop来对大数据进行分析。
虽然目前市面上有很多不错的关系数据库的ORM框架,但是基于数据模型的框架如JDO还是有一些不足,如对于列数据模型的存储与持久化。Gora正好弥补了这一不足,它能使用户很容易地对大数据进行内存建模与持久化,而且支持Hadoop来对大数据进行分析。
Gora源代码以模块的形式来组织,其中gora-core是主要核心模块。所有其它模块都依赖这个核心模块,当然你可以扩展自己的模块,当前实现的模块如下
$ bin/start-hbase.sh
bin/hbase shell
$ tar zxvf src/main/resources/access.log.tar.gz -C src/main/resources/
88.254.190.73 - - [10/Mar/2009:20:40:26 +0200] "GET / HTTP/1.1" 200 43 "http://www.buldinle.com/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; GTB5; .NET CLR 2.0.50727; InfoPath.2)"
78.179.56.27 - - [11/Mar/2009:00:07:40 +0200] "GET /index.php?i=3&a=1__6x39kovbji8&k=3750105 HTTP/1.1" 200 43 "http://www.buldinle.com/index.php?i=3&a=1__6X39Kovbji8&k=3750105" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; OfficeLiveConnector.1.3; OfficeLivePatch.0.0)"
78.163.99.14 - - [12/Mar/2009:18:18:25 +0200] "GET /index.php?a=3__x7l72c&k=4476881 HTTP/1.1" 200 43 "http://www.buldinle.com/index.php?a=3__x7l72c&k=4476881" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; InfoPath.1)"
{
"type": "record",
"name": "Pageview",
"namespace": "org.apache.gora.tutorial.log.generated",
"fields" : [
{"name": "url", "type": "string"},
{"name": "timestamp", "type": "long"},
{"name": "ip", "type": "string"},
{"name": "httpMethod", "type": "string"},
{"name": "httpStatusCode", "type": "int"},
{"name": "responseSize", "type": "int"},
{"name": "referrer", "type": "string"},
{"name": "userAgent", "type": "string"}
]
}
$ bin/gora compile
$ Usage: SpecificCompiler <schema file> <output dir>
$ bin/gora compile gora-tutorial/src/main/avro/pageview.json gora-tutorial/src/main/java/
/**
 * Excerpt of the Pageview persistent bean (lines shown as "..." are elided).
 * The eight private fields carry the same names as the fields of the
 * "Pageview" Avro record schema; string-typed schema fields are held as Utf8.
 * NOTE(review): presumably produced by "bin/gora compile" from pageview.json
 * rather than written by hand - confirm before editing manually.
 */
public class Pageview extends PersistentBase {
private Utf8 url;
private long timestamp;
private Utf8 ip;
private Utf8 httpMethod;
private int httpStatusCode;
private int responseSize;
private Utf8 referrer;
private Utf8 userAgent;
...
// Schema object parsed from the record's JSON definition (string abbreviated in this excerpt).
public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\", ... ");
/** Enumerates the record's fields, pairing each with a numeric index and its field name. */
public static enum Field {
URL(0,"url"),
TIMESTAMP(1,"timestamp"),
IP(2,"ip"),
HTTP_METHOD(3,"httpMethod"),
HTTP_STATUS_CODE(4,"httpStatusCode"),
RESPONSE_SIZE(5,"responseSize"),
REFERRER(6,"referrer"),
USER_AGENT(7,"userAgent"),
;
private int index;
private String name;
Field(int index, String name) {this.index=index;this.name=name;}
public int getIndex() {return index;}
public String getName() {return name;}
// toString() yields the field name, the same value as getName().
public String toString() {return name;}
};
// Field names listed in the same order as the indices declared in Field.
public static final String[] _ALL_FIELDS = {"url","timestamp","ip","httpMethod"
,"httpStatusCode","responseSize","referrer","userAgent",};
...
}
<gora-orm>
<!-- Mapping excerpt: each Pageview field is assigned to a column family
     (common / http / misc) with a qualifier equal to the field name. -->
<!-- NOTE(review): the <table> element below is named "Pageview" while the
     <class> element maps to table "AccessLog"; confirm whether the family
     descriptors are meant to apply to "AccessLog". -->
<table name="Pageview"> <!-- optional descriptors for tables -->
<family name="common"/> <!-- This can also have params like compression, bloom filters -->
<family name="http"/>
<family name="misc"/>
</table>
<class name="org.apache.gora.tutorial.log.generated.Pageview" keyClass="java.lang.Long" table="AccessLog">
<field name="url" family="common" qualifier="url"/>
<field name="timestamp" family="common" qualifier="timestamp"/>
<field name="ip" family="common" qualifier="ip" />
<field name="httpMethod" family="http" qualifier="httpMethod"/>
<field name="httpStatusCode" family="http" qualifier="httpStatusCode"/>
<field name="responseSize" family="http" qualifier="responseSize"/>
<field name="referrer" family="misc" qualifier="referrer"/>
<field name="userAgent" family="misc" qualifier="userAgent"/>
</class>
...
</gora-orm>
$ bin/gora logmanager
which lists the usage as:
LogManager -parse <input_log_file>
-get <lineNum>
-query <lineNum>
-query <startLineNum> <endLineNum>
-delete <lineNum>
-deleteByQuery <startLineNum> <endLineNum>
$ bin/gora logmanager -parse gora-tutorial/src/main/resources/access.log
hbase(main):004:0> scan 'AccessLog', {LIMIT=>1}
ROW COLUMN+CELL
\x00\x00\x00\x00\x00\x00\x00\x00 column=common:ip, timestamp=1342791952462, value=88.240.129.183
\x00\x00\x00\x00\x00\x00\x00\x00 column=common:timestamp, timestamp=1342791952462, value=\x00\x00\x01\x1F\xF1\xAElP
\x00\x00\x00\x00\x00\x00\x00\x00 column=common:url, timestamp=1342791952462, value=/index.php?a=1__wwv40pdxdpo&k=218978
\x00\x00\x00\x00\x00\x00\x00\x00 column=http:httpMethod, timestamp=1342791952462, value=GET
\x00\x00\x00\x00\x00\x00\x00\x00 column=http:httpStatusCode, timestamp=1342791952462, value=\x00\x00\x00\xC8
\x00\x00\x00\x00\x00\x00\x00\x00 column=http:responseSize, timestamp=1342791952462, value=\x00\x00\x00+
\x00\x00\x00\x00\x00\x00\x00\x00 column=misc:referrer, timestamp=1342791952462, value=http://www.buldinle.com/index.php?a=1__WWV
40pdxdpo&k=218978
\x00\x00\x00\x00\x00\x00\x00\x00 column=misc:userAgent, timestamp=1342791952462, value=Mozilla/4.0 (compatible; MSIE 6.0; Window
s NT 5.1)
1 row(s) in 0.0180 seconds
/**
 * Creates a LogManager and runs {@code init()} to set up its data store.
 *
 * @throws RuntimeException wrapping any IOException raised during initialization
 */
public LogManager() {
  try {
    init();
  } catch (IOException initFailure) {
    // Constructors here do not declare checked exceptions; rethrow unchecked, keeping the cause.
    throw new RuntimeException(initFailure);
  }
}
/**
 * Obtains the data store from DataStoreFactory, using Long as the key class
 * and Pageview as the persistent class, and assigns it to {@code dataStore}.
 *
 * @throws IOException if the factory call fails
 */
private void init() throws IOException {
  final Class<Long> keyClass = Long.class;
  final Class<Pageview> persistentClass = Pageview.class;
  dataStore = DataStoreFactory.getDataStore(keyClass, persistentClass);
}
/**
 * Reads the log file at the given path line by line, converts each line to a
 * Pageview with parseLine, and stores every non-null result.
 *
 * Keys are assigned from a counter that starts at 0 and advances only when a
 * pageview is actually stored, so lines for which parseLine returns null do
 * not consume a key.
 *
 * An empty file is handled as "nothing to store": the line is tested for null
 * before it is handed to parseLine, rather than after.
 *
 * @param input path of the log file to read
 * @throws IOException if the file cannot be opened or read, or storing fails
 * @throws ParseException if parseLine rejects a line
 */
private void parse(String input) throws IOException, ParseException {
  BufferedReader reader = new BufferedReader(new FileReader(input));
  long lineCount = 0;
  try {
    String line;
    // readLine() returns null at end of input, which can already happen on the
    // very first read; check before use so null never reaches parseLine.
    while ((line = reader.readLine()) != null) {
      Pageview pageview = parseLine(line);
      if (pageview != null) {
        //store the pageview
        storePageview(lineCount++, pageview);
      }
    }
  } finally {
    // Always release the file handle, even when parsing or storing throws.
    reader.close();
  }
}
/**
 * Builds a Pageview from one log line (excerpt: lines shown as "..." are elided).
 *
 * The line is split with StringTokenizer's default delimiters and the first
 * token is taken as the client IP. The remaining fields, including the
 * {@code timestamp} value used below, are produced by the elided code.
 *
 * @param line one line of the log; must not be null, since StringTokenizer
 *             rejects a null string
 * @return the populated Pageview
 * @throws ParseException declared here; presumably raised by the elided
 *         parsing code - confirm against the full source
 */
private Pageview parseLine(String line) throws ParseException {
StringTokenizer matcher = new StringTokenizer(line);
//parse the log line
String ip = matcher.nextToken();
...
//construct and return pageview object
Pageview pageview = new Pageview();
// String values are wrapped in Utf8 before being set on the bean.
pageview.setIp(new Utf8(ip));
pageview.setTimestamp(timestamp);
...
return pageview;
}
/**
 * Writes a pageview to the data store under the supplied key.
 *
 * @param key      key to store the pageview under
 * @param pageview the object to store
 * @throws IOException if the data store reports a failure
 */
private void storePageview(long key, Pageview pageview) throws IOException {
  final Long rowKey = Long.valueOf(key);
  dataStore.put(rowKey, pageview);
}
/**
 * Closes the data store if one was created; does nothing otherwise.
 *
 * Closing the data store properly is very important: skipping it
 * might cause some data loss.
 *
 * @throws IOException if closing the data store fails
 */
private void close() throws IOException {
  if (dataStore == null) {
    return;
  }
  dataStore.close();
}
/**
 * Looks up the pageview stored under the given key and hands the result
 * to printPageview.
 *
 * @param key key of the pageview to fetch
 * @throws IOException if the lookup fails
 */
private void get(long key) throws IOException {
  printPageview(dataStore.get(key));
}
/**
 * Runs a key-range query over the data store and prints the result.
 *
 * @param startKey start key of the range
 * @param endKey   end key of the range
 * @throws IOException if executing the query or printing the result fails
 */
private void query(long startKey, long endKey) throws IOException {
  // Build an empty query from the store, then restrict it to the key range.
  Query<Long, Pageview> rangeQuery = dataStore.newQuery();
  rangeQuery.setStartKey(startKey);
  rangeQuery.setEndKey(endKey);

  printResult(rangeQuery.execute());
}
/**
 * Deletes the pageview stored under the given line number, then flushes
 * the data store.
 *
 * @param lineNum key (line number) of the pageview to delete
 * @throws Exception if the delete or the flush fails
 */
private void delete(long lineNum) throws Exception {
  final Long rowKey = Long.valueOf(lineNum);
  dataStore.delete(rowKey);
  // Write changes may need to be flushed before they are committed.
  dataStore.flush();
}
/** This method illustrates delete by query call */
private void deleteByQuery(long startKey, long endKey) throws IOException {
//Constructs a query from the dataStore. The matching rows to this query will be deleted
QueryLong, Pageview> query = dataStore.newQuery();
//set the properties of query
query.setStartKey(startKey);
query.setEndKey(endKey);
dataStore.deleteByQuery(query);
}