BeansDB Source Code Reading (2)

燕飞文

2023-12-01

http://code.google.com/p/beansdb-java/

BeansDB java

HashTree

package open.douban; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; /** * a hash tree similar to beansdb: * http://code.google.com/p/beansdb/source/browse/trunk/htree.c * ----------------------- 0(root) *---------------------- 1 ... 16 *--------------------- 17 ... *-------------------- 289 ... *------------------- 4913 ... *------------------ 83521 ... *----------------- 1419857 ... *---------------- 24137569 ... *--------------- 410338673 ... 4294967296 */ public class HashTree { public final static int g_index[] = { 0, 1, 17, 289, 4913, 83521, 1419857, 24137569, 410338673 }; // limit the height to avoid out of memory // otherwise we can not use array to store all the node in memory. public final static int MAX_HEIGHT = 6; public final static int MAX_ITEM_PER_NODE = 1 << 4 * (8 - MAX_HEIGHT); private final static int[] NUM_OF_MUTEX = new int[16]; static { NUM_OF_MUTEX[0] = 1; for (int i = 1; i < 16; i++) NUM_OF_MUTEX[i] = NUM_OF_MUTEX[i - 1] * 97; } private Node[] tree = null; private int height = 0; // tree height, always MAX_HEIGHT here private String path = null; public HashTree(String path) throws IOException { this.path = path; init(); } private void init() throws IOException { // height = MAX_HEIGHT; height = 0; tree = new Node[g_index[height + 1]]; createNode(0); // root File f = new File(path); if (f.exists()) { load(); } } /* ********************************************************* * * Inner class * * ******************************************************* */ public static class Item { int key; // track_id int hash_v; // gps number of a track_id in a given day } public static class Node { boolean isLeaf = false; int id = -1; // id in tree int height = 0; // node height in tree Item[] items = null; int item_cnt = 0; int hash_v = 0; } /** * breadth first transfer */ public abstract class BSTransfer { public void transfer() { for (Node n : tree) transfer(n); } public abstract void transfer(Node n); } /* ********************************************************* * * File operation * * ******************************************************* */ /** * save into disk : save Item one by one */ public void save() throws IOException { // 1) use a temp file and rename the file // 2) use FileChannel DataOutputStream out = new DataOutputStream(new BufferedOutputStream( new FileOutputStream(path))); for (Node n : tree) { if (n != null && n.isLeaf) { Item[] items = n.items; for (int i = 0; i < n.item_cnt; i++) { out.writeInt(items[i].key); out.writeInt(items[i].hash_v); } } } out.flush(); out.close(); } /** * load from disk : read all the Item into memory. construct tree. */ public void load() throws IOException { DataInputStream input = new DataInputStream(new BufferedInputStream( new FileInputStream(path))); File f = new File(path); int num = (int) (f.length() / 8); for (int i = 0; i < num; i++) { int key = input.readInt(); int hash_v = input.readInt(); this.addItem(key, hash_v, false); } } /* ********************************************************* * * Item operation * * ******************************************************* */ /** * not used in Meas. use {@link #updateItem(int, int)} instead. * * @param key * @param hash_v */ public void addItem(int key, int hash_v, boolean auto_save) throws IOException { Item item = new Item(); item.key = key; item.hash_v = hash_v; // use recursion instead of find and add Node leaf = findLeaf(key); addItem(leaf, item, auto_save, true); } private void addItem(Node leaf, Item item, boolean auto_save, boolean update) throws IOException { if (leaf.items == null) { leaf.items = new Item[MAX_ITEM_PER_NODE + 1]; leaf.item_cnt = 0; } boolean inst = insertSort(leaf.items, leaf.item_cnt, item); if (inst) { leaf.item_cnt++; leaf.hash_v += item.key * item.hash_v; if (update) updateParent(leaf, 1, item.key * item.hash_v); if (leaf.item_cnt > MAX_ITEM_PER_NODE) split(leaf); } if (auto_save) save(); } /** * a special API used in Meas Server. update hash value of given key by a * delta. if not exist, create an item. * * @param key * @param hash_v */ public void updateItem(int key, int delta_hash_v, boolean auto_save) throws IOException { // use recursion instead of find and update Node leaf = findLeaf(key); if (leaf.item_cnt == 0) { addItem(key, delta_hash_v, false); } else { int i = binarySearch(leaf.items, leaf.item_cnt, key); if (i != -1) { leaf.items[i].hash_v += delta_hash_v; } else { addItem(key, delta_hash_v, false); } } if (auto_save) save(); } public Item findItem(int key) { Node leaf = findLeaf(key); if (leaf.item_cnt == 0) return null; int i = binarySearch(leaf.items, leaf.item_cnt, key); return i != -1 ? leaf.items[i] : null; } /** * never used. * * @param key */ public void removeItem(int key, boolean auto_save) throws IOException { // use recursion instead of find and remove Node leaf = findLeaf(key); if (leaf.items == null) return; int i = binarySearch(leaf.items, leaf.item_cnt, key); if (i != -1) { leaf.item_cnt--; leaf.hash_v -= leaf.items[i].key * leaf.items[i].hash_v; for (int j = i; j < leaf.item_cnt; j++) leaf.items[j] = leaf.items[j + 1]; updateParent(leaf, -1, -leaf.items[i].key * leaf.items[i].hash_v); if (leaf != tree[0]) { Node parent = getParent(leaf); if (parent.item_cnt <= MAX_ITEM_PER_NODE) merge(parent); } } if (auto_save) save(); } private boolean insertSort(Item[] items, int length, Item item) { if (length == 0) { items[0] = item; return true; } // find position int i = length - 1; while (i >= 0 && items[i].key > item.key) { items[i + 1] = items[i]; i--; } if (i >= 0 && items[i].key == item.key) { items[i].hash_v = item.hash_v; return false; } else { items[i + 1] = item; return true; } } /** * @param items * @param length * @param key * @return index in item, -1 if not found */ private int binarySearch(Item[] items, int length, int key) { int l = 0, h = length; while (l <= h) { int m = (l + h) / 2; if (items[m].key == key) return m; else if (items[m].key < key) l = m + 1; else h = m - 1; } return -1; } /** * findLeaf && updateParent is double cost. If we can update parent in * finding leaf, then we can speed up by 1x. * * @param n * @param delta * @throws IOException */ private void updateParent(Node n, int item_delta, int hash_delta) throws IOException { Node parent = getParent(n); if (parent == null) return; parent.item_cnt += item_delta; int idx = n.id - child_id(parent, 0); hash_delta = hash_delta * NUM_OF_MUTEX[idx]; parent.hash_v += hash_delta; updateParent(parent, item_delta, hash_delta); // while ((parent = getParent(parent)) != null) { // parent.item_cnt += item_delta; // } } /* ********************************************************* * * Node operation * * ******************************************************* */ public final static int hex2int(char b) { if (('0' <= b && b <= '9') || ('a' <= b && b <= 'f')) { return (b >= 'a') ? (b - 'a' + 10) : (b - '0'); } else { return -1; } } public final static char[] int2hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; /** * @param dir * an octal number, each digit represent an index in a layer. E.g * 0x7e, the dir (path in the tree) is: root -> 7 child -> e * child. In our case, at most 6 octal number. "" means root node * dir. * @return if a leaf node, list all the items. [key, hash_v].otherwise, list * all the child node hash value and item count [i, hash_v, * item_cnt] */ public static final String BLANK = " "; public static final String LINE = "/n"; public String listDir(String dir) { Node n = tree[0]; for (int i = 0; i < dir.length(); i++) { int idx = hex2int(dir.charAt(i)); if (!n.isLeaf) n = this.getChild(n, idx); } StringBuilder sb = new StringBuilder(); if (n.isLeaf) { for (int i = 0; i < n.item_cnt; i++) { Item item = n.items[i]; sb.append(item.key).append(BLANK); sb.append(item.hash_v).append(LINE); } } else { for (int i = 0; i < 16; i++) { Node c = this.getChild(n, i); sb.append(int2hex[i]).append(BLANK); sb.append(c == null ? 0 : c.hash_v).append(BLANK); sb.append(c == null ? 0 : c.item_cnt).append(LINE); } } return sb.toString(); } private Node createNode(int id) { boolean enlarge = false; while (id >= g_index[height + 1]) { this.height++; enlarge = true; } assert height <= MAX_HEIGHT : "never enlarge the pool to " + MAX_HEIGHT; if (enlarge) { Node[] temp = new Node[g_index[height + 1]]; System.arraycopy(tree, 0, temp, 0, tree.length); tree = temp; } Node n = new Node(); n.id = id; n.isLeaf = true; int i = 1; for (; i < g_index.length; i++) { if (g_index[i] > id) { n.height = i - 1; break; } } tree[id] = n; return n; } /** * * @param n * @return */ private Node getParent(Node n) { if (n == tree[0]) return null; int parent_id = parent_id(n); return tree[parent_id]; } private Node getChild(Node n, int i) { int child_id = child_id(n, i); if (tree[child_id] == null) { createNode(child_id); } return tree[child_id]; } private int parent_id(Node n) { return g_index[n.height - 1] + ((n.id - g_index[n.height] >> 4)); } private int child_id(Node n, int index) { int cid = g_index[n.height + 1] + ((n.id - g_index[n.height]) << 4) + index; return cid; } private int index(int key, Node n) { // n.height is in [0,7], 8th layer do not have child // return 0x0f & (key >> (7 - n.height) * 4); return 0x0f & (key >> n.height * 4); } private Node findLeaf(int key) { Node n = tree[0]; while (!n.isLeaf) { int i = index(key, n); n = getChild(n, i); } return n; } private void split(Node n) throws IOException { assert n.isLeaf : "only leaf node can be split"; assert n.height < 8 : "inner node height is less than 8"; n.isLeaf = false; Node[] childs = new Node[16]; int first_child_id = child_id(n, 0); for (int i = 0; i < 16; i++) { childs[i] = createNode(first_child_id + i); // System.out.println(tree.length + " " + (first_child_id + i)); } int cnt = n.item_cnt; for (int i = 0; i < cnt; i++) { Item item = n.items[i]; n.items[i] = null; int j = index(item.key, n); addItem(childs[j], item, false, false); } } /** * never used in Meas. * * @param n */ private void merge(Node n) throws IOException { assert !n.isLeaf : "only inner node can be merged"; int first_child_id = child_id(n, 0); n.item_cnt = 0; // re-build for (int i = 0; i < 16; i++) { Node child = tree[first_child_id + i]; assert child.isLeaf : "only leaf node parent can be merged"; for (int j = 0; j < child.item_cnt; j++) addItem(n, child.items[j], false, false); tree[first_child_id + i] = null; } n.isLeaf = true; } }

Mirror

package open.douban; import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; /** * a sync using hash tree similar to beansdb: * http://code.google.com/p/beansdb/source/browse/trunk/python/sync.py */ public class HashTreeMirror { private static int dir_flag = -1; public static void sync(HashTree h1, HashTree h2) throws IOException { mirror(h1, h2, ""); // h1 -> h2 mirror(h2, h1, ""); // h2 -> h1 } private static Map<Integer, Pair> parseStatus(String s) { HashMap<Integer, Pair> map = new HashMap<Integer, Pair>(); String[] split = s.split(HashTree.LINE); for (int i = 0; i < split.length; i++) { String[] ss = split[i].split(HashTree.BLANK); if (ss.length == 3) { Pair p = new Pair(); p.hash_v = Integer.parseInt(ss[1]); // hash_v p.item_cnt = Integer.parseInt(ss[2]); // item_cnt map.put(i, p); map.put(dir_flag, null); } else { Pair p = new Pair(); p.hash_v = Integer.parseInt(ss[1]); // hash_v p.item_cnt = 1; // item_cnt is 1 for single item map.put(Integer.parseInt(ss[0]), p); } } return map; } private static void mirror(HashTree src, HashTree dest, String path) throws IOException { String s1 = src.listDir(path); String s2 = dest.listDir(path); if (s1.equals(s2)) return; Map<Integer, Pair> srcMap = parseStatus(s1); Map<Integer, Pair> destMap = parseStatus(s2); if (srcMap.containsKey(dir_flag)) { for (int i = 0; i < 16; i++) { Pair p1 = srcMap.get(i); Pair p2 = destMap.get(i); if (!p1.equals(p2)) { mirror(src, dest, path + HashTree.int2hex[i]); } } } else if (destMap.containsKey(dir_flag)) { for (int i = 0; i < 16; i++) { mirror(dest, src, path + HashTree.int2hex[i]); } } else if (!srcMap.containsKey(dir_flag) && !destMap.containsKey(dir_flag)) { syncItem(src, dest, srcMap, destMap); } } private static void syncItem(HashTree src, HashTree dest, Map<Integer, Pair> srcMap, Map<Integer, Pair> destMap) throws IOException { Iterator<Integer> it = srcMap.keySet().iterator(); while (it.hasNext()) { Integer key = it.next(); Pair v = srcMap.get(key); Pair v2 = destMap.get(key); if (v2 == null) { System.out.println(key + " " + src + " --> " + dest); dest.addItem(key, v.hash_v, false); } else if (v.hash_v > v2.hash_v) { dest.updateItem(key, v.hash_v - v2.hash_v, false); } } } /* ********************************************************* * * Inner class * * ******************************************************* */ private static class Pair { int hash_v; int item_cnt; @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + hash_v; result = prime * result + item_cnt; return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; Pair other = (Pair) obj; if (hash_v != other.hash_v) return false; if (item_cnt != other.item_cnt) return false; return true; } } }

BeansDB Source Code Reading (2)

相关阅读

相关文章

相关问答

相关文档