1. 程式人生 > >如何用B+樹設計資料庫中的索引檔案

如何用B+樹設計資料庫中的索引檔案

宣告:
1、B+樹的程式碼不是我寫的,是網上的,關於java寫的B+樹的都是這個程式碼,我也不知道怎麼寫原作者。
2、如果不懂B+樹是肯定看不懂這篇blog的。
3、我在原有程式碼上簡單修改了兩個地方:第一、在葉子節點的屬性集合裡添加了file屬性。第二、在BplusTree裡添加了將葉子節點的連結串列儲存到檔案的程式碼

為什麼需要用B+樹設計索引檔案?

因為單個索引檔案太大了,當你進行查詢的時候需要開啟檔案,然後在磁碟上尋道,讀取檔案內容,這會花費大量的時間。所以我們需要通過B+樹來將原來的大的索引檔案分成很多個小的索引檔案。比如說,本來一個檔案有一億個int數。那當你查詢某一個數時要花費很長時間【要讀取所有的檔案內容】。但是如果你通過將一億個數分成一萬個檔案,每個檔案平均一萬個數,然後B+樹查詢的複雜度是log₂(一億),約為27。然後開啟葉子節點的檔案,再讀取這一萬個數,再用二分查詢,就可以減少大量的讀檔案內容時間。

不難看出,讀一億個數和讀一萬個數差距還是很大的。當然,索引檔案會佔用大量的儲存空間。【這裡就是用空間換時間】

怎麼實現呢?

不懂B+樹的同志需要自己去研究一下B+樹了。這裡就不多講了。

我們知道B+樹的葉子節點會有一個連結串列。那麼我們將資料生成B+樹之後完全可以將葉子節點的資料儲存到每一個葉子結點自己的檔案裡。

我是用的Java寫的,直接將連結串列通過物件序列化,寫到檔案裡。
然後將B+樹通過物件序列化寫到檔案裡。

下次查詢的時候,讀取檔案裡的B+樹,然後通過節點上的關鍵字最終決定開啟哪一個葉子節點的檔案。

Show me code!

這裡的B+樹程式碼是網上的,沒錯,大家搜java實現B+樹就能搜到。我只是稍作修改。

/**
 * Minimal key/value index contract implemented by the B+ tree.
 */
public interface B {

    /**
     * Looks up the value stored under {@code key}.
     *
     * @param key the key to search for
     * @return the value associated with the key
     */
    Object get(Comparable key);

    /**
     * Removes the entry stored under {@code key}.
     *
     * @param key the key to remove
     */
    void remove(Comparable key);

    /**
     * Inserts a new entry, or updates the value if the key already exists.
     *
     * @param key the key to insert or update
     * @param obj the value to associate with the key
     */
    void insertOrUpdate(Comparable key, Object obj);
}
package IndexTree;

/**
 * Created by hms on 2016/12/12.
 */
import java.io.*;
import java.util.ArrayList;
import
java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; public class BplusTree implements B , Serializable{ /** 根節點 */ protected Node root; /** 階數,M值 */ protected int order; /** 葉子節點的連結串列頭*/ protected Node head; public Node getHead() { return head; } public void setHead(Node head) { this.head = head; } public Node getRoot() { return root; } public void setRoot(Node root) { this.root = root; } public int getOrder() { return order; } public void setOrder(int order) { this.order = order; } @Override public Object get(Comparable key) { return root.get(key); } @Override public void remove(Comparable key) { root.remove(key, this); } @Override public void insertOrUpdate(Comparable key, Object obj) { root.insertOrUpdate(key, obj, this); } public BplusTree(int order){ if (order < 3) { System.out.print("order must be greater than 2"); System.exit(0); } this.order = order; root = new Node(true, true); head = root; } //測試 public static void main(String[] args) throws IOException { BplusTree tree = new BplusTree(6); Random random = new Random(); long current = System.currentTimeMillis(); for (int j = 0; j < 100000; j++) { for (int i = 0; i < 100; i++) { int randomNumber = random.nextInt(1000); tree.insertOrUpdate(randomNumber, randomNumber); } for (int i = 0; i < 100; i++) { int randomNumber = random.nextInt(1000); tree.remove(randomNumber); } } long duration = System.currentTimeMillis() - current; System.out.println("time elpsed for duration: " + duration); int search = 80; System.out.print(tree.get(search)); Node next = tree.getHead(); int count = 0; while(true){ if(next == null) break; ++count; List<Entry<Comparable, Object>> entries = next.getEntries(); File file = new File(String.valueOf(count) + ".txt"); next.setFile(file); ObjectOutputStream objectOutputStream = new ObjectOutputStream(new FileOutputStream(file)); objectOutputStream.writeObject(next); objectOutputStream.close(); next = next.getNext(); } File treeFile = new 
File("BplusTree.txt"); ObjectOutputStream objectOutputStream = new ObjectOutputStream(new FileOutputStream(treeFile)); objectOutputStream.writeObject(tree); objectOutputStream.close(); } }
package IndexTree;

/**
 * Created by hms on 2016/12/12.
 */
import java.io.File;
import java.io.Serializable;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;

/**
 * A node of the B+ tree.
 *
 * <p>Leaf nodes hold the key/value entries and are chained together through
 * {@link #previous} / {@link #next}. Internal nodes hold one key per child
 * (the first key of that child, as maintained by
 * {@link #validate(Node, BplusTree)}) with a {@code null} value.
 */
public class Node implements Serializable {

    /** Whether this node is a leaf. */
    protected boolean isLeaf;

    /** Whether this node is the root. */
    protected boolean isRoot;

    /** Parent node. */
    protected Node parent;

    /** Previous node in the leaf linked list. */
    protected Node previous;

    /** Next node in the leaf linked list. */
    protected Node next;

    /** Keys (with values, for leaves) stored in this node. */
    protected List<Entry<Comparable, Object>> entries;

    /** Child nodes; only allocated for non-leaf nodes. */
    protected List<Node> children;

    /** Index file associated with this leaf node. */
    protected File file;

    /**
     * Creates a non-root node.
     *
     * @param isLeaf whether the node is a leaf; non-leaf nodes also get a child list
     */
    public Node(boolean isLeaf) {
        this.isLeaf = isLeaf;
        entries = new ArrayList<Entry<Comparable, Object>>();

        if (!isLeaf) {
            children = new ArrayList<Node>();
        }
    }

    /**
     * Creates a node.
     *
     * @param isLeaf whether the node is a leaf
     * @param isRoot whether the node is the root
     */
    public Node(boolean isLeaf, boolean isRoot) {
        this(isLeaf);
        this.isRoot = isRoot;
    }

    /**
     * Looks up the value stored under {@code key} in the subtree rooted here.
     *
     * @param key the key to search for
     * @return the stored value, or {@code null} when the key is not found
     */
    public Object get(Comparable key) {
        if (isLeaf) {
            for (Entry<Comparable, Object> entry : entries) {
                if (entry.getKey().compareTo(key) == 0) {
                    return entry.getValue();
                }
            }
            // The key is not present in this leaf.
            return null;
        }

        Node child = findChild(key);
        return child == null ? null : child.get(key);
    }

    /**
     * Selects the child of this internal node whose subtree should contain
     * {@code key}.
     *
     * @param key the key being searched for
     * @return the child to descend into, or {@code null} if no key range matched
     */
    private Node findChild(Comparable key) {
        // key <= left-most key: descend into the first child.
        if (key.compareTo(entries.get(0).getKey()) <= 0) {
            return children.get(0);
        }
        // key >= right-most key: descend into the last child.
        if (key.compareTo(entries.get(entries.size() - 1).getKey()) >= 0) {
            return children.get(children.size() - 1);
        }
        // Otherwise pick child i where entries[i] <= key < entries[i + 1].
        for (int i = 0; i < entries.size(); i++) {
            if (entries.get(i).getKey().compareTo(key) <= 0
                    && entries.get(i + 1).getKey().compareTo(key) > 0) {
                return children.get(i);
            }
        }
        return null;
    }

    /**
     * Inserts {@code key} into the subtree rooted here, or updates its value
     * if it already exists, splitting the leaf when it is full.
     *
     * @param key  the key to insert or update
     * @param obj  the value to store
     * @param tree the owning tree (supplies the order; its root/head may be replaced)
     */
    public void insertOrUpdate(Comparable key, Object obj, BplusTree tree) {
        if (!isLeaf) {
            Node child = findChild(key);
            if (child != null) {
                child.insertOrUpdate(key, obj, tree);
            }
            return;
        }

        // No split needed: the key already exists or there is still room.
        if (contains(key) || entries.size() < tree.getOrder()) {
            insertOrUpdate(key, obj);
            if (parent != null) {
                // Refresh the parent's keys.
                parent.updateInsert(tree);
            }
            return;
        }

        // The leaf is full: split it into a left and a right leaf.
        Node left = new Node(true);
        Node right = new Node(true);

        // Splice the two new leaves into the leaf linked list.
        if (previous != null) {
            previous.setNext(left);
            left.setPrevious(previous);
        }
        if (next != null) {
            next.setPrevious(right);
            right.setNext(next);
        }
        if (previous == null) {
            tree.setHead(left);
        }

        left.setNext(right);
        right.setPrevious(left);
        previous = null;
        next = null;

        // Entry counts of the two halves; together they cover order + 1 entries.
        int leftSize = (tree.getOrder() + 1) / 2 + (tree.getOrder() + 1) % 2;
        int rightSize = (tree.getOrder() + 1) / 2;

        // Insert into this (about to be discarded) leaf first, then distribute.
        insertOrUpdate(key, obj);
        for (int i = 0; i < leftSize; i++) {
            left.getEntries().add(entries.get(i));
        }
        for (int i = 0; i < rightSize; i++) {
            right.getEntries().add(entries.get(leftSize + i));
        }

        replaceWithSplit(left, right, tree);
    }

    /**
     * Replaces this node by the two halves produced by a split, either in the
     * existing parent or, when this node has no parent, under a newly created
     * root. This node's entries and children are cleared afterwards.
     *
     * @param left  the left half
     * @param right the right half
     * @param tree  the owning tree
     */
    private void replaceWithSplit(Node left, Node right, BplusTree tree) {
        if (parent != null) {
            // Not the root: swap this node for left/right in the parent's child list.
            int index = parent.getChildren().indexOf(this);
            parent.getChildren().remove(this);
            left.setParent(parent);
            right.setParent(parent);
            parent.getChildren().add(index, left);
            parent.getChildren().add(index + 1, right);
            setEntries(null);
            setChildren(null);

            // Let the parent refresh its keys (and split in turn if needed).
            parent.updateInsert(tree);
            setParent(null);
        } else {
            // This node was the root: create a new root above the two halves.
            isRoot = false;
            Node newRoot = new Node(false, true);
            tree.setRoot(newRoot);
            left.setParent(newRoot);
            right.setParent(newRoot);
            newRoot.getChildren().add(left);
            newRoot.getChildren().add(right);
            setEntries(null);
            setChildren(null);

            // Populate the new root's keys.
            newRoot.updateInsert(tree);
        }
    }

    /**
     * Updates this internal node after an insertion below it, splitting it
     * when it has more children than the tree's order.
     *
     * @param tree the owning tree
     */
    protected void updateInsert(BplusTree tree) {

        validate(this, tree);

        // More children than the order allows: split this node.
        if (children.size() > tree.getOrder()) {
            Node left = new Node(false);
            Node right = new Node(false);

            // Child counts of the two halves.
            int leftSize = (tree.getOrder() + 1) / 2 + (tree.getOrder() + 1) % 2;
            int rightSize = (tree.getOrder() + 1) / 2;

            // Move the children to the new nodes and rebuild their keys.
            for (int i = 0; i < leftSize; i++) {
                Node child = children.get(i);
                left.getChildren().add(child);
                left.getEntries().add(new SimpleEntry<Comparable, Object>(
                        child.getEntries().get(0).getKey(), null));
                child.setParent(left);
            }
            for (int i = 0; i < rightSize; i++) {
                Node child = children.get(leftSize + i);
                right.getChildren().add(child);
                right.getEntries().add(new SimpleEntry<Comparable, Object>(
                        child.getEntries().get(0).getKey(), null));
                child.setParent(right);
            }

            replaceWithSplit(left, right, tree);
        }
    }

    /**
     * Re-synchronises the keys of {@code node} with the first keys of its
     * children, propagating upwards when something changed.
     *
     * @param node the internal node to adjust
     * @param tree the owning tree
     */
    protected static void validate(Node node, BplusTree tree) {

        // Same number of keys and children: fix only the keys that differ.
        if (node.getEntries().size() == node.getChildren().size()) {
            for (int i = 0; i < node.getEntries().size(); i++) {
                Comparable key = node.getChildren().get(i).getEntries().get(0).getKey();
                if (node.getEntries().get(i).getKey().compareTo(key) != 0) {
                    node.getEntries().remove(i);
                    node.getEntries().add(i, new SimpleEntry<Comparable, Object>(key, null));
                    if (!node.isRoot()) {
                        validate(node.getParent(), tree);
                    }
                }
            }
            // Key count differs from child count, but the child count is still
            // within bounds (root with >= 2 children, or between M / 2 and M
            // and at least 2): rebuild all keys from scratch.
        } else if ((node.isRoot() && node.getChildren().size() >= 2)
                || (node.getChildren().size() >= tree.getOrder() / 2
                && node.getChildren().size() <= tree.getOrder()
                && node.getChildren().size() >= 2)) {
            node.getEntries().clear();
            for (int i = 0; i < node.getChildren().size(); i++) {
                Comparable key = node.getChildren().get(i).getEntries().get(0).getKey();
                node.getEntries().add(new SimpleEntry<Comparable, Object>(key, null));
                if (!node.isRoot()) {
                    validate(node.getParent(), tree);
                }
            }
        }
    }

    /**
     * Updates this internal node after a removal below it, borrowing a child
     * from a sibling or merging with a sibling when it has too few children.
     *
     * @param tree the owning tree
     */
    protected void updateRemove(BplusTree tree) {

        validate(this, tree);

        // Fewer than M / 2 children, or fewer than 2: rebalance.
        if (children.size() < tree.getOrder() / 2 || children.size() < 2) {
            if (isRoot) {
                // A root with at least 2 children is fine.
                if (children.size() >= 2) {
                    return;
                }
                // Otherwise the single child becomes the new root.
                Node newRoot = children.get(0);
                tree.setRoot(newRoot);
                newRoot.setParent(null);
                newRoot.setRoot(true);
                setEntries(null);
                setChildren(null);
            } else {
                // Locate the siblings on either side within the parent.
                int currIdx = parent.getChildren().indexOf(this);
                int prevIdx = currIdx - 1;
                int nextIdx = currIdx + 1;
                Node leftSibling = null;
                Node rightSibling = null;
                if (prevIdx >= 0) {
                    leftSibling = parent.getChildren().get(prevIdx);
                }
                if (nextIdx < parent.getChildren().size()) {
                    rightSibling = parent.getChildren().get(nextIdx);
                }

                if (leftSibling != null
                        && leftSibling.getChildren().size() > tree.getOrder() / 2
                        && leftSibling.getChildren().size() > 2) {
                    // Left sibling has spare children: move its last child to our front.
                    int idx = leftSibling.getChildren().size() - 1;
                    Node borrow = leftSibling.getChildren().get(idx);
                    leftSibling.getChildren().remove(idx);
                    borrow.setParent(this);
                    children.add(0, borrow);
                    validate(leftSibling, tree);
                    validate(this, tree);
                    parent.updateRemove(tree);

                } else if (rightSibling != null
                        && rightSibling.getChildren().size() > tree.getOrder() / 2
                        && rightSibling.getChildren().size() > 2) {
                    // Right sibling has spare children: move its first child to our end.
                    Node borrow = rightSibling.getChildren().get(0);
                    rightSibling.getChildren().remove(0);
                    borrow.setParent(this);
                    children.add(borrow);
                    validate(rightSibling, tree);
                    validate(this, tree);
                    parent.updateRemove(tree);

                } else if (leftSibling != null
                        && (leftSibling.getChildren().size() <= tree.getOrder() / 2
                        || leftSibling.getChildren().size() <= 2)) {
                    // Merge with the left sibling: prepend all of its children.
                    for (int i = leftSibling.getChildren().size() - 1; i >= 0; i--) {
                        Node child = leftSibling.getChildren().get(i);
                        children.add(0, child);
                        child.setParent(this);
                    }
                    leftSibling.setChildren(null);
                    leftSibling.setEntries(null);
                    leftSibling.setParent(null);
                    parent.getChildren().remove(leftSibling);
                    validate(this, tree);
                    parent.updateRemove(tree);

                } else if (rightSibling != null
                        && (rightSibling.getChildren().size() <= tree.getOrder() / 2
                        || rightSibling.getChildren().size() <= 2)) {
                    // Merge with the right sibling: append all of its children.
                    for (int i = 0; i < rightSibling.getChildren().size(); i++) {
                        Node child = rightSibling.getChildren().get(i);
                        children.add(child);
                        child.setParent(this);
                    }
                    rightSibling.setChildren(null);
                    rightSibling.setEntries(null);
                    rightSibling.setParent(null);
                    parent.getChildren().remove(rightSibling);
                    validate(this, tree);
                    parent.updateRemove(tree);
                }
            }
        }
    }

    /**
     * Removes {@code key} from the subtree rooted here, borrowing from or
     * merging with a neighbouring leaf of the same parent when this leaf
     * would become too small.
     *
     * @param key  the key to remove
     * @param tree the owning tree
     */
    public void remove(Comparable key, BplusTree tree) {
        if (!isLeaf) {
            Node child = findChild(key);
            if (child != null) {
                child.remove(key, tree);
            }
            return;
        }

        // Nothing to do when the key is not stored in this leaf.
        if (!contains(key)) {
            return;
        }

        // A leaf that is also the root needs no rebalancing.
        if (isRoot) {
            remove(key);
            return;
        }

        if (entries.size() > tree.getOrder() / 2 && entries.size() > 2) {
            // Enough entries remain after the removal: just delete.
            remove(key);

        } else if (previous != null
                && previous.getEntries().size() > tree.getOrder() / 2
                && previous.getEntries().size() > 2
                && previous.getParent() == parent) {
            // Borrow the last entry of the previous leaf and put it first.
            int size = previous.getEntries().size();
            Entry<Comparable, Object> entry = previous.getEntries().get(size - 1);
            previous.getEntries().remove(entry);
            entries.add(0, entry);
            remove(key);

        } else if (next != null
                && next.getEntries().size() > tree.getOrder() / 2
                && next.getEntries().size() > 2
                && next.getParent() == parent) {
            // Borrow the first entry of the next leaf and put it last.
            Entry<Comparable, Object> entry = next.getEntries().get(0);
            next.getEntries().remove(entry);
            entries.add(entry);
            remove(key);

        } else if (previous != null
                && (previous.getEntries().size() <= tree.getOrder() / 2
                || previous.getEntries().size() <= 2)
                && previous.getParent() == parent) {
            // Merge with the previous leaf: prepend its entries, last one first.
            for (int i = previous.getEntries().size() - 1; i >= 0; i--) {
                entries.add(0, previous.getEntries().get(i));
            }
            remove(key);
            previous.setParent(null);
            previous.setEntries(null);
            parent.getChildren().remove(previous);

            // Unlink the merged leaf from the leaf linked list.
            if (previous.getPrevious() != null) {
                Node temp = previous;
                temp.getPrevious().setNext(this);
                previous = temp.getPrevious();
                temp.setPrevious(null);
                temp.setNext(null);
            } else {
                // The merged leaf was the head; this leaf takes its place.
                tree.setHead(this);
                previous.setNext(null);
                previous = null;
            }

        } else if (next != null
                && (next.getEntries().size() <= tree.getOrder() / 2
                || next.getEntries().size() <= 2)
                && next.getParent() == parent) {
            // Merge with the next leaf: append its entries in order.
            for (int i = 0; i < next.getEntries().size(); i++) {
                entries.add(next.getEntries().get(i));
            }
            remove(key);
            next.setParent(null);
            next.setEntries(null);
            parent.getChildren().remove(next);

            // Unlink the merged leaf from the leaf linked list.
            if (next.getNext() != null) {
                Node temp = next;
                temp.getNext().setPrevious(this);
                next = temp.getNext();
                temp.setPrevious(null);
                temp.setNext(null);
            } else {
                next.setPrevious(null);
                next = null;
            }
        }

        parent.updateRemove(tree);
    }

    /**
     * Tells whether this node stores {@code key}.
     *
     * @param key the key to look for
     * @return {@code true} if an entry with an equal key exists in this node
     */
    protected boolean contains(Comparable key) {
        for (Entry<Comparable, Object> entry : entries) {
            if (entry.getKey().compareTo(key) == 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Inserts the entry into this node's sorted entry list, or updates the
     * value when the key is already present.
     *
     * @param key the key to insert or update
     * @param obj the value to store
     */
    protected void insertOrUpdate(Comparable key, Object obj) {
        Entry<Comparable, Object> entry = new SimpleEntry<Comparable, Object>(key, obj);
        for (int i = 0; i < entries.size(); i++) {
            int cmp = entries.get(i).getKey().compareTo(key);
            if (cmp == 0) {
                // Key already present: update in place.
                entries.get(i).setValue(obj);
                return;
            }
            if (cmp > 0) {
                // First larger key found: insert right before it.
                entries.add(i, entry);
                return;
            }
        }
        // Empty list or every existing key is smaller: append.
        entries.add(entry);
    }

    /**
     * Removes the first entry whose key equals {@code key} from this node, if any.
     *
     * @param key the key to remove
     */
    protected void remove(Comparable key) {
        for (int i = 0; i < entries.size(); i++) {
            if (entries.get(i).getKey().compareTo(key) == 0) {
                entries.remove(i);
                return;
            }
        }
    }

    public Node getPrevious() {
        return previous;
    }

    public void setPrevious(Node previous) {
        this.previous = previous;
    }

    public Node getNext() {
        return next;
    }

    public void setNext(Node next) {
        this.next = next;
    }

    public boolean isLeaf() {
        return isLeaf;
    }

    public void setLeaf(boolean isLeaf) {
        this.isLeaf = isLeaf;
    }

    public Node getParent() {
        return parent;
    }

    public void setParent(Node parent) {
        this.parent = parent;
    }

    public List<Entry<Comparable, Object>> getEntries() {
        return entries;
    }

    public void setEntries(List<Entry<Comparable, Object>> entries) {
        this.entries = entries;
    }

    public List<Node> getChildren() {
        return children;
    }

    public void setChildren(List<Node> children) {
        this.children = children;
    }

    public boolean isRoot() {
        return isRoot;
    }

    public void setRoot(boolean isRoot) {
        this.isRoot = isRoot;
    }

    public File getFile() {
        return file;
    }

    /**
     * Sets the index file associated with this leaf.
     *
     * @param file the file this leaf is stored in
     */
    public void setFile(File file) {
        // Assign the parameter; the previous self-assignment left the field unset.
        this.file = file;
    }

    /**
     * Returns a debug description listing the root/leaf flags and the keys.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("isRoot: ");
        sb.append(isRoot);
        sb.append(", ");
        sb.append("isLeaf: ");
        sb.append(isLeaf);
        sb.append(", ");
        sb.append("keys: ");
        // entries is set to null once a node has been split or merged away.
        if (entries != null) {
            for (Entry<Comparable, Object> entry : entries) {
                sb.append(entry.getKey());
                sb.append(", ");
            }
        }
        sb.append(", ");
        return sb.toString();
    }

}

生成的檔案,截個圖:

這裡寫圖片描述