1. 程式人生 > String 部分原始碼分析

String 部分原始碼分析

String

  • 無引數建構函式
    /**
     * Backing byte array that stores the string's characters.
     * JDK 8 and earlier stored them in a character array instead:
     * {@code private final char[] value;}
     */
    @Stable
    private final byte[] value;
    /**
     * Identifier of the encoding used for the bytes in {@code value};
     * the supported values are LATIN1 and UTF16.
     */
    private final byte coder;
    /**
     * Hash code value of the string; defaults to 0.
     */
    private int hash; // Default to 0

    /**
     * 建立一個空字串
     * created by ZXD at 18 Nov 2018 T 11:17:48
     */
    public String() {
        this.value = "".value;
        this.coder = "".coder;
    }    
  • 基於位元組陣列建立字串
    /**
     * Creates a string from the whole byte array by delegating to the
     * (bytes, offset, length) constructor with offset 0 and length {@code bytes.length}.
     *
     * @param bytes the bytes to decode; reading {@code bytes.length} fails with a
     *              NullPointerException when it is null
     */
    public String(byte[] bytes) {
        this(bytes, 0, bytes.length);
    }

    /**
     * Creates a string by decoding {@code length} bytes of {@code bytes}, starting at
     * {@code offset}, through the charset-less {@code StringCoding.decode} overload
     * (presumably the platform default charset; confirm against StringCoding).
     *
     * @param bytes  the bytes to decode
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     */
    public String(byte bytes[], int offset, int length) {
        // Validate the requested range against the array before decoding.
        checkBoundsOffCount(offset, length, bytes.length);
        // Decode the selected bytes; the result carries both the byte array and its coder.
        final StringCoding.Result decoded = StringCoding.decode(bytes, offset, length);
        // Adopt the coder reported by the decoder.
        this.coder = decoded.coder;
        // Adopt the decoded byte array as the backing storage.
        this.value = decoded.value;
    }

    /**
     * Creates a string from the whole byte array using the given charset, by delegating
     * to the (bytes, offset, length, charset) constructor with offset 0 and length
     * {@code bytes.length}.
     *
     * @param bytes   the bytes to decode
     * @param charset the charset used to decode the bytes
     */
    public String(byte bytes[], Charset charset) {
        this(bytes, 0, bytes.length, charset);
    }
    
    /**
     * Creates a string by decoding {@code length} bytes of {@code bytes}, starting at
     * {@code offset}, with the given charset.
     *
     * @param bytes   the bytes to decode
     * @param offset  index of the first byte to decode
     * @param length  number of bytes to decode
     * @param charset the charset used to decode the bytes
     * @throws NullPointerException if {@code charset} is null
     */
    public String(byte bytes[], int offset, int length, Charset charset) {
        // Defensive programming: reject a missing charset before doing anything else.
        if (charset == null) {
            throw new NullPointerException("charset");
        }
        checkBoundsOffCount(offset, length, bytes.length);
        // Decode the selected bytes with the requested charset.
        final StringCoding.Result decoded = StringCoding.decode(charset, bytes, offset, length);
        this.coder = decoded.coder;
        this.value = decoded.value;
    }

    /**
     * Creates a string from the whole byte array using the named charset, by delegating
     * to the (bytes, offset, length, charsetName) constructor with offset 0 and length
     * {@code bytes.length}.
     *
     * @param bytes       the bytes to decode
     * @param charsetName name of the charset used to decode the bytes
     * @throws UnsupportedEncodingException propagated from the delegated constructor
     */
    public String(byte bytes[], String charsetName)
            throws UnsupportedEncodingException {
        this(bytes, 0, bytes.length, charsetName);
    }

    /**
     * Creates a string by decoding {@code length} bytes of {@code bytes}, starting at
     * {@code offset}, with the charset identified by {@code charsetName}.
     *
     * @param bytes       the bytes to decode
     * @param offset      index of the first byte to decode
     * @param length      number of bytes to decode
     * @param charsetName name of the charset used to decode the bytes
     * @throws NullPointerException         if {@code charsetName} is null
     * @throws UnsupportedEncodingException declared for the by-name decode; per the
     *         original note it is raised when the charset name is not valid
     */
    public String(byte bytes[], int offset, int length, String charsetName)
            throws UnsupportedEncodingException {
        // Reject a missing charset name before doing anything else.
        if (charsetName == null) {
            throw new NullPointerException("charsetName");
        }
        checkBoundsOffCount(offset, length, bytes.length);
        // Decode the selected bytes with the charset looked up by name.
        final StringCoding.Result decoded =
            StringCoding.decode(charsetName, bytes, offset, length);
        this.coder = decoded.coder;
        this.value = decoded.value;
    }
  • 基於字元陣列建立字串
    /**
     * Creates a string from the whole character array by delegating to the
     * package-private (value, off, len, sig) constructor with offset 0, length
     * {@code value.length} and a null marker argument.
     *
     * @param value the characters to copy into the new string
     */
    public String(char value[]) {
        this(value, 0, value.length, null);
    }

    /**
     * Creates a string from {@code count} characters of {@code value} starting at
     * {@code offset}. The range is validated by {@code rangeCheck}, which is evaluated
     * as the last argument of the delegating call and therefore runs before the
     * delegated constructor body.
     *
     * @param value  the source characters
     * @param offset index of the first character to use
     * @param count  number of characters to use
     */
    public String(char value[], int offset, int count) {
        this(value, offset, count, rangeCheck(value, offset, count));
    }

    /**
     * Validates that {@code offset} and {@code count} describe a range inside
     * {@code value}. Always returns {@code null}; having a return value lets the call
     * be used as an argument expression of a {@code this(...)} constructor call.
     */
    private static Void rangeCheck(char[] value, int offset, int count) {
        // Validate the offset/count pair against the array length.
        checkBoundsOffCount(offset, count, value.length);
        return null;
    }

    String(char[] value, int off, int len, Void sig) {
        // 特殊場景優化處理
        if (len == 0) {
            this.value = "".value;
            this.coder = "".coder;
            return;
        }
        if (COMPACT_STRINGS) {
            // 如果啟用壓縮,則將字元陣列壓縮,字符集設定為 LATIN1
            byte[] val = StringUTF16.compress(value, off, len);
            if (val != null) {
                this.value = val;
                this.coder = LATIN1;
                return;
            }
        }
        // 字元陣列不壓縮時,字符集設定為 UTF16
        this.coder = UTF16;
        this.value = StringUTF16.toBytes(value, off, len);
    }
  • 字串內容相等性比較
    /**
     * Compares this string with another object for content equality.
     *
     * @param anObject the object to compare against
     * @return {@code true} if {@code anObject} is this same instance, or is a String
     *         with the same coder whose backing bytes compare equal
     */
    public boolean equals(Object anObject) {
        // Same reference: equal without looking at the content.
        if (this == anObject) {
            return true;
        }
        // Anything that is not a String (including null) can never be equal.
        if (!(anObject instanceof String)) {
            return false;
        }
        final String other = (String) anObject;
        // The byte arrays are only compared when both strings use the same coder.
        if (coder() != other.coder()) {
            return false;
        }
        if (isLatin1()) {
            return StringLatin1.equals(value, other.value);
        }
        return StringUTF16.equals(value, other.value);
    }
  • 字串的長度
    /**
     * Returns the length of this string, computed as the byte length of the backing
     * array shifted right by the coder value: a coder of 0 (LATIN1) leaves the byte
     * count unchanged, a coder of 1 (UTF16) halves it.
     */
    public int length() {
        return value.length >> coder();
    }

    /**
     * Returns the effective coder of this string: the stored {@code coder} field when
     * {@code COMPACT_STRINGS} is enabled, otherwise always {@code UTF16}.
     */
    byte coder() {
        if (COMPACT_STRINGS) {
            return coder;
        }
        return UTF16;
    }

    // Coder value for strings stored in the LATIN1 form.
    @Native static final byte LATIN1 = 0;
    // Coder value for strings stored in the UTF16 form.
    @Native static final byte UTF16  = 1; // UTF-16 maps the abstract code points of the Unicode character set to 16-bit integers
  • 比較字串內容並且不區分大小寫
    /**
     * Compares this string with another string, ignoring case differences.
     *
     * @param anotherString the string to compare against; may be null
     * @return {@code true} if both are the same instance, or if the argument is
     *         non-null, has the same length, and matches over the full length with
     *         case ignored
     */
    public boolean equalsIgnoreCase(String anotherString) {
        if (this == anotherString) {
            return true;
        }
        // A null argument never matches.
        if (anotherString == null) {
            return false;
        }
        // Strings of different lengths never match.
        if (anotherString.length() != length()) {
            return false;
        }
        // Compare the whole region from index 0 with the ignore-case flag set.
        return regionMatches(true, 0, anotherString, 0, length());
    }
  • 字串拼接
    /**
     * Appends {@code str} to the end of this string.
     *
     * @param str the string to append
     * @return this string when {@code str} is empty, otherwise a new string holding
     *         the contents of both
     */
    public String concat(String str) {
        final int appendedLen = str.length();
        if (appendedLen == 0) {
            return this;
        }

        if (coder() != str.coder()) {
            // The coders differ: size a UTF16 buffer for both strings and copy each
            // of them into it as UTF16.
            final int ownLen = length();
            final byte[] wide = StringUTF16.newBytesFor(ownLen + appendedLen);
            getBytes(wide, 0, UTF16);
            str.getBytes(wide, ownLen, UTF16);
            return new String(wide, UTF16);
        }

        // Same coder on both sides: concatenate the raw byte arrays directly.
        final byte[] head = this.value;
        final byte[] tail = str.value;
        final byte[] joined = Arrays.copyOf(head, head.length + tail.length);
        System.arraycopy(tail, 0, joined, head.length, tail.length);
        return new String(joined, coder);
    }
  • 字串擷取
    /**
     * Returns the part of this string from {@code beginIndex} (inclusive) to
     * {@code endIndex} (exclusive).
     *
     * @param beginIndex index of the first character to include
     * @param endIndex   index just past the last character to include
     * @return this string when the range covers the whole string, otherwise a new
     *         string holding the selected characters
     */
    public String substring(int beginIndex, int endIndex) {
        final int total = length();
        // Validate the index pair against the string length.
        checkBoundsBeginEnd(beginIndex, endIndex, total);
        // Special case: the requested range is the whole string.
        if (beginIndex == 0 && endIndex == total) {
            return this;
        }
        final int count = endIndex - beginIndex;
        if (isLatin1()) {
            return StringLatin1.newString(value, beginIndex, count);
        }
        return StringUTF16.newString(value, beginIndex, count);
    }
    
    /**
     * Verifies that {@code 0 <= begin <= end <= length}.
     * Note that {@code end} may be equal to {@code length}, and {@code begin} may be
     * equal to {@code end}.
     *
     * @param begin  start index, inclusive
     * @param end    end index, exclusive
     * @param length upper bound for {@code end}
     * @throws StringIndexOutOfBoundsException if {@code begin} is negative,
     *         {@code begin} is greater than {@code end}, or {@code end} is greater
     *         than {@code length}
     */
    static void checkBoundsBeginEnd(int begin, int end, int length) {
        final boolean inBounds = begin >= 0 && begin <= end && end <= length;
        if (!inBounds) {
            throw new StringIndexOutOfBoundsException(
                "begin " + begin + ", end " + end + ", length " + length);
        }
    }
  • 獲取字串中指定索引處的單個字元
    /**
     * Returns the character at {@code index}, delegating to the LATIN1 or UTF16
     * helper depending on how this string is stored.
     *
     * @param index index of the requested character
     */
    public char charAt(int index) {
        return isLatin1() ? StringLatin1.charAt(value, index)
                          : StringUTF16.charAt(value, index);
    }

    StringLatin1#charAt
    /**
     * Reads one character from an array that holds one byte per character.
     *
     * @param value the backing bytes
     * @param index index of the requested character
     * @return the byte at {@code index}, zero-extended to a char
     * @throws StringIndexOutOfBoundsException if {@code index} is negative or not
     *         smaller than {@code value.length}
     */
    public static char charAt(byte[] value, int index) {
        final boolean inRange = index >= 0 && index < value.length;
        if (!inRange) {
            throw new StringIndexOutOfBoundsException(index);
        }
        // Mask to the low 8 bits so that negative bytes map to the range 128..255.
        final int unsigned = value[index] & 0xff;
        return (char) unsigned;
    }
  • 目標字串是否包含子字串
    /**
     * Tells whether this string contains the given character sequence.
     *
     * @param s the sequence to look for
     * @return {@code true} if {@code indexOf} finds the sequence's string form
     */
    public boolean contains(CharSequence s) {
        final String needle = s.toString();
        return indexOf(needle) >= 0;
    }
  • 字串是否為空
    /**
     * Tells whether this string is empty, i.e. whether the backing byte array has
     * length zero.
     */
    public boolean isEmpty() {
        return value.length == 0;
    }
  • 字串替換
    /**
     * Replaces occurrences of {@code oldChar} with {@code newChar}.
     *
     * @param oldChar the character to replace
     * @param newChar the character to put in its place
     * @return this string when both characters are the same or the coder-specific
     *         helper returns null, otherwise the string produced by the helper
     */
    public String replace(char oldChar, char newChar) {
        // Replacing a character with itself cannot change anything.
        if (oldChar == newChar) {
            return this;
        }
        final String replaced = isLatin1()
                ? StringLatin1.replace(value, oldChar, newChar)
                : StringUTF16.replace(value, oldChar, newChar);
        // A null result from the helper falls back to this string.
        return (replaced != null) ? replaced : this;
    }

    /**
     * Replaces every occurrence of {@code target} in this string with
     * {@code replacement}, scanning from the start of the string towards the end.
     *
     * @param target      the character sequence to search for
     * @param replacement the character sequence substituted for each match
     * @return this string if {@code target} does not occur, otherwise a new string
     *         with the replacements applied
     */
    public String replace(CharSequence target, CharSequence replacement) {
        // The sequence to search for.
        String tgtStr = target.toString();
        // The sequence that replaces each match.
        String replStr = replacement.toString();
        // If the target does not occur in this string, return this string itself.
        int j = indexOf(tgtStr);
        if (j < 0) {
            return this;
        }
        // Length of the search target.
        int tgtLen = tgtStr.length();
        // Search step of at least 1, so the scan still advances when the target is empty.
        int tgtLen1 = Math.max(tgtLen, 1);
        // Length of the current string.
        int thisLen = length();

        // Length after a single replacement, used as the initial builder capacity;
        // a negative value means the int arithmetic overflowed.
        int newLenHint = thisLen - tgtLen + replStr.length();
        if (newLenHint < 0) {
            throw new OutOfMemoryError();
        }
        StringBuilder sb = new StringBuilder(newLenHint);
        // i marks the start of the text not yet copied; j is the index of the current match.
        int i = 0;
        // Copy the text before each match followed by the replacement, then search for
        // the next match starting tgtLen1 positions after the current one. The search
        // always starts at index 1 or later, so "> 0" is enough to detect a match.
        do {
            sb.append(this, i, j).append(replStr);
            i = j + tgtLen;
        } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0);
        // Copy whatever follows the last match.
        return sb.append(this, i, thisLen).toString();
    }
  • 基於正則表示式替換字串
    /**
     * Replaces every match of the regular expression with {@code replacement}.
     * The expression is compiled on each call.
     *
     * @param regex       the regular expression to match
     * @param replacement the replacement passed to the matcher
     * @return the string produced by the matcher's {@code replaceAll}
     */
    public String replaceAll(String regex, String replacement) {
        final Pattern compiled = Pattern.compile(regex);
        return compiled.matcher(this).replaceAll(replacement);
    }
  • 基於正則表示式替換首次出現的字串
    /**
     * Replaces the first match of the regular expression with {@code replacement}.
     * The expression is compiled on each call.
     *
     * @param regex       the regular expression to match
     * @param replacement the replacement passed to the matcher
     * @return the string produced by the matcher's {@code replaceFirst}
     */
    public String replaceFirst(String regex, String replacement) {
        final Pattern compiled = Pattern.compile(regex);
        return compiled.matcher(this).replaceFirst(replacement);
    }