1. 程式人生 > >Boost中string的split用法

Boost中string的split用法

標頭檔案

boost/algorithm/string/split.hpp

作用

string的split包括如下API

find_all:在字串string str中查詢string sub,返回找到的所有的 sub。

ifind_all:在字串string str中查詢string sub,同時忽略大小,返回找到的所有的 sub。

split:將字串按照某些條件,進行分割,返回分割後的字串列表

舉例

#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
// equals predicate is used for result comparison
#include <boost/algorithm/string/predicate.hpp>

// Include unit test framework
#include <boost/test/unit_test.hpp>

#include <string>
#include <vector>
#include <list>
#include <iostream>

#include <boost/test/test_tools.hpp>


using namespace std;
using namespace boost;

template< typename T1, typename T2 >
void deep_compare( const T1& X, const T2& Y )
{
    BOOST_REQUIRE( X.size() == Y.size() );
    for( unsigned int nIndex=0; nIndex<X.size(); ++nIndex )
    {
        BOOST_CHECK( equals( X[nIndex], Y[nIndex] ) );
    }
}

void iterator_test()
{
    string str1("xx-abc--xx-abb");
    string str2("Xx-abc--xX-abb-xx");
    string str3("xx");
    string strempty("");
    const char* pch1="xx-abc--xx-abb";
    vector<string> tokens;
    vector< vector<int> > vtokens;
    
    // find_all tests
    find_all(
        tokens,
        pch1,
        "xx" );

    BOOST_REQUIRE( tokens.size()==2 );
    BOOST_CHECK( tokens[0]==string("xx") );
    BOOST_CHECK( tokens[1]==string("xx") );

    ifind_all(
        tokens,
        str2,
        "xx" );

    BOOST_REQUIRE( tokens.size()==3 );
    BOOST_CHECK( tokens[0]==string("Xx") );
    BOOST_CHECK( tokens[1]==string("xX") );
    BOOST_CHECK( tokens[2]==string("xx") );

    find_all(
        tokens,
        str1,
        "xx" );

    BOOST_REQUIRE( tokens.size()==2 );
    BOOST_CHECK( tokens[0]==string("xx") );
    BOOST_CHECK( tokens[1]==string("xx") );

    find_all(
        vtokens,
        str1,
        string("xx") );
    deep_compare( tokens, vtokens );

    // split tests
    split(
        tokens,
        str2,
        is_any_of("xX"),
        token_compress_on );

    BOOST_REQUIRE( tokens.size()==4 );
    BOOST_CHECK( tokens[0]==string("") );
    BOOST_CHECK( tokens[1]==string("-abc--") );
    BOOST_CHECK( tokens[2]==string("-abb-") );
    BOOST_CHECK( tokens[3]==string("") );

    split(
        tokens,
        pch1,
        is_any_of("x"),
        token_compress_on );

    BOOST_REQUIRE( tokens.size()==3 );
    BOOST_CHECK( tokens[0]==string("") );
    BOOST_CHECK( tokens[1]==string("-abc--") );
    BOOST_CHECK( tokens[2]==string("-abb") );

    split(
        vtokens,
        str1,
        is_any_of("x"),
        token_compress_on );
    deep_compare( tokens, vtokens );

    split(
        tokens,
        str1,
        is_punct(),
        token_compress_off );

    BOOST_REQUIRE( tokens.size()==5 );
    BOOST_CHECK( tokens[0]==string("xx") );
    BOOST_CHECK( tokens[1]==string("abc") );
    BOOST_CHECK( tokens[2]==string("") );
    BOOST_CHECK( tokens[3]==string("xx") );
    BOOST_CHECK( tokens[4]==string("abb") );

    split(
        tokens,
        str3,
        is_any_of(","),
        token_compress_off);

    BOOST_REQUIRE( tokens.size()==1 );
    BOOST_CHECK( tokens[0]==string("xx") );

    split(
        tokens,
        strempty,
        is_punct(),
        token_compress_off);

    BOOST_REQUIRE( tokens.size()==1 );
    BOOST_CHECK( tokens[0]==string("") );


    find_iterator<string::iterator> fiter=make_find_iterator(str1, first_finder("xx"));
    find_iterator<string::iterator> fiter2;
    
    BOOST_CHECK(equals(*fiter, "xx"));
    ++fiter;
    
    fiter2 = fiter;
    BOOST_CHECK(equals(*fiter,  "xx"));
    BOOST_CHECK(equals(*fiter2, "xx"));

    ++fiter;
    BOOST_CHECK(fiter==find_iterator<string::iterator>());
    BOOST_CHECK(equals(*fiter2, "xx"));

    ++fiter2;
    BOOST_CHECK(fiter2==find_iterator<string::iterator>());

    split_iterator<string::iterator> siter=make_split_iterator(str1, token_finder(is_any_of("-"), token_compress_on));
    split_iterator<string::iterator> siter2;
    BOOST_CHECK(equals(*siter, "xx"));
    ++siter;

    siter2 = siter;
    BOOST_CHECK(equals(*siter,  "abc"));
    BOOST_CHECK(equals(*siter2, "abc"));
    
    ++siter;
    BOOST_CHECK(equals(*siter,  "xx"));
    BOOST_CHECK(equals(*siter2, "abc"));
    
    ++siter;
    BOOST_CHECK(equals(*siter, "abb"));
    ++siter;
    BOOST_CHECK(siter==split_iterator<string::iterator>(siter));
    BOOST_CHECK(siter==split_iterator<string::iterator>());

//  Make sure we work with forward iterators
//  See bug #7989
    list<char> l1;
    find_iterator<list<char>::iterator> liter=make_find_iterator(l1, first_finder("xx"));
}

int main(int argc, char* [])
{
    iterator_test();
}

原始碼

namespace boost {
    namespace algorithm {

//  find_all  ------------------------------------------------------------//

        //! Find all algorithm
        /*!
            This algorithm finds all occurrences of the search string
            in the input.
            
            Each part is copied and added as a new element to the
            output container.
            Thus the result container must be able to hold copies
            of the matches (in a compatible structure like std::string) or
            a reference to it (e.g. using the iterator range class).
            Examples of such a container are \c std::vector<std::string>
            or \c std::list<boost::iterator_range<std::string::iterator>>

            \param Result A container that can hold copies of references to the substrings
            \param Input A container which will be searched.
            \param Search A substring to be searched for.
            \return A reference the result

            \note Prior content of the result will be overwritten.

            \note This function provides the strong exception-safety guarantee
        */
        template< typename SequenceSequenceT, typename Range1T, typename Range2T >
        inline SequenceSequenceT& find_all(
            SequenceSequenceT& Result,
            Range1T& Input,
            const Range2T& Search)
        {
            return ::boost::algorithm::iter_find(
                Result,
                Input,
                ::boost::algorithm::first_finder(Search) );        
        }

        //! Find all algorithm ( case insensitive ) 
        /*!
            This algorithm finds all occurrences of the search string
            in the input. 
            Each part is copied and added as a new element to the
            output container. Thus the result container must be able to hold copies
            of the matches (in a compatible structure like std::string) or
            a reference to it (e.g. using the iterator range class).
            Examples of such a container are \c std::vector<std::string>
            or \c std::list<boost::iterator_range<std::string::iterator>>

            Searching is case insensitive.

            \param Result A container that can hold copies of references to the substrings
            \param Input A container which will be searched.
            \param Search A substring to be searched for.
            \param Loc A locale used for case insensitive comparison
            \return A reference the result

            \note Prior content of the result will be overwritten.

            \note This function provides the strong exception-safety guarantee
        */
        template< typename SequenceSequenceT, typename Range1T, typename Range2T >
        inline SequenceSequenceT& ifind_all(
            SequenceSequenceT& Result,
            Range1T& Input,
            const Range2T& Search,
            const std::locale& Loc=std::locale() )
        {
            return ::boost::algorithm::iter_find(
                Result,
                Input,
                ::boost::algorithm::first_finder(Search, is_iequal(Loc) ) );        
        }


//  tokenize  -------------------------------------------------------------//

        //! Split algorithm
        /*! 
            Tokenize expression. This function is equivalent to C strtok. Input
            sequence is split into tokens, separated by separators. Separators 
            are given by means of the predicate.

            Each part is copied and added as a new element to the
            output container.
            Thus the result container must be able to hold copies
            of the matches (in a compatible structure like std::string) or
            a reference to it (e.g. using the iterator range class).
            Examples of such a container are \c std::vector<std::string>
            or \c std::list<boost::iterator_range<std::string::iterator>>
    
            \param Result A container that can hold copies of references to the substrings          
            \param Input A container which will be searched.
            \param Pred A predicate to identify separators. This predicate is 
                supposed to return true if a given element is a separator.
            \param eCompress If eCompress argument is set to token_compress_on, adjacent 
                separators are merged together. Otherwise, every two separators
                delimit a token.
            \return A reference the result

            \note Prior content of the result will be overwritten.

            \note This function provides the strong exception-safety guarantee
        */
        template< typename SequenceSequenceT, typename RangeT, typename PredicateT >
        inline SequenceSequenceT& split(
            SequenceSequenceT& Result,
            RangeT& Input,
            PredicateT Pred,
            token_compress_mode_type eCompress=token_compress_off )
        {
            return ::boost::algorithm::iter_split(
                Result,
                Input,
                ::boost::algorithm::token_finder( Pred, eCompress ) );         
        }

    } // namespace algorithm

    // pull names to the boost namespace
    using algorithm::find_all;
    using algorithm::ifind_all;
    using algorithm::split;    

} // namespace boost