// Boost.Locale
00001 // 00002 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 00003 // 00004 // Distributed under the Boost Software License, Version 1.0. (See 00005 // accompanying file LICENSE_1_0.txt or copy at 00006 // http://www.boost.org/LICENSE_1_0.txt) 00007 // 00008 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED 00009 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED 00010 00011 #include <boost/locale/config.hpp> 00012 #include <boost/locale/boundary/types.hpp> 00013 #include <boost/locale/boundary/facets.hpp> 00014 #include <boost/locale/boundary/segment.hpp> 00015 #include <boost/locale/boundary/boundary_point.hpp> 00016 #include <boost/iterator/iterator_facade.hpp> 00017 #include <boost/type_traits/is_same.hpp> 00018 #include <boost/shared_ptr.hpp> 00019 #include <boost/cstdint.hpp> 00020 #include <boost/assert.hpp> 00021 #ifdef BOOST_MSVC 00022 # pragma warning(push) 00023 # pragma warning(disable : 4275 4251 4231 4660) 00024 #endif 00025 #include <string> 00026 #include <locale> 00027 #include <vector> 00028 #include <iterator> 00029 #include <algorithm> 00030 #include <stdexcept> 00031 00032 #include <iostream> 00033 00034 namespace boost { 00035 00036 namespace locale { 00037 00038 namespace boundary { 00046 00048 00049 namespace details { 00050 00051 template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category> 00052 struct mapping_traits { 00053 typedef typename std::iterator_traits<IteratorType>::value_type char_type; 00054 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l) 00055 { 00056 std::basic_string<char_type> str(b,e); 00057 return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size()); 00058 } 00059 }; 00060 00061 template<typename CharType,typename SomeIteratorType> 00062 struct linear_iterator_traits { 00063 static const bool is_linear = 00064 is_same<SomeIteratorType,CharType*>::value 00065 || 
is_same<SomeIteratorType,CharType const*>::value 00066 || is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value 00067 || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value 00068 || is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value 00069 || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value 00070 ; 00071 }; 00072 00073 00074 00075 template<typename IteratorType> 00076 struct mapping_traits<IteratorType,std::random_access_iterator_tag> { 00077 00078 typedef typename std::iterator_traits<IteratorType>::value_type char_type; 00079 00080 00081 00082 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l) 00083 { 00084 index_type result; 00085 00086 // 00087 // Optimize for most common cases 00088 // 00089 // C++0x requires that string is continious in memory and all known 00090 // string implementations 00091 // do this because of c_str() support. 
00092 // 00093 00094 if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e) 00095 { 00096 char_type const *begin = &*b; 00097 char_type const *end = begin + (e-b); 00098 index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end); 00099 result.swap(tmp); 00100 } 00101 else { 00102 std::basic_string<char_type> str(b,e); 00103 index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size()); 00104 result.swap(tmp); 00105 } 00106 return result; 00107 } 00108 }; 00109 00110 template<typename BaseIterator> 00111 class mapping { 00112 public: 00113 typedef BaseIterator base_iterator; 00114 typedef typename std::iterator_traits<base_iterator>::value_type char_type; 00115 00116 00117 mapping(boundary_type type, 00118 base_iterator begin, 00119 base_iterator end, 00120 std::locale const &loc) 00121 : 00122 index_(new index_type()), 00123 begin_(begin), 00124 end_(end) 00125 { 00126 index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc); 00127 index_->swap(idx); 00128 } 00129 00130 mapping() 00131 { 00132 } 00133 00134 index_type const &index() const 00135 { 00136 return *index_; 00137 } 00138 00139 base_iterator begin() const 00140 { 00141 return begin_; 00142 } 00143 00144 base_iterator end() const 00145 { 00146 return end_; 00147 } 00148 00149 private: 00150 boost::shared_ptr<index_type> index_; 00151 base_iterator begin_,end_; 00152 }; 00153 00154 template<typename BaseIterator> 00155 class segment_index_iterator : 00156 public boost::iterator_facade< 00157 segment_index_iterator<BaseIterator>, 00158 segment<BaseIterator>, 00159 boost::bidirectional_traversal_tag, 00160 segment<BaseIterator> const & 00161 > 00162 { 00163 public: 00164 typedef BaseIterator base_iterator; 00165 typedef mapping<base_iterator> mapping_type; 00166 typedef segment<base_iterator> segment_type; 00167 00168 segment_index_iterator() : current_(0,0),map_(0) 00169 { 00170 } 00171 00172 
segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) : 00173 map_(map), 00174 mask_(mask), 00175 full_select_(full_select) 00176 { 00177 set(p); 00178 } 00179 segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) : 00180 map_(map), 00181 mask_(mask), 00182 full_select_(full_select) 00183 { 00184 if(is_begin) 00185 set_begin(); 00186 else 00187 set_end(); 00188 } 00189 00190 segment_type const &dereference() const 00191 { 00192 return value_; 00193 } 00194 00195 bool equal(segment_index_iterator const &other) const 00196 { 00197 return map_ == other.map_ && current_.second == other.current_.second; 00198 } 00199 00200 void increment() 00201 { 00202 std::pair<size_t,size_t> next = current_; 00203 if(full_select_) { 00204 next.first = next.second; 00205 while(next.second < size()) { 00206 next.second++; 00207 if(valid_offset(next.second)) 00208 break; 00209 } 00210 if(next.second == size()) 00211 next.first = next.second - 1; 00212 } 00213 else { 00214 while(next.second < size()) { 00215 next.first = next.second; 00216 next.second++; 00217 if(valid_offset(next.second)) 00218 break; 00219 } 00220 } 00221 update_current(next); 00222 } 00223 00224 void decrement() 00225 { 00226 std::pair<size_t,size_t> next = current_; 00227 if(full_select_) { 00228 while(next.second >1) { 00229 next.second--; 00230 if(valid_offset(next.second)) 00231 break; 00232 } 00233 next.first = next.second; 00234 while(next.first >0) { 00235 next.first--; 00236 if(valid_offset(next.first)) 00237 break; 00238 } 00239 } 00240 else { 00241 while(next.second >1) { 00242 next.second--; 00243 if(valid_offset(next.second)) 00244 break; 00245 } 00246 next.first = next.second - 1; 00247 } 00248 update_current(next); 00249 } 00250 00251 private: 00252 00253 void set_end() 00254 { 00255 current_.first = size() - 1; 00256 current_.second = size(); 00257 value_ = segment_type(map_->end(),map_->end(),0); 00258 } 00259 void 
set_begin() 00260 { 00261 current_.first = current_.second = 0; 00262 value_ = segment_type(map_->begin(),map_->begin(),0); 00263 increment(); 00264 } 00265 00266 void set(base_iterator p) 00267 { 00268 size_t dist=std::distance(map_->begin(),p); 00269 index_type::const_iterator b=map_->index().begin(),e=map_->index().end(); 00270 index_type::const_iterator 00271 boundary_point=std::upper_bound(b,e,break_info(dist)); 00272 while(boundary_point != e && (boundary_point->rule & mask_)==0) 00273 boundary_point++; 00274 00275 current_.first = current_.second = boundary_point - b; 00276 00277 if(full_select_) { 00278 while(current_.first > 0) { 00279 current_.first --; 00280 if(valid_offset(current_.first)) 00281 break; 00282 } 00283 } 00284 else { 00285 if(current_.first > 0) 00286 current_.first --; 00287 } 00288 value_.first = map_->begin(); 00289 std::advance(value_.first,get_offset(current_.first)); 00290 value_.second = value_.first; 00291 std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first)); 00292 00293 update_rule(); 00294 } 00295 00296 void update_current(std::pair<size_t,size_t> pos) 00297 { 00298 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first); 00299 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second); 00300 std::advance(value_.first,first_diff); 00301 std::advance(value_.second,second_diff); 00302 current_ = pos; 00303 update_rule(); 00304 } 00305 00306 void update_rule() 00307 { 00308 if(current_.second != size()) { 00309 value_.rule(index()[current_.second].rule); 00310 } 00311 } 00312 size_t get_offset(size_t ind) const 00313 { 00314 if(ind == size()) 00315 return index().back().offset; 00316 return index()[ind].offset; 00317 } 00318 00319 bool valid_offset(size_t offset) const 00320 { 00321 return offset == 0 00322 || offset == size() // make sure we not acess index[size] 00323 || (index()[offset].rule & mask_)!=0; 00324 } 00325 00326 size_t size() const 00327 { 
00328 return index().size(); 00329 } 00330 00331 index_type const &index() const 00332 { 00333 return map_->index(); 00334 } 00335 00336 00337 segment_type value_; 00338 std::pair<size_t,size_t> current_; 00339 mapping_type const *map_; 00340 rule_type mask_; 00341 bool full_select_; 00342 }; 00343 00344 template<typename BaseIterator> 00345 class boundary_point_index_iterator : 00346 public boost::iterator_facade< 00347 boundary_point_index_iterator<BaseIterator>, 00348 boundary_point<BaseIterator>, 00349 boost::bidirectional_traversal_tag, 00350 boundary_point<BaseIterator> const & 00351 > 00352 { 00353 public: 00354 typedef BaseIterator base_iterator; 00355 typedef mapping<base_iterator> mapping_type; 00356 typedef boundary_point<base_iterator> boundary_point_type; 00357 00358 boundary_point_index_iterator() : current_(0),map_(0) 00359 { 00360 } 00361 00362 boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) : 00363 map_(map), 00364 mask_(mask) 00365 { 00366 if(is_begin) 00367 set_begin(); 00368 else 00369 set_end(); 00370 } 00371 boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) : 00372 map_(map), 00373 mask_(mask) 00374 { 00375 set(p); 00376 } 00377 00378 boundary_point_type const &dereference() const 00379 { 00380 return value_; 00381 } 00382 00383 bool equal(boundary_point_index_iterator const &other) const 00384 { 00385 return map_ == other.map_ && current_ == other.current_; 00386 } 00387 00388 void increment() 00389 { 00390 size_t next = current_; 00391 while(next < size()) { 00392 next++; 00393 if(valid_offset(next)) 00394 break; 00395 } 00396 update_current(next); 00397 } 00398 00399 void decrement() 00400 { 00401 size_t next = current_; 00402 while(next>0) { 00403 next--; 00404 if(valid_offset(next)) 00405 break; 00406 } 00407 update_current(next); 00408 } 00409 00410 private: 00411 void set_end() 00412 { 00413 current_ = size(); 00414 value_ = boundary_point_type(map_->end(),0); 00415 
} 00416 void set_begin() 00417 { 00418 current_ = 0; 00419 value_ = boundary_point_type(map_->begin(),0); 00420 } 00421 00422 void set(base_iterator p) 00423 { 00424 size_t dist = std::distance(map_->begin(),p); 00425 00426 index_type::const_iterator b=index().begin(); 00427 index_type::const_iterator e=index().end(); 00428 index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist)); 00429 00430 if(ptr==index().end()) 00431 current_=size()-1; 00432 else 00433 current_=ptr - index().begin(); 00434 00435 while(!valid_offset(current_)) 00436 current_ ++; 00437 00438 std::ptrdiff_t diff = get_offset(current_) - dist; 00439 std::advance(p,diff); 00440 value_.iterator(p); 00441 update_rule(); 00442 } 00443 00444 void update_current(size_t pos) 00445 { 00446 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_); 00447 base_iterator i=value_.iterator(); 00448 std::advance(i,diff); 00449 current_ = pos; 00450 value_.iterator(i); 00451 update_rule(); 00452 } 00453 00454 void update_rule() 00455 { 00456 if(current_ != size()) { 00457 value_.rule(index()[current_].rule); 00458 } 00459 } 00460 size_t get_offset(size_t ind) const 00461 { 00462 if(ind == size()) 00463 return index().back().offset; 00464 return index()[ind].offset; 00465 } 00466 00467 bool valid_offset(size_t offset) const 00468 { 00469 return offset == 0 00470 || offset + 1 >= size() // last and first are always valid regardless of mark 00471 || (index()[offset].rule & mask_)!=0; 00472 } 00473 00474 size_t size() const 00475 { 00476 return index().size(); 00477 } 00478 00479 index_type const &index() const 00480 { 00481 return map_->index(); 00482 } 00483 00484 00485 boundary_point_type value_; 00486 size_t current_; 00487 mapping_type const *map_; 00488 rule_type mask_; 00489 }; 00490 00491 00492 } // details 00493 00495 00496 template<typename BaseIterator> 00497 class segment_index; 00498 00499 template<typename BaseIterator> 00500 class boundary_point_index; 00501 00502 00554 00555 
template<typename BaseIterator> 00556 class segment_index { 00557 public: 00558 00562 typedef BaseIterator base_iterator; 00563 #ifdef BOOST_LOCALE_DOXYGEN 00564 00565 00566 00567 00568 00569 00570 00571 00572 00573 00574 00575 00576 00577 00578 typedef unspecified_iterator_type iterator; 00582 typedef unspecified_iterator_type const_iterator; 00583 #else 00584 typedef details::segment_index_iterator<base_iterator> iterator; 00585 typedef details::segment_index_iterator<base_iterator> const_iterator; 00586 #endif 00587 00588 00589 00590 00591 typedef segment<base_iterator> value_type; 00592 00602 segment_index() : mask_(0xFFFFFFFFu),full_select_(false) 00603 { 00604 } 00609 segment_index(boundary_type type, 00610 base_iterator begin, 00611 base_iterator end, 00612 rule_type mask, 00613 std::locale const &loc=std::locale()) 00614 : 00615 map_(type,begin,end,loc), 00616 mask_(mask), 00617 full_select_(false) 00618 { 00619 } 00624 segment_index(boundary_type type, 00625 base_iterator begin, 00626 base_iterator end, 00627 std::locale const &loc=std::locale()) 00628 : 00629 map_(type,begin,end,loc), 00630 mask_(0xFFFFFFFFu), 00631 full_select_(false) 00632 { 00633 } 00634 00645 segment_index(boundary_point_index<base_iterator> const &); 00656 segment_index const &operator = (boundary_point_index<base_iterator> const &); 00657 00658 00665 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale()) 00666 { 00667 map_ = mapping_type(type,begin,end,loc); 00668 } 00669 00679 iterator begin() const 00680 { 00681 return iterator(true,&map_,mask_,full_select_); 00682 } 00683 00691 iterator end() const 00692 { 00693 return iterator(false,&map_,mask_,full_select_); 00694 } 00695 00713 iterator find(base_iterator p) const 00714 { 00715 return iterator(p,&map_,mask_,full_select_); 00716 } 00717 00721 rule_type rule() const 00722 { 00723 return mask_; 00724 } 00728 void rule(rule_type v) 00729 { 00730 mask_ = v; 00731 } 00732 00745 00746 
bool full_select() const 00747 { 00748 return full_select_; 00749 } 00750 00763 00764 void full_select(bool v) 00765 { 00766 full_select_ = v; 00767 } 00768 00769 private: 00770 friend class boundary_point_index<base_iterator>; 00771 typedef details::mapping<base_iterator> mapping_type; 00772 mapping_type map_; 00773 rule_type mask_; 00774 bool full_select_; 00775 }; 00776 00823 00824 00825 template<typename BaseIterator> 00826 class boundary_point_index { 00827 public: 00831 typedef BaseIterator base_iterator; 00832 #ifdef BOOST_LOCALE_DOXYGEN 00833 00834 00835 00836 00837 00838 00839 00840 00841 00842 00843 00844 00845 00846 00847 typedef unspecified_iterator_type iterator; 00851 typedef unspecified_iterator_type const_iterator; 00852 #else 00853 typedef details::boundary_point_index_iterator<base_iterator> iterator; 00854 typedef details::boundary_point_index_iterator<base_iterator> const_iterator; 00855 #endif 00856 00857 00858 00859 00860 typedef boundary_point<base_iterator> value_type; 00861 00871 boundary_point_index() : mask_(0xFFFFFFFFu) 00872 { 00873 } 00874 00879 boundary_point_index(boundary_type type, 00880 base_iterator begin, 00881 base_iterator end, 00882 rule_type mask, 00883 std::locale const &loc=std::locale()) 00884 : 00885 map_(type,begin,end,loc), 00886 mask_(mask) 00887 { 00888 } 00893 boundary_point_index(boundary_type type, 00894 base_iterator begin, 00895 base_iterator end, 00896 std::locale const &loc=std::locale()) 00897 : 00898 map_(type,begin,end,loc), 00899 mask_(0xFFFFFFFFu) 00900 { 00901 } 00902 00913 boundary_point_index(segment_index<base_iterator> const &other); 00924 boundary_point_index const &operator=(segment_index<base_iterator> const &other); 00925 00932 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale()) 00933 { 00934 map_ = mapping_type(type,begin,end,loc); 00935 } 00936 00946 iterator begin() const 00947 { 00948 return iterator(true,&map_,mask_); 00949 } 00950 00960 
iterator end() const 00961 { 00962 return iterator(false,&map_,mask_); 00963 } 00964 00978 iterator find(base_iterator p) const 00979 { 00980 return iterator(p,&map_,mask_); 00981 } 00982 00986 rule_type rule() const 00987 { 00988 return mask_; 00989 } 00993 void rule(rule_type v) 00994 { 00995 mask_ = v; 00996 } 00997 00998 private: 00999 01000 friend class segment_index<base_iterator>; 01001 typedef details::mapping<base_iterator> mapping_type; 01002 mapping_type map_; 01003 rule_type mask_; 01004 }; 01005 01007 template<typename BaseIterator> 01008 segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) : 01009 map_(other.map_), 01010 mask_(0xFFFFFFFFu), 01011 full_select_(false) 01012 { 01013 } 01014 01015 template<typename BaseIterator> 01016 boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) : 01017 map_(other.map_), 01018 mask_(0xFFFFFFFFu) 01019 { 01020 } 01021 01022 template<typename BaseIterator> 01023 segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other) 01024 { 01025 map_ = other.map_; 01026 return *this; 01027 } 01028 01029 template<typename BaseIterator> 01030 boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other) 01031 { 01032 map_ = other.map_; 01033 return *this; 01034 } 01036 01037 typedef segment_index<std::string::const_iterator> ssegment_index; 01038 typedef segment_index<std::wstring::const_iterator> wssegment_index; 01039 #ifdef BOOST_HAS_CHAR16_T 01040 typedef segment_index<std::u16string::const_iterator> u16ssegment_index; 01041 #endif 01042 #ifdef BOOST_HAS_CHAR32_T 01043 typedef segment_index<std::u32string::const_iterator> u32ssegment_index; 01044 #endif 01045 01046 typedef segment_index<char const *> csegment_index; 01047 typedef segment_index<wchar_t const *> wcsegment_index; 01048 #ifdef BOOST_HAS_CHAR16_T 
01049 typedef segment_index<char16_t const *> u16csegment_index; 01050 #endif 01051 #ifdef BOOST_HAS_CHAR32_T 01052 typedef segment_index<char32_t const *> u32csegment_index; 01053 #endif 01054 01055 typedef boundary_point_index<std::string::const_iterator> sboundary_point_index; 01056 typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index; 01057 #ifdef BOOST_HAS_CHAR16_T 01058 typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index; 01059 #endif 01060 #ifdef BOOST_HAS_CHAR32_T 01061 typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index; 01062 #endif 01063 01064 typedef boundary_point_index<char const *> cboundary_point_index; 01065 typedef boundary_point_index<wchar_t const *> wcboundary_point_index; 01066 #ifdef BOOST_HAS_CHAR16_T 01067 typedef boundary_point_index<char16_t const *> u16cboundary_point_index; 01068 #endif 01069 #ifdef BOOST_HAS_CHAR32_T 01070 typedef boundary_point_index<char32_t const *> u32cboundary_point_index; 01071 #endif 01072 01073 01074 01075 } // boundary 01076 01077 } // locale 01078 } // boost 01079 01086 01087 #ifdef BOOST_MSVC 01088 #pragma warning(pop) 01089 #endif 01090 01091 #endif 01092 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4