1 #ifndef libjmmcg_core_basic_stack_string_hpp 2 #define libjmmcg_core_basic_stack_string_hpp 3 4 /****************************************************************************** 5 ** $Header: svn+ssh://jmmcg@svn.code.sf.net/p/libjmmcg/code/trunk/libjmmcg/core/stack_string.hpp 2267 2018-03-11 15:52:50Z jmmcg $ 6 ** 7 ** Copyright © 2013 by J.M.McGuiness, coder@hussar.me.uk 8 ** 9 ** This library is free software; you can redistribute it and/or 10 ** modify it under the terms of the GNU Lesser General Public 11 ** License as published by the Free Software Foundation; either 12 ** version 2.1 of the License, or (at your option) any later version. 13 ** 14 ** This library is distributed in the hope that it will be useful, 15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 ** Lesser General Public License for more details. 18 ** 19 ** You should have received a copy of the GNU Lesser General Public 20 ** License along with this library; if not, write to the Free Software 21 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 */ 23 24 #include "hash.hpp" 25 #include "max_min.hpp" 26 #include "memops.hpp" 27 28 #include <algorithm> 29 #include <cstring> 30 #include <functional> 31 #include <memory> 32 #include <stdexcept> 33 #include <iterator> 34 #include <limits> 35 36 namespace jmmcg { 37 38 /// A class to provide a representation for a null-terminated character string that utilises a small-string optimisation. 39 /** 40 This class is a value-type. 41 If the string to be stored is less than small_string_max_size it is not stored on the heap, but on the stack. 42 It is not a drop-in replacement for std::string, as it does not implement all of the member functions of the latter (because I don't like the fat interface). 43 The optimisations assume that this class will be used for small strings, get the static branch-predictions wrong, and one can get a 50% drop in performance! 44 45 \see __gnuxx::__vstring, std::string 46 */ 47 template< 48 unsigned int BuffN, ///< The approximate number of charT characters to store in the stack-based buffer. The minimum actual size is given by small_string_max_size. 49 class charT, 50 class traits=std::char_traits<charT> 51 > 52 class basic_stack_string final { 53 /** 54 A type to express the machine's native memory-bus width for a word, i.e. what can be rapidly fetched from, copied in a single register operation & written back to memory. This is not the cache line width. 55 */ 56 typedef unsigned long native_databus_type; 57 58 public: 59 typedef charT value_type; 60 typedef traits traits_type; 61 typedef std::iterator_traits<value_type const *> const_iterator_traits; 62 typedef std::iterator_traits<value_type *> iterator_traits; 63 typedef std::size_t size_type; 64 typedef typename iterator_traits::pointer pointer; 65 typedef typename const_iterator_traits::pointer const_pointer; 66 typedef typename iterator_traits::pointer iterator; 67 typedef typename const_iterator_traits::pointer const_iterator; 68 typedef typename iterator_traits::reference reference; 69 typedef typename const_iterator_traits::reference const_reference; 70 71 static_assert(std::is_pod<value_type>::value, "The contained type must be a POD due to the implementation."); 72 73 enum : std::size_t { 74 small_string_max_size=max<std::size_t, sizeof(pointer), ((BuffN+sizeof(native_databus_type)/2)/sizeof(native_databus_type))*sizeof(native_databus_type)>::value ///< Ensure that the internal, stack-based buffer is the minimum size of a pointer, or larger, in units of "native word"s. Also store the null termination char so that c_str() is fast too, trading off size for speed. In units of chars. 75 }; 76 77 /** 78 The type of the exception that we throw. 79 */ 80 struct exception : std::runtime_error { 81 typedef std::runtime_error base_t; 82 83 explicit constexpr exception(char const *s) FORCE_INLINE : base_t(s) {} 84 }; 85 86 private: 87 /** 88 Try to ensure that the alignment of the input char ptr parameter is suitable for speedily copying into the internal buffer, otherwise we might get sub-optimal copying or at worst a bus error or alignment error in buffer_type::copy(). 89 Note that this is compiler-specific, and the attribute will need to change for different compilers. 90 91 \see buffer_type::copy(), basic_stack_string() 92 */ 93 typedef value_type __attribute__ ((aligned (sizeof(native_databus_type)))) const * ensure_char_ptr_argument_aligned; 94 95 public: 96 /** 97 Complexity: O(1) 98 */ 99 constexpr basic_stack_string() noexcept(true) FORCE_INLINE; 100 /// Convert a C-style string into a basic_stack_string. 101 /** 102 Note the use of the opaque parameter type ensure_char_ptr_argument_aligned: this allows the user to pass in a char const * pointer, but ensures that it is suitably aligned for our purposes, without the user having to worry about alignment issues. 103 Complexity: O(sz) best case, O(new()+sz) worst-case. 104 May throw: std::bad_alloc, exception if pointer is nullptr 105 106 \param src A c-style null terminated string, that should have been suitably aligned, automatically, by the compiler. 107 \param sz The number of chars to copy. 108 109 \see ensure_char_ptr_argument_aligned 110 */ 111 explicit basic_stack_string(ensure_char_ptr_argument_aligned src, std::size_t sz) noexcept(false) FORCE_INLINE; 112 /// Convert a C-style array of chars into a basic_stack_string. 113 /** 114 Complexity: O(1) best case, O(new()+SrcSz) worst-case. 115 May throw: std::bad_alloc 116 117 \param src A c-style null terminated char-array. 118 */ 119 template<std::size_t SrcSz> explicit FORCE_INLINE 120 basic_stack_string(value_type const (& src)[SrcSz]) noexcept(SrcSz<=small_string_max_size); 121 /** 122 Note that if the string contained in str can be optimised, this function will optimise the copy, i.e. place the copy on the stack from the heap. 123 Complexity: O(1) best case, O(new()+str.size()) worst-case. 124 May throw: std::bad_alloc 125 126 \param str The basic_stack_string to be copied. 127 */ 128 basic_stack_string(basic_stack_string const &str) noexcept(false) FORCE_INLINE; 129 /** 130 Complexity: O(1). 131 132 \param str The string to be copied. 133 */ 134 basic_stack_string(basic_stack_string&& str) noexcept(true) FORCE_INLINE; 135 /** 136 Complexity: O(1) best-case, O(delete()) worst-case. 137 */ 138 ~basic_stack_string() noexcept(true) FORCE_INLINE; 139 140 /// Copy the basic_stack_string. 141 /** 142 Complexity: O(1) best case, O(new()+str.size()) worst-case. 143 May throw: std::bad_alloc 144 145 \return An r-value to the copied basic_stack_string. 146 */ 147 basic_stack_string &operator=(basic_stack_string const &str) noexcept(false) FORCE_INLINE; 148 /// Move the basic_stack_string. 149 /** 150 Complexity: O(1) 151 152 \return An r-value to the copied basic_stack_string. 153 */ 154 basic_stack_string &operator=(basic_stack_string &&str) noexcept(true) FORCE_INLINE; 155 156 constexpr bool operator==(basic_stack_string const &) const noexcept(true) FORCE_INLINE; 157 constexpr bool operator!=(basic_stack_string const &) const noexcept(true) FORCE_INLINE; 158 159 /// Return an iterator to the beginning of the contained string. 160 /** 161 Complexity: O(1) 162 163 \return iterator to the beginning of the contained string. 164 */ 165 iterator begin() noexcept(true) FORCE_INLINE; 166 /// Return a const_iterator to the beginning of the contained string. 167 /** 168 Complexity: O(1) 169 170 \return const_iterator to the beginning of the contained string. 171 */ 172 const_iterator begin() const noexcept(true) FORCE_INLINE; 173 /// Return an iterator to the end of the contained string. 174 /** 175 Complexity: O(1) 176 177 \return iterator to the end of the contained string. 178 */ 179 iterator end() noexcept(true) FORCE_INLINE; 180 /// Return a const_iterator to the end of the contained string. 181 /** 182 Complexity: O(1) 183 184 \return const_iterator to the end of the contained string. 185 */ 186 const_iterator end() const noexcept(true) FORCE_INLINE; 187 188 /// Return the size of the contained string, excluding any null termination. 189 /** 190 Complexity: O(1) 191 Constraint: size()<=capacity() && empty==(size()==0). 192 193 \return Size of the contained string. 194 195 \see capacity(), empty() 196 */ 197 constexpr size_type size() const noexcept(true) FORCE_INLINE; 198 /// Return the maximum size of any possible contained string, including any null termination. 199 /** 200 Complexity: O(1) 201 Constraint: max_size()>=capacity(). 202 203 \return Maximum possible size of a contained string. 204 205 \see capacity() 206 */ 207 static constexpr size_type max_size() noexcept(true) FORCE_INLINE { 208 return std::numeric_limits<typename iterator_traits::difference_type>::max(); 209 } 210 /// Uninitialised resize of the contained string to the requested size. 211 /** 212 Notes: 213 - The elements in the internal buffer are not re-initialised, even if more memory has had to be allocated, for speed. 214 - If capacity()>s then the internal buffer is reduced in size. Note that if the string has been moved to the heap it will not be moved back to the stack, for speed. 215 - The null terminator is re-added to the end. 216 - If the capacity() has to be increased, all iterators are invalidated but the contained string is maintained. 217 Complexity: O(reserve()) 218 Constraint: size()==s. 219 May throw: std::bad_alloc 220 221 \param s Ensure that size() returns s. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity of at least s. 222 223 \see empty(), reserve(), size() 224 */ 225 void resize(size_type s) noexcept(false) FORCE_INLINE; 226 /// Initialised resize of the contained string to the requested size. 227 /** 228 Notes: 229 - If capacity()>s then the internal buffer is reduced in size. Note that if the string has been moved to the heap it will not be moved back to the stack, for speed. 230 - The null terminator is re-added to the end. 231 - If the capacity() has to be increased, all iterators are invalidated but the contained string is maintained. 232 Complexity: O(reserve()+std::max(s-size(), 0)) 233 Constraint: size()==s && empty()==false. 234 May throw: std::bad_alloc 235 236 \param s Ensure that size() returns s. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity() of at least s. 237 \param i The value to which the s elements within the contained string should be initialised. 238 239 \see capacity(), empty(), size() 240 */ 241 void resize(size_type s, value_type i) noexcept(false) FORCE_INLINE; 242 /// Return the size of the internal buffer used to store the string, including the null termination. 243 /** 244 Complexity: O(1) 245 Constraint: capacity()>=size() && capacity()<=max_size(). 246 247 \return Return the size of internal buffer. 248 249 \see max_size(), size() 250 */ 251 constexpr size_type capacity() const noexcept(true) FORCE_INLINE { 252 return capacity_; 253 } 254 /// Set the size of the internal buffer to at least the requested size, if the requested size is larger, then it will not be made smaller. 255 /** 256 Notes: 257 - The elements in the internal buffer are not re-initialised, even if more memory has had to be allocated. 258 - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained string is maintained. 259 - If capacity() has to be reduced, note that if the string has been moved to the heap it will not be moved back to the stack, for speed. 260 - If an exception is thrown, then the object is unaffected. 261 Complexity: O(1) best case, O(new()+delete()+std::max(s-size(), 0)) worst case. 262 Constraint: capacity()==s && capacity()<=max_size(). 263 May throw: std::bad_alloc 264 265 \param s Ensure that capacity() returns s, if less than max_size(), otherwise no effect. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity of s. 266 267 \see capacity(), max_size() 268 */ 269 void reserve(size_type s) noexcept(false) FORCE_INLINE; 270 271 /// Clear the contained string. 272 /** 273 Complexity: O(1) best case, O(delete()) worst case. 274 Postcondition: size()==0 && empty()==true. 275 276 \see empty(), size() 277 */ 278 void clear() noexcept(true) FORCE_INLINE; 279 /// Return is the string is empty. 280 /** 281 Complexity: O(1) 282 Condition: empty()==(size()==0). 283 284 \return true if size()==0, false otherwise. 285 286 \see size() 287 */ 288 constexpr bool empty() const noexcept(true) FORCE_INLINE; 289 290 /// Return an l-value to the requested element in the contained string. 291 /** 292 Note that p must be less than size(), otherwise UB. 293 Complexity: O(1) 294 295 \param p The zero-based index into the contained string. 296 \return An l-value to the requested character in the contained string. 297 298 \see at(), size() 299 */ 300 reference operator[](size_type p) noexcept(true) FORCE_INLINE; 301 /// Return an r-value to the requested element in the contained string. 302 /** 303 Note that p must be less than size(), otherwise UB. 304 Complexity: O(1) 305 306 \param p The zero-based index into the contained string. 307 \return An r-value to the requested character in the contained string. 308 309 \see at(), size() 310 */ 311 const_reference operator[](size_type p) const noexcept(true) FORCE_INLINE; 312 313 /// Append a character to the end of the contained string. 314 /** 315 Note: 316 - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained basic_stack_string is maintained. 317 Complexity: O(resize()) 318 May throw: std::bad_alloc 319 320 \param c The character to append. 321 322 \see resize() 323 */ 324 void push_back(value_type c) noexcept(false) FORCE_INLINE; 325 /// Insert the characters in the range [b, e) into the contained string from the specified position, growing the contained string as necessary. 326 /** 327 Note: 328 - I'm not implementing the large number of overloads in std::basic_basic_stack_string, as that interface is unnecessarily fat. 329 - The range inserted is [b, e). 330 - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained basic_stack_string is maintained. 331 Complexity: O(resize()) 332 May throw: std::bad_alloc 333 334 \param p The position immediately after the position into which the range is inserted, must be in the range [begin(), end()). 335 \param b The beginning of the range. 336 \param e The end of the range. 337 \return An iterator which refers to the copy of the first inserted character, or p if b==e. 338 */ 339 iterator insert(iterator p, const_iterator b, const_iterator e) noexcept(false) FORCE_INLINE; 340 341 /// Remove the characters in the range [b, e). 342 /** 343 Note that {b, e} must be within [begin(), end()). 344 Complexity: O(std::distance(b, e)) 345 346 \param b The beginning of the range. 347 \param e The end of the range. 348 \return An iterator which points to the element pointed to by e prior to the other elements being 349 erased. If no such element exists, end() is returned. 350 */ 351 iterator erase(const_iterator b, const_iterator e) noexcept(true) FORCE_INLINE; 352 353 /// Replace the characters in the range [b, e) with those in the range [src_b, src_e). 354 /** 355 Note that {b, e} must be within [begin(), end()). 356 Complexity: O(resize()+std::max(std::distance(b, e), std::distance(src_b, src_e)) 357 358 \param b The beginning of the range. 359 \param e The end of the range. 360 \param src_b The beginning of the source range. 361 \param src_e The end of the source range. 362 \return An l-value to the basic_stack_string. 363 */ 364 basic_stack_string &replace(iterator b, iterator e, const_iterator src_b, const_iterator src_e) noexcept(false) FORCE_INLINE; 365 366 /// Swap the current basic_stack_string with the argument. 367 /** 368 Complexity: O(1) 369 370 \param str The basic_stack_string with which to swap. 371 */ 372 void swap(basic_stack_string &str) noexcept(true) FORCE_INLINE; 373 374 private: 375 /** 376 Either have a pointer to the heap or a small string stored in the space for the pointer. By using this union and putting the pointer-type first this should guarantee that the string alignment is reasonable for the cache, i.e. not at char alignment, which could be very bad. 377 If the small_string_max_size>capacity() then make sure that the small-string optimisation is engaged, i.e. this is the flag to know which member of the buffer_type union is valid. 378 */ 379 union ALIGN_TO_L1_CACHE buffer_type { 380 pointer heap; 381 value_type small_basic_stack_string[small_string_max_size]; 382 /** 383 The copy of the small string can be sped up using this array to directly copy the chunks of chars words-at-a-time. 384 */ 385 native_databus_type fast_copy_values[small_string_max_size/sizeof(native_databus_type)]; 386 387 enum { 388 num_fast_copy_values=sizeof(fast_copy_values)/sizeof(native_databus_type) 389 }; 390 typedef private_::unroll<num_fast_copy_values> unrolled_op_t; 391 392 void ctor(size_type cap, ensure_char_ptr_argument_aligned src) noexcept(false) FORCE_INLINE; 393 template<std::size_t SrcSz> 394 void ctor(value_type const (& src)[SrcSz]) noexcept(SrcSz<=small_string_max_size) FORCE_INLINE; 395 void cctor(size_type cap, buffer_type const &b) noexcept(false) FORCE_INLINE; 396 397 /** 398 Copy the small string directly in chunks of chars words-at-a-time. 399 400 \param src The source string. 401 */ 402 void copy(native_databus_type const *src) noexcept(true) FORCE_INLINE; 403 /** 404 Copy the small string directly in chunks of chars words-at-a-time. 405 406 \param src The source string. 407 */ 408 void copy(ensure_char_ptr_argument_aligned src) noexcept(true) FORCE_INLINE; 409 template<std::size_t SrcSz> void FORCE_INLINE 410 copy(value_type const (& src)[SrcSz]) noexcept(true); 411 void copy(size_type cap, const_pointer b, const_pointer e, size_type offset) noexcept(true) FORCE_INLINE; 412 void fill_n(size_type cap, size_type offset, size_type n, value_type i) noexcept(true) FORCE_INLINE; 413 void swap(buffer_type &buff) noexcept(true) FORCE_INLINE; 414 void move(size_type cap, typename iterator_traits::difference_type f, typename iterator_traits::difference_type l, size_type n) noexcept(true) FORCE_INLINE; 415 }; 416 417 /** 418 We store the size to get at it rapidly, trading off the space required to store the size. 419 */ 420 size_type size_; 421 /** 422 We store the capacity to get at it rapidly, trading off the space required to store the size. 423 */ 424 size_type capacity_; 425 // Larger objects come first to improve cache use of strings. 426 buffer_type buffer; 427 }; 428 429 template<unsigned int BuffN, class charT, class traits> std::basic_ostream<charT, traits> & 430 operator<<(std::basic_ostream<charT, traits> &os, basic_stack_string<BuffN, charT, traits> const &s) noexcept(false); 431 432 template<unsigned int BuffN, class charT, class traits> std::basic_istream<charT, traits> & 433 operator>>(std::basic_istream<charT, traits> &os, basic_stack_string<BuffN, charT, traits> &s) noexcept(false); 434 435 typedef basic_stack_string<1, char> stack_string; 436 typedef basic_stack_string<1, wchar_t> wstack_string; 437 438 } 439 440 namespace std { 441 442 template<> 443 struct hash<jmmcg::stack_string> : unary_function<jmmcg::stack_string, std::size_t> { 444 typedef unary_function<jmmcg::stack_string, size_t>::result_type result_type; 445 typedef unary_function<jmmcg::stack_string, size_t>::argument_type argument_type; 446 447 result_type operator()(argument_type const &s) const noexcept(true) FORCE_INLINE { 448 typedef jmmcg::hashers::murmur2<argument_type> hasher_t; 449 return s.capacity()<s.small_string_max_size ? *s.begin() : hasher_t().operator()(s); 450 } 451 }; 452 453 template<> 454 struct hash<jmmcg::wstack_string> : unary_function<jmmcg::wstack_string, std::size_t> { 455 typedef unary_function<jmmcg::wstack_string, size_t>::result_type result_type; 456 typedef unary_function<jmmcg::wstack_string, size_t>::argument_type argument_type; 457 458 result_type operator()(argument_type const &s) const noexcept(true) FORCE_INLINE { 459 typedef jmmcg::hashers::murmur2<argument_type> hasher_t; 460 return s.capacity()<s.small_string_max_size ? *s.begin() : hasher_t().operator()(s); 461 } 462 }; 463 464 } 465 466 #include "stack_string_impl.hpp" 467 468 #endif