root/core/stack_string.hpp

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. max_size
  2. capacity

   1 #ifndef libjmmcg_core_basic_stack_string_hpp
   2 #define libjmmcg_core_basic_stack_string_hpp
   3 
   4 /******************************************************************************
   5 ** $Header: svn+ssh://jmmcg@svn.code.sf.net/p/libjmmcg/code/trunk/libjmmcg/core/stack_string.hpp 2267 2018-03-11 15:52:50Z jmmcg $
   6 **
   7 ** Copyright © 2013 by J.M.McGuiness, coder@hussar.me.uk
   8 **
   9 ** This library is free software; you can redistribute it and/or
  10 ** modify it under the terms of the GNU Lesser General Public
  11 ** License as published by the Free Software Foundation; either
  12 ** version 2.1 of the License, or (at your option) any later version.
  13 **
  14 ** This library is distributed in the hope that it will be useful,
  15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17 ** Lesser General Public License for more details.
  18 **
  19 ** You should have received a copy of the GNU Lesser General Public
  20 ** License along with this library; if not, write to the Free Software
  21 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  22 */
  23 
  24 #include "hash.hpp"
  25 #include "max_min.hpp"
  26 #include "memops.hpp"
  27 
  28 #include <algorithm>
  29 #include <cstring>
  30 #include <functional>
  31 #include <memory>
  32 #include <stdexcept>
  33 #include <iterator>
  34 #include <limits>
  35 
  36 namespace jmmcg {
  37 
  38 /// A class to provide a representation for a null-terminated character string that utilises a small-string optimisation.
  39 /**
  40         This class is a value-type.
  41         If the string to be stored is less than small_string_max_size it is not stored on the heap, but on the stack.
  42         It is not a drop-in replacement for std::string, as it does not implement all of the member functions of the latter (because I don't like the fat interface).
  43         The optimisations assume that this class will be used for small strings, get the static branch-predictions wrong, and one can get a 50% drop in performance!
  44 
  45         \see __gnuxx::__vstring, std::string
  46 */
  47 template<
  48         unsigned int BuffN,     ///< The approximate number of charT characters to store in the stack-based buffer. The minimum actual size is given by small_string_max_size.
  49         class charT,
  50         class traits=std::char_traits<charT>
  51 >
  52 class basic_stack_string final {
  53         /**
  54                 A type to express the machine's native memory-bus width for a word, i.e. what can be rapidly fetched from, copied in a single register operation & written back to memory. This is not the cache line width.
  55         */
  56         typedef unsigned long native_databus_type;
  57 
  58 public:
  59         typedef charT value_type;
  60         typedef traits traits_type;
  61         typedef std::iterator_traits<value_type const *> const_iterator_traits;
  62         typedef std::iterator_traits<value_type *> iterator_traits;
  63         typedef std::size_t size_type;
  64         typedef typename iterator_traits::pointer pointer;
  65         typedef typename const_iterator_traits::pointer const_pointer;
  66         typedef typename iterator_traits::pointer iterator;
  67         typedef typename const_iterator_traits::pointer const_iterator;
  68         typedef typename iterator_traits::reference reference;
  69         typedef typename const_iterator_traits::reference const_reference;
  70 
  71         static_assert(std::is_pod<value_type>::value, "The contained type must be a POD due to the implementation.");
  72 
  73         enum : std::size_t {
  74                 small_string_max_size=max<std::size_t, sizeof(pointer), ((BuffN+sizeof(native_databus_type)/2)/sizeof(native_databus_type))*sizeof(native_databus_type)>::value ///< Ensure that the internal, stack-based buffer is the minimum size of a pointer, or larger, in units of "native word"s. Also store the null termination char so that c_str() is fast too, trading off size for speed. In units of chars.
  75         };
  76 
  77         /**
  78                 The type of the exception that we throw.
  79         */
  80         struct exception : std::runtime_error {
  81                 typedef std::runtime_error base_t;
  82 
  83                 explicit constexpr exception(char const *s) FORCE_INLINE : base_t(s) {}
  84         };
  85 
  86 private:
  87         /**
  88                 Try to ensure that the alignment of the input char ptr parameter is suitable for speedily copying into the internal buffer, otherwise we might get sub-optimal copying or at worst a bus error or alignment error in buffer_type::copy().
  89                 Note that this is compiler-specific, and the attribute will need to change for different compilers.
  90 
  91                 \see buffer_type::copy(), basic_stack_string()
  92         */
  93         typedef value_type __attribute__ ((aligned (sizeof(native_databus_type)))) const * ensure_char_ptr_argument_aligned;
  94 
  95 public:
  96         /**
  97                 Complexity: O(1)
  98         */
  99         constexpr basic_stack_string() noexcept(true) FORCE_INLINE;
 100         /// Convert a C-style string into a basic_stack_string.
 101         /**
 102                 Note the use of the opaque parameter type ensure_char_ptr_argument_aligned: this allows the user to pass in a char const * pointer, but ensures that it is suitably aligned for our purposes, without the user having to worry about alignment issues.
 103                 Complexity: O(sz) best case, O(new()+sz) worst-case.
 104                 May throw: std::bad_alloc, exception if pointer is nullptr
 105 
 106                 \param  src     A c-style null terminated string, that should have been suitably aligned, automatically, by the compiler.
 107                 \param  sz      The number of chars to copy.
 108 
 109                 \see ensure_char_ptr_argument_aligned
 110         */
 111         explicit basic_stack_string(ensure_char_ptr_argument_aligned src, std::size_t sz) noexcept(false) FORCE_INLINE;
 112         /// Convert a C-style array of chars into a basic_stack_string.
 113         /**
 114                 Complexity: O(1) best case, O(new()+SrcSz) worst-case.
 115                 May throw: std::bad_alloc
 116 
 117                 \param  src     A c-style null terminated char-array.
 118         */
 119         template<std::size_t SrcSz> explicit FORCE_INLINE
 120         basic_stack_string(value_type const (& src)[SrcSz]) noexcept(SrcSz<=small_string_max_size);
 121         /**
 122                 Note that if the string contained in str can be optimised, this function will optimise the copy, i.e. place the copy on the stack from the heap.
 123                 Complexity: O(1) best case, O(new()+str.size()) worst-case.
 124                 May throw: std::bad_alloc
 125 
 126                 \param  str     The basic_stack_string to be copied.
 127         */
 128         basic_stack_string(basic_stack_string const &str) noexcept(false) FORCE_INLINE;
 129         /**
 130                 Complexity: O(1).
 131 
 132                 \param  str     The string to be copied.
 133         */
 134         basic_stack_string(basic_stack_string&& str) noexcept(true) FORCE_INLINE;
 135         /**
 136                 Complexity: O(1) best-case, O(delete()) worst-case.
 137         */
 138         ~basic_stack_string() noexcept(true) FORCE_INLINE;
 139 
 140         /// Copy the basic_stack_string.
 141         /**
 142                 Complexity: O(1) best case, O(new()+str.size()) worst-case.
 143                 May throw: std::bad_alloc
 144 
 145                 \return An r-value to the copied basic_stack_string.
 146         */
 147         basic_stack_string &operator=(basic_stack_string const &str) noexcept(false) FORCE_INLINE;
 148         /// Move the basic_stack_string.
 149         /**
 150                 Complexity: O(1)
 151 
 152                 \return An r-value to the copied basic_stack_string.
 153         */
 154         basic_stack_string &operator=(basic_stack_string &&str) noexcept(true) FORCE_INLINE;
 155 
 156         constexpr bool operator==(basic_stack_string const &) const noexcept(true) FORCE_INLINE;
 157         constexpr bool operator!=(basic_stack_string const &) const noexcept(true) FORCE_INLINE;
 158 
 159         /// Return an iterator to the beginning of the contained string.
 160         /**
 161                 Complexity: O(1)
 162 
 163                 \return iterator to the beginning of the contained string.
 164         */
 165         iterator begin() noexcept(true) FORCE_INLINE;
 166         /// Return a const_iterator to the beginning of the contained string.
 167         /**
 168                 Complexity: O(1)
 169 
 170                 \return const_iterator to the beginning of the contained string.
 171         */
 172         const_iterator begin() const noexcept(true) FORCE_INLINE;
 173         /// Return an iterator to the end of the contained string.
 174         /**
 175                 Complexity: O(1)
 176 
 177                 \return iterator to the end of the contained string.
 178         */
 179         iterator end() noexcept(true) FORCE_INLINE;
 180         /// Return a const_iterator to the end of the contained string.
 181         /**
 182                 Complexity: O(1)
 183 
 184                 \return const_iterator to the end of the contained string.
 185         */
 186         const_iterator end() const noexcept(true) FORCE_INLINE;
 187 
 188         /// Return the size of the contained string, excluding any null termination.
 189         /**
 190                 Complexity: O(1)
 191                 Constraint: size()<=capacity() && empty==(size()==0).
 192 
 193                 \return Size of the contained string.
 194 
 195                 \see capacity(), empty()
 196         */
 197         constexpr size_type size() const noexcept(true) FORCE_INLINE;
 198         /// Return the maximum size of any possible contained string, including any null termination.
 199         /**
 200                 Complexity: O(1)
 201                 Constraint: max_size()>=capacity().
 202 
 203                 \return Maximum possible size of a contained string.
 204 
 205                 \see capacity()
 206         */
 207         static constexpr size_type max_size() noexcept(true) FORCE_INLINE {
 208                 return std::numeric_limits<typename iterator_traits::difference_type>::max();
 209         }
 210         /// Uninitialised resize of the contained string to the requested size.
 211         /**
 212                 Notes:
 213                 - The elements in the internal buffer are not re-initialised, even if more memory has had to be allocated, for speed.
 214                 - If capacity()>s then the internal buffer is reduced in size. Note that if the string has been moved to the heap it will not be moved back to the stack, for speed.
 215                 - The null terminator is re-added to the end.
 216                 - If the capacity() has to be increased, all iterators are invalidated but the contained string is maintained.
 217                 Complexity: O(reserve())
 218                 Constraint: size()==s.
 219                 May throw: std::bad_alloc
 220 
 221                 \param  s       Ensure that size() returns s. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity of at least s.
 222 
 223                 \see empty(), reserve(), size()
 224         */
 225         void resize(size_type s) noexcept(false) FORCE_INLINE;
 226         /// Initialised resize of the contained string to the requested size.
 227         /**
 228                 Notes:
 229                 - If capacity()>s then the internal buffer is reduced in size. Note that if the string has been moved to the heap it will not be moved back to the stack, for speed.
 230                 - The null terminator is re-added to the end.
 231                 - If the capacity() has to be increased, all iterators are invalidated but the contained string is maintained.
 232                 Complexity: O(reserve()+std::max(s-size(), 0))
 233                 Constraint: size()==s && empty()==false.
 234                 May throw: std::bad_alloc
 235 
 236                 \param  s       Ensure that size() returns s. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity() of at least s.
 237                 \param  i       The value to which the s elements within the contained string should be initialised.
 238 
 239                 \see capacity(), empty(), size()
 240         */
 241         void resize(size_type s, value_type i) noexcept(false) FORCE_INLINE;
 242         /// Return the size of the internal buffer used to store the string, including the null termination.
 243         /**
 244                 Complexity: O(1)
 245                 Constraint: capacity()>=size() && capacity()<=max_size().
 246 
 247                 \return Return the size of internal buffer.
 248 
 249                 \see max_size(), size()
 250         */
 251         constexpr size_type capacity() const noexcept(true) FORCE_INLINE {
 252                 return capacity_;
 253         }
 254         /// Set the size of the internal buffer to at least the requested size, if the requested size is larger, then it will not be made smaller.
 255         /**
 256                 Notes:
 257                 - The elements in the internal buffer are not re-initialised, even if more memory has had to be allocated.
 258                 - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained string is maintained.
 259                 - If capacity() has to be reduced, note that if the string has been moved to the heap it will not be moved back to the stack, for speed.
 260                 - If an exception is thrown, then the object is unaffected.
 261                 Complexity: O(1) best case, O(new()+delete()+std::max(s-size(), 0)) worst case.
 262                 Constraint: capacity()==s && capacity()<=max_size().
 263                 May throw: std::bad_alloc
 264 
 265                 \param  s       Ensure that capacity() returns s, if less than max_size(), otherwise no effect. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity of s.
 266 
 267                 \see capacity(), max_size()
 268         */
 269         void reserve(size_type s) noexcept(false) FORCE_INLINE;
 270 
 271         /// Clear the contained string.
 272         /**
 273                 Complexity: O(1) best case, O(delete()) worst case.
 274                 Postcondition: size()==0 && empty()==true.
 275 
 276                 \see empty(), size()
 277         */
 278         void clear() noexcept(true) FORCE_INLINE;
 279         /// Return is the string is empty.
 280         /**
 281                 Complexity: O(1)
 282                 Condition: empty()==(size()==0).
 283 
 284                 \return true if size()==0, false otherwise.
 285 
 286                 \see size()
 287         */
 288         constexpr bool empty() const noexcept(true) FORCE_INLINE;
 289 
 290         /// Return an l-value to the requested element in the contained string.
 291         /**
 292                 Note that p must be less than size(), otherwise UB.
 293                 Complexity: O(1)
 294 
 295                 \param  p       The zero-based index into the contained string.
 296                 \return An l-value to the requested character in the contained string.
 297 
 298                 \see at(), size()
 299         */
 300         reference operator[](size_type p) noexcept(true) FORCE_INLINE;
 301         /// Return an r-value to the requested element in the contained string.
 302         /**
 303                 Note that p must be less than size(), otherwise UB.
 304                 Complexity: O(1)
 305 
 306                 \param  p       The zero-based index into the contained string.
 307                 \return An r-value to the requested character in the contained string.
 308 
 309                 \see at(), size()
 310         */
 311         const_reference operator[](size_type p) const noexcept(true) FORCE_INLINE;
 312 
 313         /// Append a character to the end of the contained string.
 314         /**
 315                 Note:
 316                 - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained basic_stack_string is maintained.
 317                 Complexity: O(resize())
 318                 May throw: std::bad_alloc
 319 
 320                 \param  c       The character to append.
 321 
 322                 \see resize()
 323         */
 324         void push_back(value_type c) noexcept(false) FORCE_INLINE;
 325         /// Insert the characters in the range [b, e) into the contained string from the specified position, growing the contained string as necessary.
 326         /**
 327                 Note:
 328                 - I'm not implementing the large number of overloads in std::basic_basic_stack_string, as that interface is unnecessarily fat.
 329                 - The range inserted is [b, e).
 330                 - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained basic_stack_string is maintained.
 331                 Complexity: O(resize())
 332                 May throw: std::bad_alloc
 333 
 334                 \param  p       The position immediately after the position into which the range is inserted, must be in the range [begin(), end()).
 335                 \param  b       The beginning of the range.
 336                 \param  e       The end of the range.
 337                 \return An iterator which refers to the copy of the first inserted character, or p if b==e.
 338         */
 339         iterator insert(iterator p, const_iterator b, const_iterator e) noexcept(false) FORCE_INLINE;
 340 
 341         /// Remove the characters in the range [b, e).
 342         /**
 343                 Note that {b, e} must be within [begin(), end()).
 344                 Complexity: O(std::distance(b, e))
 345 
 346                 \param  b       The beginning of the range.
 347                 \param  e       The end of the range.
 348                 \return An iterator which points to the element pointed to by e prior to the other elements being
 349 erased. If no such element exists, end() is returned.
 350         */
 351         iterator erase(const_iterator b, const_iterator e) noexcept(true) FORCE_INLINE;
 352 
 353         /// Replace the characters in the range [b, e) with those in the range [src_b, src_e).
 354         /**
 355                 Note that {b, e} must be within [begin(), end()).
 356                 Complexity: O(resize()+std::max(std::distance(b, e), std::distance(src_b, src_e))
 357 
 358                 \param  b       The beginning of the range.
 359                 \param  e       The end of the range.
 360                 \param  src_b   The beginning of the source range.
 361                 \param  src_e   The end of the source range.
 362                 \return An l-value to the basic_stack_string.
 363         */
 364         basic_stack_string &replace(iterator b, iterator e, const_iterator src_b, const_iterator src_e) noexcept(false) FORCE_INLINE;
 365 
 366         /// Swap the current basic_stack_string with the argument.
 367         /**
 368                 Complexity: O(1)
 369 
 370                 \param  str     The basic_stack_string with which to swap.
 371         */
 372         void swap(basic_stack_string &str) noexcept(true) FORCE_INLINE;
 373 
 374 private:
 375         /**
 376                 Either have a pointer to the heap or a small string stored in the space for the pointer. By using this union and putting the pointer-type first this should guarantee that the string alignment is reasonable for the cache, i.e. not at char alignment, which could be very bad.
 377                 If the small_string_max_size>capacity() then make sure that the small-string optimisation is engaged, i.e. this is the flag to know which member of the buffer_type union is valid.
 378         */
 379         union ALIGN_TO_L1_CACHE buffer_type {
 380                 pointer heap;
 381                 value_type small_basic_stack_string[small_string_max_size];
 382                 /**
 383                         The copy of the small string can be sped up using this array to directly copy the chunks of chars words-at-a-time.
 384                 */
 385                 native_databus_type fast_copy_values[small_string_max_size/sizeof(native_databus_type)];
 386                 
 387                 enum {
 388                         num_fast_copy_values=sizeof(fast_copy_values)/sizeof(native_databus_type)
 389                 };
 390                 typedef private_::unroll<num_fast_copy_values> unrolled_op_t;
 391 
 392                 void ctor(size_type cap, ensure_char_ptr_argument_aligned src) noexcept(false) FORCE_INLINE;
 393                 template<std::size_t SrcSz>
 394                 void ctor(value_type const (& src)[SrcSz]) noexcept(SrcSz<=small_string_max_size) FORCE_INLINE;
 395                 void cctor(size_type cap, buffer_type const &b) noexcept(false) FORCE_INLINE;
 396 
 397                 /**
 398                         Copy the small string directly in chunks of chars words-at-a-time.
 399 
 400                         \param  src     The source string.
 401                 */
 402                 void copy(native_databus_type const *src) noexcept(true) FORCE_INLINE;
 403                 /**
 404                         Copy the small string directly in chunks of chars words-at-a-time.
 405 
 406                         \param  src     The source string.
 407                 */
 408                 void copy(ensure_char_ptr_argument_aligned src) noexcept(true) FORCE_INLINE;
 409                 template<std::size_t SrcSz> void FORCE_INLINE
 410                 copy(value_type const (& src)[SrcSz]) noexcept(true);
 411                 void copy(size_type cap, const_pointer b, const_pointer e, size_type offset) noexcept(true) FORCE_INLINE;
 412                 void fill_n(size_type cap, size_type offset, size_type n, value_type i) noexcept(true) FORCE_INLINE;
 413                 void swap(buffer_type &buff) noexcept(true) FORCE_INLINE;
 414                 void move(size_type cap, typename iterator_traits::difference_type f, typename iterator_traits::difference_type l, size_type n) noexcept(true) FORCE_INLINE;
 415         };
 416 
 417         /**
 418                 We store the size to get at it rapidly, trading off the space required to store the size.
 419         */
 420         size_type size_;
 421         /**
 422                 We store the capacity to get at it rapidly, trading off the space required to store the size.
 423         */
 424         size_type capacity_;
 425         // Larger objects come first to improve cache use of strings.
 426         buffer_type buffer;
 427 };
 428 
 429 template<unsigned int BuffN, class charT, class traits> std::basic_ostream<charT, traits> &
 430 operator<<(std::basic_ostream<charT, traits> &os, basic_stack_string<BuffN, charT, traits> const &s) noexcept(false);
 431 
 432 template<unsigned int BuffN, class charT, class traits> std::basic_istream<charT, traits> &
 433 operator>>(std::basic_istream<charT, traits> &os, basic_stack_string<BuffN, charT, traits> &s) noexcept(false);
 434 
 435 typedef basic_stack_string<1, char> stack_string;
 436 typedef basic_stack_string<1, wchar_t> wstack_string;
 437 
 438 }
 439 
 440 namespace std {
 441 
 442 template<>
 443 struct hash<jmmcg::stack_string> : unary_function<jmmcg::stack_string, std::size_t> {
 444         typedef unary_function<jmmcg::stack_string, size_t>::result_type result_type;
 445         typedef unary_function<jmmcg::stack_string, size_t>::argument_type argument_type;
 446 
 447         result_type operator()(argument_type const &s) const noexcept(true) FORCE_INLINE {
 448                 typedef jmmcg::hashers::murmur2<argument_type> hasher_t;
 449                 return s.capacity()<s.small_string_max_size ? *s.begin() : hasher_t().operator()(s);
 450         }
 451 };
 452 
 453 template<>
 454 struct hash<jmmcg::wstack_string> : unary_function<jmmcg::wstack_string, std::size_t> {
 455         typedef unary_function<jmmcg::wstack_string, size_t>::result_type result_type;
 456         typedef unary_function<jmmcg::wstack_string, size_t>::argument_type argument_type;
 457 
 458         result_type operator()(argument_type const &s) const noexcept(true) FORCE_INLINE {
 459                 typedef jmmcg::hashers::murmur2<argument_type> hasher_t;
 460                 return s.capacity()<s.small_string_max_size ? *s.begin() : hasher_t().operator()(s);
 461         }
 462 };
 463 
 464 }
 465 
 466 #include "stack_string_impl.hpp"
 467 
 468 #endif

/* [<][>][^][v][top][bottom][index][help] */