root/core/private_/parallel_algorithms.hpp

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 #ifndef libjmmcg_core_private_parallel_algorithms_hpp
   2 #define libjmmcg_core_private_parallel_algorithms_hpp
   3 
   4 /******************************************************************************
   5 ** $Header: svn+ssh://jmmcg@svn.code.sf.net/p/libjmmcg/code/trunk/libjmmcg/core/private_/parallel_algorithms.hpp 2251 2018-02-20 23:30:57Z jmmcg $
   6 **
   7 ** Copyright © 2004 by J.M.McGuiness, coder@hussar.me.uk
   8 **
   9 ** This library is free software; you can redistribute it and/or
  10 ** modify it under the terms of the GNU Lesser General Public
  11 ** License as published by the Free Software Foundation; either
  12 ** version 2.1 of the License, or (at your option) any later version.
  13 **
  14 ** This library is distributed in the hope that it will be useful,
  15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17 ** Lesser General Public License for more details.
  18 **
  19 ** You should have received a copy of the GNU Lesser General Public
  20 ** License along with this library; if not, write to the Free Software
  21 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  22 */
  23 
  24 #include "manage_container_args.hpp"
  25 #include "thread_client_context.hpp"
  26 #include "thread_dsel_types.hpp"
  27 #include "../../core/shared_ptr.hpp"
  28 
  29 #include <boost/function.hpp>
  30 
  31 namespace jmmcg { namespace ppd { namespace private_ {
  32 
	/// Human-readable labels for the work items created by the parallel-algorithm implementations.
	/**
		NOTE(review): these appear to name the sub-tasks (e.g. the merge/shuffle stages and the argument slots) for identification purposes — confirm against the call sites in the algorithm wrappers.
		Each namespace-scope const char array has internal linkage, so every translation unit including this header gets its own copy; C++17 `inline constexpr` would de-duplicate them if the project standard permits.
	*/
	const char shuffle_str[]="shuffle";
	const char lhs_merge_str[]="lhs_merge";
	const char rhs_merge_str[]="rhs_merge";
	const char combine1_str[]="combine1";
	const char combine2_str[]="combine2";
	const char ascending_lhs_str[]="ascending_lhs";
	const char descending_rhs_str[]="descending_rhs";
	const char merge_str[]="merge";
	const char arg_str[]="arg";
	const char lhs_str[]="lhs";
	const char rhs_str[]="rhs";
	const char unary_fun_str[]="unary_fun";
	const char binary_fun_str[]="binary_fun";
  46 
  47         namespace alg_wk_wrap {
  48 
  49                 template<class V>
  50                 struct pass_value {
  51                         typedef void result_type;
  52                         typedef V element_type;
  53 
  54                         element_type value;
  55 
  56                         explicit constexpr pass_value(element_type &r) FORCE_INLINE
  57                         : value(r) {
  58                         }
  59 
  60                         constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
  61                         }
  62 
  63                         constexpr bool __fastcall operator<(pass_value const &) const noexcept(true) FORCE_INLINE {
  64                                 return true;
  65                         }
  66 
  67                         template<class CoreWk>
  68                         static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {}
  69                 };
  70 
  71                 /// Assist with implementing the parallel versions of the standard algorithms.
  72                 /**
  73                         \see for_each_reduce
  74                 */
  75                 template<class Op>
  76                 struct for_each_work_type {
  77                         typedef void result_type;
  78                         typedef Op operation_type;
  79 
  80                         /**
  81                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
  82                         */
  83                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;
  84 
  85                         /**
  86                                 Need this to be non-const, in case pointer-types get stuffed in here, otherwise the compiler will complain (not unreasonably) about the const-ness.
  87                         */
  88                         operation_type op;
  89 
  90                         constexpr for_each_work_type() noexcept(true) FORCE_INLINE {
  91                         }
  92                         explicit constexpr for_each_work_type(operation_type const &o) noexcept(true) FORCE_INLINE
  93                         : op(o) {
  94                         }
  95 
  96                         constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
  97                         }
  98 
  99                         constexpr bool __fastcall operator<(for_each_work_type const &) const noexcept(true) FORCE_INLINE {
 100                                 return true;
 101                         }
 102 
 103                         template<class CoreWk>
 104                         static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {}
 105                 };
 106 
 107                 /// Assist with implementing the parallel versions of the standard algorithms.
 108                 /**
 109                         \see for_each_reduce
 110                 */
 111                 template<class Op>
 112                 struct transform_work_type {
 113                         typedef void result_type;
 114                         typedef Op operation_type;
 115 
 116                         /**
 117                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 118                         */
 119                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;
 120 
 121                         /**
 122                                 Need this to be non-const, in case pointer-types get stuffed in here, otherwise the compiler will complain (not unreasonably) about the const-ness.
 123                         */
 124                         operation_type op;
 125 
 126                         constexpr transform_work_type() noexcept(true) FORCE_INLINE {
 127                         }
 128                         explicit constexpr transform_work_type(operation_type const &o) noexcept(true) FORCE_INLINE
 129                         : op(o) {
 130                         }
 131 
 132                         constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
 133                         }
 134 
 135                         constexpr bool __fastcall operator<(transform_work_type const &) const noexcept(true) FORCE_INLINE {
 136                                 return true;
 137                         }
 138 
 139                         template<class CoreWk>
 140                         static void FORCE_INLINE resize_output(CoreWk &wk) noexcept(false) {
 141                                 wk.resize_output(wk.work_complete()->containers().input1.size());
 142                         }
 143                 };
 144 
 145                 /// Assist with implementing the parallel versions of the standard algorithms.
 146                 /**
 147                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 148 
 149                         \see for_each_work_type
 150                         \see thread_base::for_each
 151                         \see thread_base::alg_wrapper1
 152                 */
 153                 template<class Conts, typename Fn>
 154                 struct for_each_reduce {
 155                         typedef Fn operation_type;
 156                         typedef typename operation_type::result_type result_type;
 157                         typedef Conts containers_type;
 158                         typedef typename containers_type::in_iterator in_iterator;
 159 
 160                         /**
 161                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 162                         */
 163                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;
 164 
 165                         constexpr for_each_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE
 166                         : beg(b), end(e), fn(w) {
 167                         }
 168 
 169                         void __fastcall process() const FORCE_INLINE {
 170                                 std::for_each(beg, end, fn.input().op);
 171                         }
 172 
 173                         constexpr bool __fastcall operator<(for_each_reduce const &) const noexcept(true) FORCE_INLINE {
 174                                 return true;
 175                         }
 176 
 177                 private:
 178                         const in_iterator beg, end;
 179                         operation_type &fn;
 180                 };
 181 
 182                 /// Assist with implementing the parallel versions of the standard algorithms.
 183                 /**
 184                         \see count_if_reduce
 185                         \see thread_base::count_if_t
 186                         \see thread_base::alg_wrapper1
 187                 */
 188                 template<typename Pred, typename CTR>
 189                 struct countor_work_type {
 190                         typedef CTR result_type;
 191                         typedef Pred operation_type;
 192 
 193                         /**
 194                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 195                         */
 196                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=result_type::memory_access_mode;
 197 
 198                         operation_type const pred;
 199 
 200                         explicit constexpr countor_work_type(operation_type const &p) noexcept(true) FORCE_INLINE
 201                         : pred(p) {
 202                         }
 203 
 204                         constexpr void __fastcall process(result_type &) noexcept(true) FORCE_INLINE {
 205                         }
 206 
 207                         constexpr bool __fastcall operator<(countor_work_type const &) const noexcept(true) FORCE_INLINE {
 208                                 return true;
 209                         }
 210 
 211                         template<class T>
 212                         static constexpr void FORCE_INLINE resize_output(T const &) noexcept(true) {}
 213                 };
 214 
 215                 /// Assist with implementing the parallel versions of the standard algorithms.
 216                 /**
 217                         Implements a reduction operation.
 218                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it..
 219 
 220                         \see countor_work_type
 221                         \see thread_base::count_if_t
 222                         \see thread_base::alg_wrapper1
 223                 */
 224                 template<class Conts, typename CtrPred>
 225                 struct count_if_reduce {
 226                         typedef CtrPred operation_type;
 227                         typedef typename operation_type::result_type result_type;
 228                         typedef Conts containers_type;
 229                         typedef typename containers_type::in_iterator in_iterator;
 230 
 231                         /**
 232                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 233                         */
 234                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
 235                                 operation_type::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
 236                                 ? ppd::generic_traits::memory_access_modes::crew_memory_access
 237                                 : ppd::generic_traits::memory_access_modes::erew_memory_access
 238                         );
 239 
 240                         constexpr count_if_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) noexcept(true) FORCE_INLINE
 241                         : beg(b), end(e), fn(w) {
 242                         }
 243 
 244                         void __fastcall process() FORCE_INLINE;
 245 
 246                         constexpr bool __fastcall operator<(count_if_reduce const &) const noexcept(true) FORCE_INLINE {
 247                                 return true;
 248                         }
 249 
 250                 private:
 251                         const in_iterator beg, end;
 252                         operation_type &fn;
 253                 };
 254 
 255                 /// Assist with implementing the parallel versions of the standard algorithms.
 256                 /**
 257                         \see accumulate_reduce
 258                 */
 259                 template<typename BinOp, typename Acc>
 260                 struct accumulator_work_type {
 261                         typedef Acc result_type;
 262                         typedef BinOp operation_type;
 263 
 264                         /**
 265                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 266                         */
 267                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=result_type::memory_access_mode;
 268 
 269                         operation_type const binop;
 270                         result_type const init;
 271 
 272                         constexpr accumulator_work_type(operation_type const &p, result_type &&i) noexcept(true) FORCE_INLINE
 273                         : binop(p), init(std::forward<result_type>(i)) {
 274                         }
 275 
 276                         constexpr void process(result_type &) noexcept(true) FORCE_INLINE {
 277                         }
 278 
 279                         constexpr bool __fastcall operator<(accumulator_work_type const &) const noexcept(true) FORCE_INLINE {
 280                                 return true;
 281                         }
 282 
 283                         template<class T>
 284                         static void FORCE_INLINE resize_output(T const &) noexcept(true) {}
 285                 };
 286 
 287                 /// Assist with implementing the parallel versions of the standard algorithms.
 288                 /**
 289                         Implements a reduction operation.
 290                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 291 
 292                         \see accumulator_work_type
 293                         \see thread_base::accumulate_t
 294                         \see thread_base::alg_wrapper1
 295                 */
 296                 template<class Conts, typename Fn>
 297                 struct accumulate_reduce {
 298                         typedef Fn operation_type;
 299                         typedef typename operation_type::result_type result_type;
 300                         typedef Conts containers_type;
 301                         typedef typename containers_type::in_iterator in_iterator;
 302 
 303                         /**
 304                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 305                         */
 306                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
 307                                 operation_type::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
 308                                 ? ppd::generic_traits::memory_access_modes::crew_memory_access
 309                                 : ppd::generic_traits::memory_access_modes::erew_memory_access
 310                         );
 311 
 312                         constexpr __stdcall accumulate_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE
 313                         : beg(b), end(e), fn(w) {
 314                         }
 315 
 316                         void __fastcall process() FORCE_INLINE;
 317 
 318                         constexpr bool __fastcall operator<(accumulate_reduce const &) const noexcept(true) FORCE_INLINE {
 319                                 return true;
 320                         }
 321 
 322                 private:
 323                         const in_iterator beg, end;
 324                         operation_type &fn;
 325                 };
 326 
 327                 /// Assist with implementing the parallel versions of the standard algorithms.
 328                 /**
 329                         Note how, if the input collection is a collection of unique elements, only one item will write to the output, so no need to implement any locking on the output. Also note that once the item is found it is implementation-defined how far the search continues within the remaining range.
 330                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 331 
 332                         \todo It would be nice, if once found, this could cancel any pending tasks.
 333 
 334                         \see countor_work_type
 335                         \see thread_base::find_if_t
 336                         \see thread_base::alg_wrapper1
 337                 */
 338                 template<class Conts, typename CtrPred>
 339                 struct find_if_reduce {
 340                         typedef CtrPred operation_type;
 341                         typedef typename operation_type::result_type result_type;
 342                         typedef Conts containers_type;
 343                         typedef typename containers_type::in_iterator in_iterator;
 344 
 345                         /**
 346                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 347                         */
 348                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
 349                                 operation_type::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
 350                                 ? ppd::generic_traits::memory_access_modes::crew_memory_access
 351                                 : ppd::generic_traits::memory_access_modes::erew_memory_access
 352                         );
 353 
 354                         constexpr __stdcall find_if_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE;
 355 
 356                         void __fastcall process() FORCE_INLINE;
 357 
 358                         constexpr bool __fastcall operator<(find_if_reduce const &) const noexcept(true) FORCE_INLINE {
 359                                 return true;
 360                         }
 361 
 362                 private:
 363                         const in_iterator beg, end;
 364                         operation_type &fn;
 365                 };
 366 
 367                 /// Assist with implementing the parallel versions of the standard algorithms.
 368                 /**
 369                         Implements a reduction operation.
 370                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 371 
 372                         \see accumulator_work_type
 373                         \see thread_base::max_element_t
 374                         \see thread_base::alg_wrapper1
 375                 */
 376                 template<class Conts, typename Fn>
 377                 class max_element_reduce {
 378                 public:
 379                         typedef Fn operation_type;
 380                         typedef typename operation_type::result_type result_type;
 381                         typedef Conts containers_type;
 382                         typedef typename containers_type::input_t::container_type container_type;
 383                         typedef typename containers_type::in_iterator in_iterator;
 384 
 385                         /**
 386                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 387                         */
 388                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
 389                                 operation_type::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
 390                                 ? ppd::generic_traits::memory_access_modes::crew_memory_access
 391                                 : ppd::generic_traits::memory_access_modes::erew_memory_access
 392                         );
 393 
 394                         constexpr __stdcall max_element_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE;
 395 
 396                         void __fastcall process() FORCE_INLINE;
 397 
 398                         constexpr bool __fastcall operator<(max_element_reduce const &) const noexcept(true) FORCE_INLINE {
 399                                 return true;
 400                         }
 401 
 402                 private:
 403                         class max;
 404 
 405                         const in_iterator beg, end;
 406                         operation_type &fn;
 407                 };
 408 
 409                 /// Assist with implementing the parallel versions of the standard algorithms.
 410                 /**
 411                         Implements a reduction operation.
 412                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 413 
 414                         \see accumulator_work_type
 415                         \see thread_base::min_element_t
 416                         \see thread_base::alg_wrapper1
 417                 */
 418                 template<class Conts, typename Fn>
 419                 class min_element_reduce {
 420                 public:
 421                         typedef Fn operation_type;
 422                         typedef typename operation_type::result_type result_type;
 423                         typedef Conts containers_type;
 424                         typedef typename containers_type::input_t::container_type container_type;
 425                         typedef typename containers_type::in_iterator in_iterator;
 426 
 427                         /**
 428                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 429                         */
 430                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
 431                                 operation_type::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
 432                                 ? ppd::generic_traits::memory_access_modes::crew_memory_access
 433                                 : ppd::generic_traits::memory_access_modes::erew_memory_access
 434                         );
 435 
 436                         constexpr __stdcall min_element_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE;
 437 
 438                         void __fastcall process() FORCE_INLINE;
 439 
 440                         constexpr bool __fastcall operator<(min_element_reduce const &) const noexcept(true) FORCE_INLINE {
 441                                 return true;
 442                         }
 443 
 444                 private:
 445                         class min;
 446 
 447                         const in_iterator beg, end;
 448                         operation_type &fn;
 449                 };
 450 
 451                 /// Assist with implementing the parallel versions of the standard algorithms.
 452                 /**
 453                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 454 
 455                         \see for_each_work_type
 456                         \see thread_base::transform_t
 457                         \see thread_base::alg_wrapper2
 458                 */
 459                 template<class Conts, typename UniOp>
 460                 struct transform_reduce {
 461                         typedef UniOp operation_type;
 462                         typedef typename operation_type::result_type result_type;
 463                         typedef Conts containers_type;
 464                         typedef typename containers_type::in_iterator in_iterator;
 465                         typedef typename containers_type::out_iterator out_iterator;
 466 
 467                         /**
 468                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 469                         */
 470                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;
 471 
 472                         constexpr __stdcall transform_reduce(in_iterator const &ib, in_iterator const &ie, out_iterator const &o, operation_type const &w) FORCE_INLINE
 473                         : in_beg(ib), in_end(ie), out(o), fn(w) {
 474                         }
 475 
 476                         void __fastcall process() FORCE_INLINE {
 477                                 std::transform(in_beg, in_end, out, fn.input().op);
 478                         }
 479 
 480                         constexpr bool __fastcall operator<(transform_reduce const &) const noexcept(true) FORCE_INLINE {
 481                                 return true;
 482                         }
 483 
 484                 private:
 485                         const in_iterator in_beg, in_end;
 486                         out_iterator out;
 487                         const operation_type &fn;
 488                 };
 489 
 490                 /// Assist with implementing the parallel versions of the standard algorithms.
 491                 /**
 492                         Note that this operation should operate on an output range that no-other thread should modify, i.e. that range should have at least a read-lock taken on it.
 493 
 494                         \see for_each_work_type
 495                         \see thread_base::transform2_t
 496                         \see thread_base::alg_wrapper2
 497                 */
 498                 template<class Conts, typename BinOp>
 499                 struct transform2_reduce {
 500                         typedef BinOp operation_type;
 501                         typedef typename operation_type::result_type result_type;
 502                         typedef Conts containers_type;
 503                         typedef typename containers_type::in_iterator in_iterator;
 504                         typedef typename containers_type::in2_iterator in2_iterator;
 505                         typedef typename containers_type::out_iterator out_iterator;
 506 
 507                         /**
 508                                 To assist in allowing compile-time computation of the algorithmic order of the threading model.
 509                         */
 510                         static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;
 511 
 512                         constexpr __stdcall transform2_reduce(in_iterator const &i1b, in_iterator const &i1e, in2_iterator const &i2b, out_iterator const &o, operation_type const &w) FORCE_INLINE
 513                         : in1_beg(i1b), in1_end(i1e), in2_beg(i2b), iter_out(o), fn(w) {
 514                         }
 515 
 516                         void __fastcall process() FORCE_INLINE {
 517                                 std::transform(in1_beg, in1_end, in2_beg, iter_out, fn.input().op);
 518                         }
 519 
 520                         constexpr bool __fastcall operator<(transform2_reduce const &) const noexcept(true) FORCE_INLINE {
 521                                 return true;
 522                         }
 523 
 524                 private:
 525                         const in_iterator in1_beg, in1_end;
 526                         const in2_iterator in2_beg;
 527                         out_iterator iter_out;
 528                         const operation_type &fn;
 529                 };
 530 
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Work type for the parallel reverse: carries std::iter_swap (wrapped as the binary operation that exchanges a pair of elements) together with the iterator range of the container being reversed.

			NOTE(review): std::pointer_to_binary_function was deprecated in C++11 and removed in C++17, so operation_type and the derived argument/result typedefs will need replacing to build against newer standards.

			\see reverse_reduce
		*/
		template<class Colln>
		struct reverse_work_type {
			typedef std::pointer_to_binary_function<typename Colln::container_type::iterator, typename Colln::container_type::iterator, void> operation_type;
			typedef typename operation_type::first_argument_type first_argument_type;
			typedef typename operation_type::second_argument_type second_argument_type;
			typedef typename operation_type::result_type result_type;

			/// The element-exchanging operation: always &std::iter_swap, bound in both constructors.
			operation_type const binop;

			// Default construction leaves cont_beg_/cont_end_ default-initialized; only binop is set.
			constexpr reverse_work_type() noexcept(true) FORCE_INLINE
			: binop(&std::iter_swap<first_argument_type, second_argument_type>) {
			}
			// Records the [cb, ce) range of the container to be reversed.
			constexpr reverse_work_type(typename Colln::container_type::iterator cb, typename Colln::container_type::iterator ce) noexcept(true) FORCE_INLINE
			: binop(&std::iter_swap<first_argument_type, second_argument_type>), cont_beg_(cb), cont_end_(ce) {
			}

			/// The swapping is performed by the reduction object, not here.
			constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
			}

			/// First iterator of the recorded container range.
			constexpr typename Colln::container_type::iterator __fastcall cont_beg() const noexcept(true) FORCE_INLINE {
				return cont_beg_;
			}
			/// One-past-the-end iterator of the recorded container range.
			constexpr typename Colln::container_type::iterator __fastcall cont_end() const noexcept(true) FORCE_INLINE {
				return cont_end_;
			}

			/// Ordering is immaterial: all instances compare equivalent.
			constexpr bool __fastcall operator<(reverse_work_type const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

			/// reverse is in-place, so there is no output collection to resize.
			template<class CoreWk>
			static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {
			}

		private:
			typename Colln::container_type::iterator cont_beg_;
			typename Colln::container_type::iterator cont_end_;
		};
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Work item for a parallel reverse: records a sub-range and the binary operation to apply across it. The constructor and process() are defined out-of-line in parallel_algorithms_impl.hpp.

			Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

			\see reverse_work_type
			\see thread_base::reverse_t
			\see thread_base::alg_wrapper1
		*/
		template<class Conts, typename Fn>
		struct reverse_reduce {
			typedef Fn operation_type;	///< The binary operation to apply, e.g. a reverse_work_type wrapping std::iter_swap().
			typedef typename operation_type::result_type result_type;
			typedef Conts containers_type;
			typedef typename containers_type::in_iterator in_iterator;

			/**
				To assist in allowing compile-time computation of the algorithmic order of the threading model.
			*/
			static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;

			/// Record the sub-range [bs, es) and the operation to apply; body defined in parallel_algorithms_impl.hpp.
			constexpr __stdcall reverse_reduce(in_iterator const &bs, in_iterator const &es, operation_type const &w) FORCE_INLINE;

			/// Apply the recorded operation over the recorded sub-range; body defined in parallel_algorithms_impl.hpp.
			void __fastcall process() const FORCE_INLINE;

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(reverse_reduce const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

		private:
			const in_iterator beg_subrange, end_subrange;	///< The sub-range this work item operates upon.
			const operation_type &fn;	///< Held by reference: the referee must out-live this work item.
			const typename std::iterator_traits<in_iterator>::difference_type cont_size;	///< Cached distance; exact use is in the out-of-line process().
		};
 606 
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Work item for a parallel fill_n: records the range [beg, end) and the value-generating operation; process() is defined out-of-line in parallel_algorithms_impl.hpp.

			Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

			\see pass_value
			\see thread_base::fill_n
			\see thread_base::alg_wrapper1
		*/
		template<typename Conts, class UniOp>
		struct fill_n_reduce {
			typedef UniOp operation_type;	///< Holds the value to fill with (see the \see pass_value reference above).
			typedef typename operation_type::result_type result_type;
			typedef Conts containers_type;
			typedef typename containers_type::in_iterator in_iterator;

			/**
				To assist in allowing compile-time computation of the algorithmic order of the threading model.
			*/
			static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;

			/// Record the range [b, e) and the operation supplying the fill value.
			constexpr __stdcall fill_n_reduce(in_iterator b, in_iterator e, operation_type const &op) FORCE_INLINE
			: beg(b), end(e), val(op) {
			}

			/// Perform the fill over the recorded range; body defined in parallel_algorithms_impl.hpp.
			void __fastcall process() const FORCE_INLINE;

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(fill_n_reduce const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

		private:
			in_iterator beg, end;	///< The range to be filled.
			operation_type const &val;	///< Held by reference: the referee must out-live this work item.
		};
 641 
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Work item for a parallel fill. Structurally identical to fill_n_reduce (the two differ only in which out-of-line process() they resolve to in parallel_algorithms_impl.hpp).

			Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

			\see pass_value
			\see thread_base::fill
			\see thread_base::alg_wrapper1
		*/
		template<typename Conts, class UniOp>
		struct fill_reduce {
			typedef UniOp operation_type;	///< Holds the value to fill with (see the \see pass_value reference above).
			typedef typename operation_type::result_type result_type;
			typedef Conts containers_type;
			typedef typename containers_type::in_iterator in_iterator;

			/**
				To assist in allowing compile-time computation of the algorithmic order of the threading model.
			*/
			static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;

			/// Record the range [b, e) and the operation supplying the fill value.
			constexpr __stdcall fill_reduce(in_iterator b, in_iterator e, operation_type const &op) FORCE_INLINE
			: beg(b), end(e), val(op) {
			}

			/// Perform the fill over the recorded range; body defined in parallel_algorithms_impl.hpp.
			void __fastcall process() const FORCE_INLINE;

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(fill_reduce const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

		private:
			in_iterator beg, end;	///< The range to be filled.
			operation_type const &val;	///< Held by reference: the referee must out-live this work item.
		};
 676 
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Work item for a parallel swap_ranges: records the first range [begin1, end1) and the start of the second range, plus the operation to apply; process() is defined out-of-line in parallel_algorithms_impl.hpp.

			Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

			\see for_each_work_type
			\see thread_base::swap_ranges_t
			\see thread_base::alg_wrapper1
		*/
		template<class Conts, typename Pred>
		struct swap_ranges_reduce {
			typedef Pred operation_type;
			typedef typename operation_type::result_type result_type;
			typedef Conts containers_type;
			typedef typename containers_type::in_iterator in_iterator;
			typedef typename containers_type::out_iterator out_iterator;

			/**
				To assist in allowing compile-time computation of the algorithmic order of the threading model.
			*/
			static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;

			/// Record the first range [b1, e1), the start of the second range b2, and the operation f.
			constexpr __stdcall swap_ranges_reduce(out_iterator const &b1, in_iterator const &e1, out_iterator const &b2, operation_type const &f) FORCE_INLINE
			: begin1(b1), end1(e1), begin2(b2), fn(f) {
			}

			/// Perform the swap over the recorded ranges; body defined in parallel_algorithms_impl.hpp. Non-const: advances begin1/begin2.
			void __fastcall process() FORCE_INLINE;

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(swap_ranges_reduce const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

		private:
			out_iterator begin1;	///< Mutable cursor into the first range.
			const in_iterator end1;	///< End of the first range.
			out_iterator begin2;	///< Mutable cursor into the second range.
			operation_type const &fn;	///< Held by reference: the referee must out-live this work item.
		};
 714 
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Closure argument for the parallel merge: bundles the user's comparator with the pool into which the merge work is transferred.

			\see merge_reduce
		*/
		template<class Comp, class TPB>
		struct merge_work_type {
			typedef void result_type;
			typedef Comp operation_type;	///< The user-supplied comparator type.
			typedef TPB thread_pool_type;

			operation_type const comp;	///< The comparator; read by swap_pred via Closure::argument_type::comp.
			thread_pool_type &pool;	///< The pool into which sub-tasks are transferred.

			/// Copy the comparator and bind the pool.
			constexpr __stdcall merge_work_type(operation_type const &o, thread_pool_type &p) noexcept(true) FORCE_INLINE
			: comp(o), pool(p) {
			}

			/// No-op: the real work is performed by the reduce operations that consume this closure.
			constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
			}

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(merge_work_type const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

			/// Resize the output container of the wrapped work; may throw, body defined in parallel_algorithms_impl.hpp.
			template<class CoreWk>
			static void resize_output(CoreWk &wk) noexcept(false) FORCE_INLINE;
		};
		/*
			Free-standing helpers: an admitted ugliness, but that's optimisations for you....
			They can't be members of batchers_bitonic_merge_reduce because of the template-template parameter to the batchers_bitonic_merge_reduce::merge class, which also needs to be specified in the sort algorithm.
		*/
		/// The direction of the resultant output sequence from a merge or sort operation.
		enum class direction {
			ascending,	///< The output sequence is to be ordered ascending.
			descending	///< The output sequence is to be ordered descending.
		};
 751                 /// The comparator operator to be used within the merge or sort operation.
 752                 template<direction Dir, class out_iterator, class Closure>
 753                 class swap_pred : public std::binary_function<typename out_iterator::value_type, typename out_iterator::value_type, bool> {
 754                 public:
 755                         static constexpr direction dir=Dir;
 756 
 757                         explicit constexpr swap_pred(Closure const &c) noexcept(true) FORCE_INLINE
 758                         : arg(c) {}
 759 
 760                         bool __fastcall operator()(typename out_iterator::value_type const &lhs, typename out_iterator::value_type const &rhs) const noexcept(noexcept(std::declval<typename Closure::argument_type>().comp(lhs, rhs))) FORCE_INLINE;
 761 
 762                 private:
 763                         typename Closure::argument_type const &arg;
 764                 };
		/// Merge operations are predicated upon the two input queues being sorted, so we can improve the algorithmic complexity by making use of std::merge() to merge the final sub-ranges in O(n/p) time. Note that the input is a bitonic sub-range, which makes this algorithm more complex.
		/**
			Final-stage policy for batchers_bitonic_merge_reduce::merge; process() is defined out-of-line in parallel_algorithms_impl.hpp.
			NOTE(review): std::binary_negate is deprecated in C++17; it cannot be replaced here in isolation as arg3_type participates in the sort_fn_t signature shared with sort_final_sorter and the out-of-line implementations.
		*/
		template<class Iter, class operation_type, direction LHSDir, direction RHSDir, class Dummy>
		struct merge_final_sorter {
			static constexpr direction lhs_dir=LHSDir;	///< Direction of the left-hand input sub-range.
			static constexpr direction rhs_dir=RHSDir;	///< Direction of the right-hand input sub-range.
			typedef Iter out_iterator;
			typedef typename out_iterator::difference_type out_sz_t;
			typedef swap_pred<rhs_dir, out_iterator, operation_type> swapper_t;
			typedef boost::function<void (out_iterator, out_iterator, std::binary_negate<swapper_t>)> sort_fn_t;	///< Type-erased final-stage function; signature must match sort_final_sorter::sort_fn_t usage.
			typedef out_iterator arg1_type;
			typedef out_iterator arg2_type;
			typedef std::binary_negate<swapper_t> arg3_type;

			/**
				Merge the final sub-range [begin, end) using fn; the first parameter is unused (hence Dummy).

				\todo What about std::inplace_merge()?

				\see std::merge()
			*/
			static void __fastcall process(Dummy const &, out_iterator const begin, out_iterator const end, operation_type const &fn) noexcept(false) FORCE_INLINE;
		};
		/// Final-stage policy for batchers_bitonic_merge_reduce::merge that sorts the final sub-range with a caller-supplied sort function, rather than merging it.
		/**
			\see merge_final_sorter
		*/
		template<class Iter, class operation_type, direction LHSDir, direction RHSDir, class SortFn>
		struct sort_final_sorter {
			static constexpr direction lhs_dir=LHSDir;	///< Direction of the left-hand input sub-range.
			static constexpr direction rhs_dir=RHSDir;	///< Direction of the right-hand input sub-range.
			typedef Iter out_iterator;
			typedef typename out_iterator::difference_type out_sz_t;
			typedef swap_pred<lhs_dir, out_iterator, operation_type> swapper_t;
			typedef SortFn sort_fn_t;	///< The caller-supplied sort function type, e.g. merge_final_sorter::sort_fn_t.
			typedef typename sort_fn_t::arg1_type arg1_type;
			typedef typename sort_fn_t::arg2_type arg2_type;
			typedef typename sort_fn_t::arg3_type arg3_type;

			/// Sort [begin, end) by delegating to sfn with the negated swap predicate built from fn.
			static void __fastcall process(sort_fn_t const &sfn, out_iterator const begin, out_iterator const end, operation_type const &fn) noexcept(false) FORCE_INLINE {
				sfn(begin, end, arg3_type(swapper_t(fn)));
			}
		};
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Parallel merge based upon Batcher's bitonic merge network; the out-of-line implementations live in parallel_algorithms_impl.hpp.

			Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

			[1] <a href="http://www.cs.uoregon.edu/research/paraducks/papers/psc94.d/node2.html"/>

			\see merge_work_type
			\see thread_base::merge_t
			\see thread_base::alg_wrapper2
		*/
		template<class Conts, typename Comp>
		class batchers_bitonic_merge_reduce {
		public:
			typedef Comp operation_type;	///< The closure type, e.g. merge_work_type, whose argument_type carries the comparator & pool.
			typedef typename operation_type::argument_type::thread_pool_type thread_pool_type;
			typedef typename thread_pool_type::exception_type exception_type;
			typedef typename thread_pool_type::pool_traits_type pool_traits_type;
			typedef typename thread_pool_type::joinable joinable;
			typedef typename operation_type::result_type result_type;
			typedef Conts containers_type;
			typedef typename containers_type::in_iterator in_iterator;
			typedef typename containers_type::in2_iterator in2_iterator;
			typedef typename containers_type::out_iterator out_iterator;
			typedef typename out_iterator::difference_type out_sz_t;
			/// The recursive merge stage; declared here, defined in parallel_algorithms_impl.hpp.
			template<
				direction LHSDir,
				direction RHSDir,
				template<class, class, direction, direction, class> class FinalSort	///< Ugly variation point for introducing an optimisation. Merging is predicated upon sorted inputs, unlike sort, so merging the final sub-ranges can be done in O(n/p) time, which is faster than O(nlog(n)/p) time, and noticeable in testing.
			>
			class merge;

		private:
			/**
				Make use of std::merge() to merge the sub-collections in O(n/p) time, which we can do, as the input collections must be sorted as a precondition for the algorithm.

				\see merge_final_sorter
			*/
			typedef merge<direction::ascending, direction::ascending, merge_final_sorter> init_merger_t;
			typedef typename init_merger_t::sort_fn_t sort_fn_t;

			/// Combine the two input collections into the output; body defined in parallel_algorithms_impl.hpp.
			void combine();

		public:
			/**
				To assist in allowing compile-time computation of the algorithmic order of the threading model.
				CREW only if both the copy stage and the initial merger are themselves CREW; otherwise the weaker EREW is reported.
			*/
			static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
				thread_pool_type::template copy_iter_t<typename containers_type::input1_t::container_type, typename containers_type::output_t::container_type, typename containers_type::output_t::container_type::container_type::iterator>::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
				&& init_merger_t::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
				? ppd::generic_traits::memory_access_modes::crew_memory_access
				: ppd::generic_traits::memory_access_modes::erew_memory_access
			);

			/**
				Bind the containers, closure and clique; body defined in parallel_algorithms_impl.hpp.

				Complexity: O(log^2(O(sfn)/p+log(p)))
			*/
			batchers_bitonic_merge_reduce(containers_type &c, operation_type const &w, cliques::element_type const cl) noexcept(true) FORCE_INLINE;
			virtual ~batchers_bitonic_merge_reduce() noexcept(true) FORCE_INLINE {}

			/**
				Perform the merge; body defined in parallel_algorithms_impl.hpp.

				If std::stable_sort() is used then this is on average O(n.log(n)), with enough memory.
			*/
			void __fastcall process();

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(batchers_bitonic_merge_reduce const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

		private:
			containers_type &conts;	///< The input & output collections; held by reference, must out-live this object.
			operation_type const &fn;	///< The closure carrying the comparator & pool; held by reference.
			cliques::element_type const clique;	///< Identifies the subset of pool threads used for this operation.
		};
 874 
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Closure argument for the parallel sort: bundles the user's comparator with the pool into which the sort work is transferred. Mirrors merge_work_type, but resize_output() is a no-op because sorting is performed in place.

			\see sort_reduce
		*/
		template<class Comp, class TPB>
		struct sort_work_type {
			typedef void result_type;
			typedef Comp operation_type;	///< The user-supplied comparator type.
			typedef TPB thread_pool_type;

			operation_type const comp;	///< The comparator; read by swap_pred via Closure::argument_type::comp.
			thread_pool_type &pool;	///< The pool into which sub-tasks are transferred.

			/// Copy the comparator and bind the pool.
			constexpr __stdcall sort_work_type(operation_type const &o, thread_pool_type &p) noexcept(true) FORCE_INLINE
			: comp(o), pool(p) {
			}

			/// No-op: the real work is performed by the reduce operations that consume this closure.
			constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
			}

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(sort_work_type const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

			/// No-op: sorting is in place, so the output never needs resizing (contrast merge_work_type::resize_output()).
			template<class CoreWk>
			static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {
			}
		};
		/// Assist with implementing the parallel versions of the standard algorithms.
		/**
			Parallel sort based upon the bitonic sorting network, built upon batchers_bitonic_merge_reduce; the out-of-line implementations live in parallel_algorithms_impl.hpp.

			Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

			[1] <a href="http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/bitonicen.htm"/>

			\see sort_work_type
			\see thread_base::sort_t
			\see thread_base::alg_wrapper1
		*/
		template<typename Conts, class Comp>
		struct bitonic_sort_reduce {
			typedef Comp operation_type;	///< The closure type, e.g. sort_work_type, whose argument_type carries the comparator & pool.
			typedef typename operation_type::argument_type::thread_pool_type thread_pool_type;
			typedef typename thread_pool_type::pool_traits_type pool_traits_type;
			typedef typename operation_type::result_type result_type;
			typedef Conts containers_type;
			typedef typename containers_type::in_iterator in_iterator;
			/// The merge stage re-used by the sort: all three container slots are the input container, as sorting is in place.
			typedef batchers_bitonic_merge_reduce<three_containers<typename containers_type::input_t::container_type, typename containers_type::input_t::container_type, typename containers_type::input_t::container_type>, Comp> merge_t;
			/// The recursive sort stage; declared here, defined in parallel_algorithms_impl.hpp.
			template<direction dir>
			class sort;
			typedef sort<direction::ascending> init_sorter_t;

			/**
				To assist in allowing compile-time computation of the algorithmic order of the threading model.
			*/
			static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=init_sorter_t::memory_access_mode;

			/// Bind the container, closure and clique; body defined in parallel_algorithms_impl.hpp.
			constexpr bitonic_sort_reduce(containers_type &c, operation_type const &op, cliques::element_type const cl) noexcept(true) FORCE_INLINE;
			virtual ~bitonic_sort_reduce() noexcept(true) FORCE_INLINE {
			}

			/// Perform the sort; body defined in parallel_algorithms_impl.hpp.
			void __fastcall process() const FORCE_INLINE;

			/// Work items have no meaningful relative ordering: always returns true.
			constexpr bool __fastcall operator<(bitonic_sort_reduce const &) const noexcept(true) FORCE_INLINE {
				return true;
			}

		private:
			containers_type &cont;	///< The collection to be sorted; held by reference, must out-live this object.
			operation_type const &fn;	///< The closure carrying the comparator & pool; held by reference.
			cliques::element_type const clique;	///< Identifies the subset of pool threads used for this operation.
		};
 946 
 947         }
 948 
 949         template<class T>
 950         struct stl_functor_result_type {
 951                 typedef T value_type;
 952 
 953                 /**
 954                         To assist in allowing compile-time computation of the algorithmic order of the threading model.
 955                 */
 956                 static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=ppd::generic_traits::memory_access_modes::crew_memory_access;
 957 
 958                 value_type result;
 959 
 960                 constexpr __stdcall stl_functor_result_type() noexcept(true) FORCE_INLINE {
 961                 }
 962                 constexpr __stdcall stl_functor_result_type(value_type &&r) noexcept(true) FORCE_INLINE
 963                 : result(std::forward<value_type>(r)) {
 964                 }
 965                 /// Note the use of an automatic conversion here.
 966                 constexpr __fastcall operator value_type const &() const noexcept(true) FORCE_INLINE {
 967                         return result;
 968                 }
 969 
 970                 bool __fastcall operator<(stl_functor_result_type const &rhs) const noexcept(true) FORCE_INLINE {
 971                         return result<rhs.result;
 972                 }
 973         };
 974 
	/// An adaptor to allow STL unary functions to be operated upon in the thread_pool.
	/**
		Note that the input is evaluated by transferring it into the pool, and the execution_context that holds the result has an automatic conversion to the result_type. The member bodies are defined out-of-line in parallel_algorithms_impl.hpp.
	*/
	template<class ArgT, class UniFn, class PT>
	class unary_fun_work_type {
	public:
		typedef PT pool_type;
		typedef UniFn operation_type;	///< The unary function to apply.
		typedef stl_functor_result_type<typename operation_type::result_type> result_type;	///< Wrapped so it auto-converts back to the underlying result.
		typedef ArgT argument_type;

	private:
		/// Internal work item evaluating the argument in the pool; defined in parallel_algorithms_impl.hpp.
		struct arg_int_work_type;

		/// Holds the execution context of the transferred argument; defined in parallel_algorithms_impl.hpp.
		struct arg_context_t;

		/// Sequential-mode lock traits: the shared state is not accessed concurrently here.
		using shared_ptr_t=shared_ptr<arg_context_t, api_lock_traits<platform_api, sequential_mode>>;

	public:
		/**
			To assist in allowing compile-time computation of the algorithmic order of the threading model.
			CREW only if the shared_ptr to the argument context is itself CREW; otherwise EREW.
		*/
		static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
			shared_ptr_t::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
			? ppd::generic_traits::memory_access_modes::crew_memory_access
			: ppd::generic_traits::memory_access_modes::erew_memory_access
		);

		/// Transfer the argument into the pool and bind the operation; may throw. Body defined in parallel_algorithms_impl.hpp.
		__stdcall unary_fun_work_type(argument_type &&a, operation_type const &o, pool_type &pool) noexcept(false) FORCE_INLINE;

		/// Evaluate the operation upon the transferred argument, writing into r; body defined in parallel_algorithms_impl.hpp.
		void __fastcall process(result_type &r) FORCE_INLINE;

		/// Ordering of work items; body defined in parallel_algorithms_impl.hpp.
		bool __fastcall operator<(unary_fun_work_type const &rhs) const noexcept(true) FORCE_INLINE;

	private:
		operation_type op;	///< The operation is copied, so it need not out-live this object.
		/// \todo This is done to prevent copying the execution contexts. If we have a transfer ctor, then we can avoid the copy. But we need to consider the fact that if the work has completed mutation, would this have a problem if the transfer is also occurring.
		shared_ptr_t arg_cxt;
	};
1015 
	/// An adaptor to allow STL binary functions to be operated upon in the thread_pool.
	/**
		Note that the inputs are evaluated by transferring them into the pool, and the execution_context that holds the result has an automatic conversion to the result_type. The member bodies are defined out-of-line in parallel_algorithms_impl.hpp.
	*/
	template<class ArgT1, class ArgT2, class BinFn, class PT>
	class binary_fun_work_type {
	public:
		typedef PT pool_type;
		typedef BinFn operation_type;	///< The binary function to apply.
		typedef stl_functor_result_type<typename operation_type::result_type> result_type;	///< Wrapped so it auto-converts back to the underlying result.
		typedef ArgT1 first_argument_type;
		typedef ArgT2 second_argument_type;

	private:
		/// Internal work item evaluating one argument in the pool; defined in parallel_algorithms_impl.hpp.
		template<class Arg>
		struct arg_int_work_type;

		/// Holds the execution contexts of both transferred arguments; defined in parallel_algorithms_impl.hpp.
		struct arg_contexts_t;

		/// Sequential-mode lock traits: the shared state is not accessed concurrently here.
		using shared_ptr_t=shared_ptr<arg_contexts_t, api_lock_traits<platform_api, sequential_mode>>;

	public:
		/**
			To assist in allowing compile-time computation of the algorithmic order of the threading model.
			CREW only if the shared_ptr to the argument contexts is itself CREW; otherwise EREW.
		*/
		static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
			shared_ptr_t::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
			? ppd::generic_traits::memory_access_modes::crew_memory_access
			: ppd::generic_traits::memory_access_modes::erew_memory_access
		);

		/// Transfer both arguments into the pool and bind the operation; may throw. Body defined in parallel_algorithms_impl.hpp.
		__stdcall binary_fun_work_type(first_argument_type &&lhs, second_argument_type &&rhs, operation_type const &o, pool_type &pool) noexcept(false) FORCE_INLINE;

		/// Evaluate the operation upon the transferred arguments, writing into r; body defined in parallel_algorithms_impl.hpp.
		void __fastcall process(result_type &r) FORCE_INLINE;

		/// Ordering of work items; body defined in parallel_algorithms_impl.hpp.
		bool __fastcall operator<(binary_fun_work_type const &rhs) const noexcept(true) FORCE_INLINE;

		/// Catch-all comparison against any other work type: no meaningful ordering, always returns true.
		template<class Arg1> constexpr bool __fastcall FORCE_INLINE
		operator<(Arg1 const &) const noexcept(true) {
			return true;
		}

	private:
		operation_type op;	///< The operation is copied, so it need not out-live this object.
		/// \todo This is done to prevent copying the execution contexts. If we have a transfer ctor, then we can avoid the copy. But we need to consider the fact that if the work has completed mutation, would this have a problem if the transfer is also occurring.
		shared_ptr_t arg_cxts;
	};
1063 
1064 } } }
1065 
1066 #include "parallel_algorithms_impl.hpp"
1067 
1068 #endif

/* [<][>][^][v][top][bottom][index][help] */