/* *******************************************************************************
 * Copyright (c) 2007-2014, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * * Neither the name of Intel Corporation nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ********************************************************************************/

// ===============================================================================
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//
// INCLUDE THIS FILE ONLY TO MAKE YOUR PROGRAM READY FOR DISTRIBUTED CnC
//
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// ===============================================================================

#ifndef __DIST_CNC__H_
#define __DIST_CNC__H_

/**
\page distcnc Running CnC applications on distributed memory

In principle, every clean CnC program should be immediately
applicable to distributed memory systems. With only a few trivial
changes most CnC programs can be made distribution-ready; you will
get a single binary that runs on shared and distributed memory. Most
of the mechanics of data distribution etc. are handled inside the
runtime, so the programmer does not need to bother with the gory
details. Of course, a few minor changes are needed to make a
program distribution-ready, but once that's done, it will run on
distributed CnC as well as on "normal" CnC (decided at runtime).

\section dc_comm Inter-process communication
Conceptually, CnC allows data and computation distribution
across any kind of network; currently CnC supports SOCKETS and MPI.

\section dc_link Linking for distCnC
Support for distributed memory is part of the "normal" CnC
distribution, i.e. it comes with the necessary communication
libraries (cnc_socket, cnc_mpi). The communication library is
loaded on demand at runtime, hence you do not need to link against
extra libraries to create distribution-ready applications. Just
link your binaries like a "traditional" CnC application (explained
in the CnC User Guide, which can be found in the doc directory).
\note A distribution-ready CnC application binary has no dependencies
      on an MPI library. It can be run on shared memory or over SOCKETS
      even if no MPI is available on the system.

Even though it is not a separate package or module in the CnC kit,
in the following we will refer to features that are specific to
distributed memory as "distCnC".

\section dc_prog Making your program distCnC-ready
As a distributed version of a CnC program needs to do things which
are not required in a shared memory version, the extra code for
distCnC is hidden from "normal" CnC headers. To include the
features required for a distributed version you need to
\code #include <cnc/dist_cnc.h> \endcode
instead of \code #include <cnc/cnc.h> \endcode .
If you want to be able to create optimized binaries for shared
memory and distributed memory from the same source, you might
consider protecting distCnC specifics like this:
@code
#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
@endcode

In "main", initialize an object CnC::dist_cnc_init< list-of-contexts >
before anything else; the template parameters should be all context-types
that you would like to be distributed. Context-types not listed here
will stay local. You may mix local and distributed contexts, but
in most cases only one context is needed/used anyway.
@code
#ifdef _DIST_
    CnC::dist_cnc_init< my_context_type_1 //, my_context_type_2, ...
                      > _dinit;
#endif
@endcode
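Putting the pieces together, a distribution-ready "main" might look
like the following minimal sketch. The context type my_context_type_1
and its members "tags" and "items" are hypothetical placeholders for
your own context and its collections:
@code
#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif

int main()
{
#ifdef _DIST_
    // must be constructed before any context
    CnC::dist_cnc_init< my_context_type_1 > _dinit;
#endif
    my_context_type_1 ctxt;         // created in the "scope" of _dinit
    ctxt.tags.put( 42 );            // prescribe work as usual
    ctxt.wait();                    // wait for completion of all steps
    int result;
    ctxt.items.get( 42, result );   // retrieve results in the environment
    return 0;
}
@endcode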
Even though the communication between processes is entirely handled by
the CnC runtime, C++ doesn't allow automatic marshaling/serialization
of arbitrary data-types. Hence, if and only if your items and/or tags
are non-standard data types, the compiler will notify you about the
need for serialization/marshaling capability. If you are using
standard data types only, then marshaling will be handled by CnC
automatically.

Marshaling doesn't involve sending messages or the like; it only
specifies how an object/variable is packed/unpacked into/from a
buffer. Marshaling of structs/classes without pointers or virtual
functions can easily be enabled using
\code CNC_BITWISE_SERIALIZABLE( type ); \endcode
others need a "serialize" method or function. The CnC kit comes
with a convenient interface for this which is similar to BOOST
serialization. It is very simple to use and requires only one
function/method for packing and unpacking. See \ref serialization for
more details, and the sketch right below.
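For illustration, here is a minimal sketch of both approaches. The
types my_pod and my_item are hypothetical; the serialize method
follows the operator-based interface described in \ref serialization:
@code
// Hypothetical plain-old-data type: no pointers, no virtual functions,
// so bitwise copying is safe.
struct my_pod
{
    int    id;
    double value;
};
CNC_BITWISE_SERIALIZABLE( my_pod );

// Hypothetical non-trivial type: needs an explicit serialize method.
// The same method packs (on send) and unpacks (on receive).
struct my_item
{
    int    id;
    double value;
    void serialize( CnC::serializer & ser )
    {
        ser & id & value;
    }
};
@endcode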
<b>This is it! Your CnC program will now run on distributed memory!</b>

\attention Global variables are evil and must not be used within
           the execution scope of steps. Read \ref dist_global
           about how CnC supports global read-only data.
           In effect, pointers are nothing but global
           variables and hence need special treatment in distCnC
           (see \ref serialization).
\note Even if your program runs on distributed memory, that does not
      necessarily imply that the trivial extension above will make it
      run fast. Please consult \ref dist_tuning for the tuning
      options for distributed memory.

The above describes the default "single-program" approach for
distribution. Please refer to CnC::dist_cnc_init for more advanced
modes which allow SPMD-style interaction as well as distributing
parts of the CnC program over groups of processes.


\section dc_run Running distCnC
The communication infrastructure used by distCnC is chosen at
runtime. By default, the CnC runtime will run your application in
shared memory mode. When starting up, the runtime will evaluate
the environment variable "DIST_CNC". Currently it accepts the
following values:
- SHMEM   : shared memory (default)
- SOCKETS : communication through TCP sockets
- MPI     : using Intel(R) MPI

Please see \ref itac on how to profile distributed programs.

\subsection dc_sockets Using SOCKETS
On application start-up, when DIST_CNC=SOCKETS, CnC checks the
environment variable "CNC_SOCKET_HOST". If it is set to a number,
it will print a contact string and wait for the given number of
clients to connect. Usually this means that clients need to be
started "manually" as follows: set DIST_CNC=SOCKETS and
"CNC_SOCKET_CLIENT" to the given contact string and launch the
same executable on the desired machine.

If "CNC_SOCKET_HOST" is not a number, it is interpreted as the
name of a script. CnC executes the script twice: the first time
with "-n", when the script is expected to return the number of
clients it will start; the second invocation is expected to launch
the client processes.

There is a sample script "misc/start.sh" which you can use.
Usually all you need to do is set the number of clients and
replace "localhost" with the names of the machines you want the
application(-clients) to be started on. It requires password-less
login via ssh. It also gives some details of the start-up
procedure. On Windows, the script "start.bat" does the same,
except that it will start the clients on the same machine without
ssh or the like. Adjust the script to use your preferred remote
login mechanism.

\subsection dc_mpi MPI
CnC comes with a communication layer based on MPI. You need the
Intel(R) MPI runtime to use it. You can download a free version of
the MPI runtime from
http://software.intel.com/en-us/articles/intel-mpi-library/ (under
"Resources"). A distCnC application is launched like any other
MPI application with mpirun or mpiexec, but DIST_CNC must be set
to MPI:
\code
env DIST_CNC=MPI mpiexec -n 4 my_cnc_program
\endcode
Alternatively, just run the app as usual (with DIST_CNC=MPI) and
control the number (n) of additionally spawned processes with
CNC_MPI_SPAWN=n. If host and client applications need to be
different, set CNC_MPI_EXECUTABLE to the client-program
name. Here's an example:
\code
env DIST_CNC=MPI env CNC_MPI_SPAWN=3 env CNC_MPI_EXECUTABLE=cnc_client cnc_host
\endcode
It starts your host executable "cnc_host" and then spawns 3 additional
processes which all execute the client executable "cnc_client".
\subsection dc_mic Intel Xeon Phi(TM) (MIC)
For CnC, a MIC process is just another process on which work can be
computed. So all you need to do is:
- Build your application for MIC (see
  http://software.intel.com/en-us/articles/intel-concurrent-collections-getting-started)
- Start a process with the MIC executable on each MIC card, just
  like on a CPU. Communication and startup are equivalent to how they
  work on intel64 (\ref dc_mpi and \ref dc_sockets).

\note Of course the normal mechanics for MIC need to be considered
      (like getting applications and dependent libraries onto the MIC
      first). You'll find documentation about this on IDZ, like
      <A HREF="http://software.intel.com/en-us/articles/how-to-run-intel-mpi-on-xeon-phi">here</A>
      and/or <A HREF="http://software.intel.com/en-us/articles/using-the-intel-mpi-library-on-intel-xeon-phi-coprocessor-systems">here</A>.
\note We recommend starting only 2 threads per MIC core, e.g. if your
      card has 60 cores, set CNC_NUM_THREADS=120.
\note To start different binaries with one mpirun/mpiexec command you
      can use a syntax like this:<br>
      mpirun -genv DIST_CNC=MPI -n 2 -host xeon xeonbinary : -n 1 -host mic0 -env CNC_NUM_THREADS=120 micbinary


\section def_dist Default Distribution
Step instances are distributed across clients and the host. By
default, they are distributed in a round-robin fashion. Note that
every process can put tags (and so prescribe new step instances).
The round-robin distribution decision is made locally on each
process (not globally).

If the same tag is put multiple times, the default scheduling
might execute the multiply prescribed steps on different processes,
and the preserveTags attribute of tag_collections will then not
have the desired effect.

The default scheduling is intended primarily as a development aid:
your CnC application will be distribution-ready with only little
effort. In some cases it might lead to good performance; in other
cases a sensible distribution is needed to achieve good performance.
See \ref dist_tuning.

Next: \ref dist_tuning


\page dist_tuning Tuning for distributed memory
The CnC tuning interface provides convenient ways to control the
distribution of work and data across the address spaces. The
tuning interface is separate from the actual step-code, and its
declarative nature allows flexible and productive experiments with
different distribution strategies.

\section dist_work Distributing the work
Let's first look at the distribution of work/steps. You can specify
the distribution of work (i.e. steps) across the network by providing
a tuner to a step-collection (the second template argument to
CnC::step_collection, see \ref tuning). Similar to other tuning
features, the tuner defines the distribution plan based on the
control-tags and item-tags. For a given instance (identified by the
control-tag) the tuner defines the placement of the instance in the
communication network. This mechanism allows a declarative definition
of the distribution and keeps it separate from the actual program code
- you can change the distribution without changing the actual program.

The method for distributing steps is called "compute_on".
It takes the tag of the step and the context as arguments and has to
return the number of the process to run the step on. The numbering of
processes is similar to ranks in MPI: running on "N" processes, the
host process is "0" and the last client "N-1".

@code
struct my_tuner : public CnC::step_tuner<>
{
    int compute_on( const tag_type & tag, context_type & ) const { return tag % numProcs(); }
};
@endcode

The shown tuner is derived from CnC::step_tuner. To allow a flexible
and generic definition of the distribution, CnC::step_tuner provides
information specific to distributed memory:
CnC::tuner_base::numProcs() and CnC::tuner_base::myPid(). Both return
the values of the current run of your application. Using those allows
defining a distribution plan which adapts to the current runtime
configuration.

If you wonder how the necessary data gets distributed - this will be
covered soon. Let's first look at the computation side a bit more
closely; but if you can't wait, see \ref dist_data.

The tuner given above simply distributes the tags in a
round-robin fashion by applying the modulo operator on the tag. Here's
an example of how a given set of tags would be mapped to 4 processes
(i.e. numProcs()==4):
\verbatim
 1 -> 1
 3 -> 3
 4 -> 0
 5 -> 1
10 -> 2
20 -> 0
31 -> 3
34 -> 2
\endverbatim

An example of such a simple tuner is \ref bs_tuner.

Now let's do something a little more interesting. Let's assume our tag
is a pair of x and y coordinates. To distribute the work per row, we
could simply do something like

@code
struct my_tuner : public CnC::step_tuner<>
{
    int compute_on( const tag_type & tag, context_type & ) const { return tag.y % numProcs(); }
};
@endcode

As you see, the tuner entirely ignores the x-part of the tag. This
means that all entries on a given row (identified by tag.y) get
executed on the same process. Similarly, if you want to distribute
the work per column instead, you simply change it to

@code
struct my_tuner : public CnC::step_tuner<>
{
    int compute_on( const tag_type & tag, context_type & ) const { return tag.x % numProcs(); }
};
@endcode

As we'll also see later, you can certainly also conditionally switch
between row- and column-wise (or any other) distribution within
compute_on; a sketch follows below.

To avoid the afore-mentioned problem of becoming globally
inconsistent, you should make sure that the return value is
independent of the process it is executed on.

CnC provides special values to make working with compute_on more
convenient, more generic and more effective:
CnC::COMPUTE_ON_LOCAL, CnC::COMPUTE_ON_ROUND_ROBIN,
CnC::COMPUTE_ON_ALL, CnC::COMPUTE_ON_ALL_OTHERS.
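Here is a hedged sketch of such a switchable tuner. The strategy flag
on the context is an assumption about your application (it is not part
of the CnC API); only numProcs() and the CnC::COMPUTE_ON_* constants
come from CnC:

@code
// Hypothetical strategy selector, e.g. set from the command line
// before any tags are put. It must be identical on all processes
// so that the placement decision is globally consistent.
enum dist_strategy { BY_ROW, BY_COL, ANYWHERE };

struct my_tuner : public CnC::step_tuner<>
{
    int compute_on( const tag_type & tag, context_type & ctxt ) const
    {
        // "ctxt.m_strategy" is a hypothetical context member
        switch( ctxt.m_strategy ) {
            case BY_ROW : return tag.y % numProcs();          // row-wise
            case BY_COL : return tag.x % numProcs();          // column-wise
            default     : return CnC::COMPUTE_ON_ROUND_ROBIN; // let the runtime decide
        }
    }
};
@endcode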
\section dist_data Distributing the data
By default, the CnC runtime will deliver data items automatically
to where they are needed. In its current form, the C++ API does
not express the dependencies between instances of steps and/or
items. Hence, without additional information, the runtime does not
know which step-instances produce and consume which
item-instances. Even when the step-distribution is known, automatic
distribution of data requires global communication. Clearly this
constitutes a considerable bottleneck. The CnC tuner interface
provides two ways to reduce this overhead.

The ideal, most flexible and most efficient approach is to map
items to their consumers. It converts the default pull-model
into a push-model: whenever an item is produced, it will be
sent only to those processes which actually need it, without any
other communication/synchronization. If you can determine which
steps are going to consume a given item, you can use the above
compute_on to map the consumer step to the actual address
spaces. This allows changing the distribution at a single place
(compute_on) and the data distribution will be automatically
optimized to the minimum needed data transfer.

The runtime evaluates the tuner provided to the item-collection
when an item is put. If its method consumed_on (from
CnC::item_tuner) returns anything other than CnC::CONSUMER_UNKNOWN,
it will send the item to the returned process id and avoid all the
overhead of requesting the item when consumed.
@code
struct my_tuner : public CnC::item_tuner< tag_type, item_type >
{
    int consumed_on( const tag_type & tag ) const
    {
        // map the (known) consuming step to its process
        return my_step_tuner::consumed_on( consumer_step );
    }
};
@endcode

As more than one process might consume the item, you
can also return a vector of ids (instead of a single id) and the
runtime will send the item to all given processes.
@code
struct my_tuner : public CnC::item_tuner< tag_type, item_type >
{
    std::vector< int > consumed_on( const tag_type & tag ) const
    {
        std::vector< int > consumers;
        // pseudo-code: collect the process of every step consuming this item
        foreach( consumer_step of tag ) {
            int _tmp = my_step_tuner::consumed_on( consumer_step );
            consumers.push_back( _tmp );
        }
        return consumers;
    }
};
@endcode

Like for compute_on, CnC provides special values to facilitate and
generalize the use of consumed_on: CnC::CONSUMER_UNKNOWN,
CnC::CONSUMER_LOCAL, CnC::CONSUMER_ALL and
CnC::CONSUMER_ALL_OTHERS.

Note that consumed_on can return CnC::CONSUMER_UNKNOWN for some
item-instances, and process rank(s) for others.

Sometimes the program semantics make it easier to think about the
producer of an item. CnC provides a mechanism to keep the
pull-model but allows declaring the owner/producer of the item. If
the producer of an item is specified, the CnC-runtime can
significantly reduce the communication overhead because it no
longer requires global communication to find the owner of the
item. For this, simply define the depends-method in your
step-tuner (derived from CnC::step_tuner) and provide the
owning/producing process as an additional argument.

@code
struct my_tuner : public CnC::step_tuner<>
{
    int produced_on( const tag_type & tag ) const
    {
        return producer_known ? my_step_tuner::consumed_on( tag ) : tag % numProcs();
    }
};
@endcode

Like for consumed_on, CnC provides special values
CnC::PRODUCER_UNKNOWN and CnC::PRODUCER_LOCAL to facilitate and
generalize the use of produced_on.

The push-model consumed_on smoothly cooperates with the
pull-model as long as they don't conflict.

\section dist_sync Keeping data and work distribution in sync
For a more productive development, you might consider implementing
consumed_on by thinking about which other steps (not processes)
consume the item. With that knowledge you can easily use the
appropriate compute_on function to determine the consuming process.
The great benefit here is that you can then change the compute
distribution (i.e. change compute_on) and the data will automatically
follow in an optimal way; data and work distribution will always be in
sync. It allows experimenting with different distribution plans with
much less trouble and lets you define different strategies at a single
place. A simple example code which lets you select different
strategies at runtime is \ref bs_tuner; adding a new strategy only
requires extending the compute_on function.
A more complex example is this one: \ref cholesky_tuner
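To make the pattern concrete, here is a hedged sketch. The shared
helper place_of, the pair-like tag_type, and the claim that item (x,y)
is consumed by the steps (x,y) and (x,y+1) are assumptions about a
hypothetical application; only the tuner base classes and numProcs()
are CnC API:

@code
// Hypothetical shared helper: the single place defining the distribution.
// Changing this one function redistributes steps AND items consistently.
inline int place_of( const tag_type & tag, int nProcs )
{
    return tag.y % nProcs;   // row-wise distribution
}

struct my_step_tuner : public CnC::step_tuner<>
{
    int compute_on( const tag_type & tag, context_type & ) const
    {
        return place_of( tag, numProcs() );
    }
};

struct my_item_tuner : public CnC::item_tuner< tag_type, item_type >
{
    std::vector< int > consumed_on( const tag_type & tag ) const
    {
        // assumption: the item with tag (x,y) is consumed by the steps
        // with tags (x,y) and (x,y+1), so data follows the work mapping
        std::vector< int > consumers;
        consumers.push_back( place_of( tag, numProcs() ) );
        consumers.push_back( place_of( tag_type( tag.x, tag.y + 1 ), numProcs() ) );
        return consumers;
    }
};
@endcode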
\section dist_global Using global read-only data with distCnC
Many algorithms require global data that is initialized once and
stays read-only during the computation (dynamic single assignment,
DSA). In principle this is aligned with the CnC methodology as
long as the initialization is done from the environment. The CnC
API allows global DSA data through the context, i.e. you can store
global data in the context, initialize it there and then use it in
a read-only fashion within your step codes.

The internal mechanism works as follows: on remote processes the
user context is default-constructed and then
de-serialized/un-marshaled. On the host, construction and
serialization/marshaling is done in a lazy manner, i.e. not
before something actually needs to be transferred. This allows
creating contexts on the host with non-default constructors, but
it requires overloading the serialize method of the context. The
actual time of transfer is not statically known; the earliest
possible time is the first item- or tag-put. All changes to the
context until that point will be duplicated remotely, later
changes will not.
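Here is a hedged sketch of the pattern. The context type, its member
m_delta and its constructors are hypothetical; the serialize overload
follows the interface described in \ref serialization:

@code
// Hypothetical context carrying global read-only data.
struct my_context : public CnC::context< my_context >
{
    double m_delta;                    // global read-only parameter
    // collections omitted for brevity ...

    my_context() : m_delta( 0.0 ) {}   // used on remote processes
    my_context( double delta )         // used on the host
        : m_delta( delta ) {}

    // replicates the global data when the context is transferred
    void serialize( CnC::serializer & ser )
    {
        ser & m_delta;
    }
};

// On the host: initialize before the first put, then use m_delta
// read-only inside step codes on every process.
// my_context ctxt( 0.25 );
@endcode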
A full example code which uses this feature is \ref bs_tuner.

Next: \ref non_cnc
**/

#ifdef _CnC_H_ALREADY_INCLUDED_
#warning dist_cnc.h included after cnc.h. Distribution capabilities will not be activated.
#endif

#ifndef _DIST_CNC_
# define _DIST_CNC_
#endif

#include <cnc/internal/dist/dist_init.h>

namespace CnC {
    namespace Internal {
        class void_context;
    }

    /// To enable remote CnC you must create one such object. The
    /// lifetime of the object defines the "scope" of
    /// distribution. Contexts created in the "scope" of the
    /// dist_cnc_init object (i.e. while it exists) will get
    /// distributed to participating processes (see \ref dc_run).
    ///
    /// Usually, a single dist_cnc_init object is created for the
    /// entire lifetime of a program, i.e. the dist_cnc_init object
    /// is created right when entering main and (auto-)destructed when
    /// main terminates. In this default mode all processes other than
    /// the root/host process exit the program when the dist_cnc_init
    /// object gets destructed.
    ///
    /// Actually, the current implementation allows only a single
    /// dist_cnc_init object at a time per process. Hence, all
    /// contexts on a given process are distributed in the same way.
    /// However, an optional parameter/flag allows defining the
    /// processes that actually "share" the dist_cnc_init object (and
    /// so their contexts). The optional flag/parameter is interpreted
    /// as an MPI_Comm to be used by the dist_cnc_init scope. This
    /// allows different groups of processes (defined by the
    /// MPI_Comm's) to work on different CnC contexts/graphs
    /// concurrently. If no MPI_Comm was specified (i.e. the default),
    /// client processes exit the program when the host dist_cnc_init
    /// object is destructed. If an MPI_Comm is provided, they also
    /// wait until the host process destructs its dist_cnc_init
    /// object, but then simply return from the constructor rather
    /// than exiting the program. Obviously, all this works only with
    /// the MPI communication infrastructure.
    ///
    /// Additionally, two modes of operation are supported:
    /// 1. By default, constructing a dist_cnc_init object blocks all
    ///    processes except the root process in the constructor.
    ///    Hence, code after the object instantiation will be executed
    ///    only on the host process.
    /// 2. If dist_env is set to true, the constructor returns on all
    ///    processes and execution continues in a SPMD style, i.e. all
    ///    processes continue program execution. The SPMD-style mode
    ///    allows alternating between MPI phases and CnC phases. This
    ///    mode is currently supported only when using MPI communication.
    ///    You have to ensure that all processes have fully completed
    ///    their local context creation before putting any data into a
    ///    context's collection. Similarly, you have to synchronize
    ///    context destruction. It is recommended to put an MPI_Barrier
    ///    right after instantiating a context and just before it gets
    ///    destructed (e.g. at the end of its scope), as in the sketch
    ///    below.
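    ///
    /// A hedged sketch of the SPMD mode (mode 2); the context type
    /// "my_context_type", its tag-collection "tags" and the rank
    /// variable myRank are application-side assumptions, not CnC API:
    /// @code
    /// // ... preceding MPI phase (MPI_Init etc. already done) ...
    /// {
    ///     // dist_env=true: constructor returns on ALL processes (MPI only)
    ///     CnC::dist_cnc_init< my_context_type > _dinit( true );
    ///     my_context_type ctxt;            // collectively created on every process
    ///     MPI_Barrier( MPI_COMM_WORLD );   // all processes finished context creation
    ///     if( myRank == 0 ) ctxt.tags.put( 1 );   // puts may come from any process
    ///     ctxt.wait();                     // wait for the CnC phase to complete
    ///     MPI_Barrier( MPI_COMM_WORLD );   // synchronize before context destruction
    /// }   // ctxt destructed first, then _dinit; all processes continue
    /// // ... next MPI phase ...
    /// @endcode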
    ///
    /// \note It is possible to combine SPMD mode with providing an
    ///       MPI_Comm. You can even change the grouping in phases by
    ///       using different MPI_Comm's at different times of the
    ///       execution. E.g. the lifetime of a dist_cnc_init object
    ///       might be a (collective) function call. Make sure each
    ///       process has only a single dist_cnc_init object alive at
    ///       any point in time.
    ///
    /// \note All context classes ever used in the program must be
    ///       referenced as template arguments if they should be
    ///       distributed.
    /// \note All distributed contexts must have all
    ///       collections they use as members and must be
    ///       default-constructible.
    /// \note Pointers as tags are not supported by distCnC.
    ///
    /// Execution and other internal details are described in
    /// CnC::Internal::dist_init.
    template< class C1, class C2 = Internal::void_context, class C3 = Internal::void_context,
              class C4 = Internal::void_context, class C5 = Internal::void_context >
    struct /*CNC_API*/ dist_cnc_init : public Internal::dist_init< C1, C2, C3, C4, C5 >
    {
        dist_cnc_init() : Internal::dist_init< C1, C2, C3, C4, C5 >() {}
        /// \param dist_env enable SPMD-style access to contexts
        /// \param flag     MPI_Comm to be used (MPI only)
        dist_cnc_init( bool dist_env, int flag = 0 ) : Internal::dist_init< C1, C2, C3, C4, C5 >( flag, dist_env ) {}
        /// \param flag     MPI_Comm to be used (MPI only)
        /// \param dist_env enable SPMD-style access to contexts
        dist_cnc_init( int flag, bool dist_env = false ) : Internal::dist_init< C1, C2, C3, C4, C5 >( flag, dist_env ) {}
    };

} // namespace CnC

#include <cnc/cnc.h>

#endif // __DIST_CNC__H_