CnC
|
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** // compute fibonacci numbers // #define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB #include <cnc/cnc.h> // let's use a large type to store fib numbers typedef unsigned long long fib_type; // forward declaration struct fib_context; // declaration of compute step class struct fib_step { // declaration of execute method goes here int execute( const int & tag, fib_context & c ) const; }; // this is our context containing collections and defining their depndencies struct fib_context : public CnC::context< fib_context > // derive from CnC::context { // the step collection for the instances of the compute-kernel CnC::step_collection< fib_step > m_steps; // item collection holding the fib number(s) CnC::item_collection< int, fib_type > m_fibs; // tag collection to control steps CnC::tag_collection< int > m_tags; // constructor fib_context(); }; fib_context::fib_context() : CnC::context< fib_context >(), // pass context to collection constructors m_steps( *this ), m_fibs( *this ), m_tags( *this ) { // prescribe compute steps with this (context) as argument m_tags.prescribes( m_steps, *this ); // step consumes m_fibs m_steps.consumes( m_fibs ); // step also produces m_fibs m_steps.produces( m_fibs ); } // the actual step code computing the fib numbers goes here int fib_step::execute( const int & tag, fib_context & ctxt ) const { switch( tag ) { case 0 : ctxt.m_fibs.put( tag, 0 ); break; case 1 : ctxt.m_fibs.put( tag, 1 ); break; default : // get previous 2 results fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 ); fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 ); // put our result ctxt.m_fibs.put( tag, f_1 + f_2 ); } return CnC::CNC_Success; } int main( int argc, char* argv[] ) { int n = 42; // eval command line args if( argc < 2 ) { std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl; } else n = atol( argv[1] ); // create context fib_context ctxt; // put tags to initiate evaluation for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i ); // wait for completion ctxt.wait(); // get result fib_type res2; ctxt.m_fibs.get( n, res2 ); // print result std::cout << "fib (" << n << "): " << res2 << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** // compute fibonacci numbers // #define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB // let's use a large type to store fib numbers typedef unsigned long long fib_type; #include "fib.h" // the actual step code computing the fib numbers goes here int fib_step::execute( const int & tag, fib_context & ctxt ) const { switch( tag ) { case 0 : ctxt.m_fibs.put( tag, 0 ); break; case 1 : ctxt.m_fibs.put( tag, 1 ); break; default : // get previous 2 results fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 ); fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 ); // put our result ctxt.m_fibs.put( tag, f_1 + f_2 ); } return CnC::CNC_Success; } int main( int argc, char* argv[] ) { int n = 42; // eval command line args if( argc < 2 ) { std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl; } else n = atol( argv[1] ); // create context fib_context ctxt; // enable debug output for steps CnC::debug::trace( ctxt.m_steps ); // also enable debug output for our items CnC::debug::trace( ctxt.m_fibs ); // put tags to initiate evaluation for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i ); // wait for completion ctxt.wait(); // get result fib_type res2; ctxt.m_fibs.get( n, res2 ); // print result std::cout << "fib (" << n << "): " << res2 << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifndef fib_H_ALREADY_INCLUDED #define fib_H_ALREADY_INCLUDED #include <cnc/cnc.h> #include <cnc/debug.h> // Forward declaration of the context class (also known as graph) struct fib_context; // The step classes struct fib_step { int execute( const int & t, fib_context & c ) const; }; // The context class struct fib_context : public CnC::context< fib_context > { // the step collection for the instances of the compute-kernel CnC::step_collection< fib_step > m_steps; // Item collections CnC::item_collection< int, fib_type > m_fibs; // Tag collections CnC::tag_collection< int > m_tags; // The context class constructor fib_context() : CnC::context< fib_context >(), // Initialize each step collection m_steps( *this, "fib_step" ), // Initialize each tag collection m_fibs( *this, "fibs" ), // Initialize each item collection m_tags( *this, "tags" ) { // prescribe compute steps with this (context) as argument m_tags.prescribes( m_steps, *this ); // step consumes m_fibs m_steps.consumes( m_fibs ); // step also produces m_fibs m_steps.produces( m_fibs ); } }; #endif // fib_H_ALREADY_INCLUDED
//******************************************************************************** // Copyright (c) 2010-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** // compute fibonacci numbers // #define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB // let's use a large type to store fib numbers typedef unsigned long long fib_type; #include "fib.h" // the actual step code computing the fib numbers goes here int fib_step::execute( const int & tag, fib_context & ctxt ) const { switch( tag ) { case 0 : ctxt.m_fibs.put( tag, 0 ); break; case 1 : ctxt.m_fibs.put( tag, 1 ); break; default : // get previous 2 results fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 ); fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 ); // put our result ctxt.m_fibs.put( tag, f_1 + f_2 ); } return CnC::CNC_Success; } int main( int argc, char* argv[] ) { int n = 42; // eval command line args if( argc < 2 ) { std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl; } else n = atol( argv[1] ); // create context fib_context ctxt; // show scheduler statistics when done CnC::debug::collect_scheduler_statistics( ctxt ); // put tags to initiate evaluation for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i ); // wait for completion ctxt.wait(); // get result fib_type res2; ctxt.m_fibs.get( n, res2 ); // print result std::cout << "fib (" << n << "): " << res2 << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifndef fib_H_ALREADY_INCLUDED #define fib_H_ALREADY_INCLUDED #include <cnc/cnc.h> #include <cnc/debug.h> // Forward declaration of the context class (also known as graph) struct fib_context; // The step classes struct fib_step { int execute( const int & t, fib_context & c ) const; }; // The context class struct fib_context : public CnC::context< fib_context > { // the step collection for the instances of the compute-kernel CnC::step_collection< fib_step > m_steps; // Item collections CnC::item_collection< int, fib_type > m_fibs; // Tag collections CnC::tag_collection< int > m_tags; // The context class constructor fib_context() : CnC::context< fib_context >(), // Initialize each step collection m_steps( *this, "fib_step" ), // Initialize each tag collection m_fibs( *this, "fibs" ), // Initialize each item collection m_tags( *this, "tags" ) { // prescribe compute steps with this (context) as argument m_tags.prescribes( m_steps, *this ); // step consumes m_fibs m_steps.consumes( m_fibs ); // step also produces m_fibs m_steps.produces( m_fibs ); } }; #endif // fib_H_ALREADY_INCLUDED
//******************************************************************************** // Copyright (c) 2010-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** // compute fibonacci numbers // #define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB // let's use a large type to store fib numbers typedef unsigned long long fib_type; #include "cnc/cnc.h" struct fib_context; // let's use a tuner to pre-declare dependencies struct fib_tuner : public CnC::step_tuner<> { template< class dependency_consumer > void depends( const int & tag, fib_context & c, dependency_consumer & dC ) const; }; #include "fib.h" template< class dependency_consumer > void fib_tuner::depends( const int & tag, fib_context & c, dependency_consumer & dC ) const { // we have item-dependencies only if tag > 1 if( tag > 1 ) { dC.depends( c.m_fibs, tag - 1 ); dC.depends( c.m_fibs, tag - 2 ); } } // the actual step code computing the fib numbers goes here int fib_step::execute( const int & tag, fib_context & ctxt ) const { switch( tag ) { case 0 : ctxt.m_fibs.put( tag, 0 ); break; case 1 : ctxt.m_fibs.put( tag, 1 ); break; default : // get previous 2 results fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 ); fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 ); // put our result ctxt.m_fibs.put( tag, f_1 + f_2 ); } return CnC::CNC_Success; } int main( int argc, char* argv[] ) { int n = 42; // eval command line args if( argc < 2 ) { std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl; } else n = atol( argv[1] ); // create context fib_context ctxt; // show scheduler statistics when done CnC::debug::collect_scheduler_statistics( ctxt ); // put tags to initiate evaluation for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i ); // wait for completion ctxt.wait(); // get result fib_type res2; ctxt.m_fibs.get( n, res2 ); // print result std::cout << "fib (" << n << "): " << res2 << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** // compute fibonacci numbers // #define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB // let's use a large type to store fib numbers typedef unsigned long long fib_type; #include "cnc/cnc.h" struct fib_context; // let's use a tuner to pre-declare dependencies struct fib_tuner : public CnC::step_tuner<> { bool preschedule() const { return true; } }; #include "fib.h" // the actual step code computing the fib numbers goes here int fib_step::execute( const int & tag, fib_context & ctxt ) const { switch( tag ) { case 0 : ctxt.m_fibs.put( tag, 0 ); break; case 1 : ctxt.m_fibs.put( tag, 1 ); break; default : // get previous 2 results fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 ); fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 ); // ctxt.flush_gets(); // uncomment this line to prohibit pre-scheduling from completing full step-execution // put our result ctxt.m_fibs.put( tag, f_1 + f_2 ); } return CnC::CNC_Success; } int main( int argc, char* argv[] ) { int n = 42; // eval command line args if( argc < 2 ) { std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl; } else n = atol( argv[1] ); // create context fib_context ctxt; // show scheduler statistics when done CnC::debug::collect_scheduler_statistics( ctxt ); // put tags to initiate evaluation for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i ); // wait for completion ctxt.wait(); // get result fib_type res2; ctxt.m_fibs.get( n, res2 ); // print result std::cout << "fib (" << n << "): " << res2 << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2010-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** // compute fibonacci numbers // #define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB #include <cnc/cnc.h> // let's use a large type to store fib numbers typedef unsigned long long fib_type; struct fib_context; // let's use a tuner to pre-declare dependencies struct fib_tuner : public CnC::step_tuner<> { // pre-declare data-dependencies template< class dependency_consumer > void depends( const int & tag, fib_context & c, dependency_consumer & dC ) const; }; struct item_tuner : public CnC::hashmap_tuner { // provide number gets to each item int get_count( const int & tag ) const; }; #include "fib.h" template< class dependency_consumer > void fib_tuner::depends( const int & tag, fib_context & c, dependency_consumer & dC ) const { // we have item-dependencies only if tag > 1 if( tag > 1 ) { dC.depends( c.m_fibs, tag - 1 ); dC.depends( c.m_fibs, tag - 2 ); } } int item_tuner::get_count( const int & tag ) const { return tag > 0 ? 2 : 1; } // the actual step code computing the fib numbers goes here int fib_step::execute( const int & tag, fib_context & ctxt ) const { switch( tag ) { case 0 : ctxt.m_fibs.put( tag, 0 ); break; case 1 : ctxt.m_fibs.put( tag, 1 ); break; default : // get previous 2 results fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 ); fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 ); // put our result, and specify that this data item will be // get'ed twice (i.e. for the next two fib calculations) // before it is destroyed ctxt.m_fibs.put( tag, f_1 + f_2 ); } return CnC::CNC_Success; } int main( int argc, char* argv[] ) { int n = 42; // eval command line args if( argc < 2 ) { std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl; } else n = atol( argv[1] ); // create context fib_context ctxt; // show scheduler statistics when done CnC::debug::collect_scheduler_statistics( ctxt ); // put tags to initiate evaluation for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i ); // wait for completion ctxt.wait(); // get result fib_type res2; ctxt.m_fibs.get( n, res2 ); // print result std::cout << "fib (" << n << "): " << res2 << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifndef fib_H_ALREADY_INCLUDED #define fib_H_ALREADY_INCLUDED #include <cnc/cnc.h> #include <cnc/debug.h> // Forward declaration of the context class (also known as graph) struct fib_context; // The step classes struct fib_step { int execute( const int & t, fib_context & c ) const; }; // The context class struct fib_context : public CnC::context< fib_context > { // the step collection for the instances of the compute-kernel CnC::step_collection< fib_step, fib_tuner > m_steps; // Item collections CnC::item_collection< int, fib_type, item_tuner > m_fibs; // Tag collections CnC::tag_collection< int > m_tags; // The context class constructor fib_context() : CnC::context< fib_context >(), // Initialize each step collection m_steps( *this, "fib_step" ), // Initialize each tag collection m_fibs( *this, "fibs" ), // Initialize each item collection m_tags( *this, "tags" ) { // prescribe compute steps with this (context) as argument m_tags.prescribes( m_steps, *this ); // step consumes m_fibs m_steps.consumes( m_fibs ); // step also produces m_fibs m_steps.produces( m_fibs ); } }; #endif // fib_H_ALREADY_INCLUDED
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** // #include <stdio.h> #include <stdlib.h> #include <string.h> #include <tbb/tick_count.h> #ifdef _DIST_ #include <cnc/dist_cnc.h> #include <cnc/internal/dist/distributor.h> #else #include <cnc/cnc.h> #endif struct my_context; struct FindPrimes { int execute( int n, my_context & c ) const; }; struct primes_tuner : public CnC::step_tuner<> { int compute_on( const int & p, my_context & ) const { return ( 1 + p / ( 1000000 / 20 ) ) % numProcs();//( p * 5 / 3 + 1 ) % 5; } }; struct my_context : public CnC::context< my_context > { CnC::step_collection< FindPrimes, primes_tuner > m_steps; CnC::tag_collection< int, CnC::tag_tuner< CnC::Internal::strided_range< int > > > m_tags; CnC::item_collection< int,int > m_primes; my_context() : CnC::context< my_context >(), m_steps( *this ), m_tags( *this ), m_primes( *this ) { m_tags.prescribes( m_steps, *this ); } }; int FindPrimes::execute( int n, my_context & c ) const { int factor = 3; while ( n % factor ) factor += 2; if (factor == n) c.m_primes.put(n, n); return CnC::CNC_Success; } int main(int argc, char* argv[]) { #ifdef _DIST_ CnC::dist_cnc_init< my_context > dc_init; #endif bool verbose = false; int n = 0; int number_of_primes = 0; if (argc == 2) { n = atoi(argv[1]); } else if (argc == 3 && 0 == strcmp("-v", argv[1])) { n = atoi(argv[2]); verbose = true; } else { fprintf(stderr,"Usage: primes [-v] n\n"); return -1; } my_context c; printf("Determining primes from 1-%d \n",n); tbb::tick_count t0 = tbb::tick_count::now(); c.m_tags.put_range( CnC::Internal::strided_range< int >( 3, n, 2 ) ); c.wait(); tbb::tick_count t1 = tbb::tick_count::now(); // FIXME we have to transfer the items to the host first (distCnC) number_of_primes = (int)c.m_primes.size() + 1; printf("Found %d primes in %g seconds\n", number_of_primes, (t1-t0).seconds()); if (verbose) { printf("%d\n", 2); CnC::item_collection<int,int>::const_iterator cii; for (cii = c.m_primes.begin(); cii != c.m_primes.end(); cii++) { printf("%d\n", cii->first); // kludge } } }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** // #include <stdio.h> #include <stdlib.h> #include <string.h> #include <tbb/tick_count.h> #include <cnc/cnc.h> #include <cnc/debug.h> struct my_context; struct FindPrimes { int operator()( int n ) const; }; struct my_context : public CnC::context< my_context > { CnC::item_collection< int,int > m_primes; my_context() : CnC::context< my_context >(), m_primes( *this ) { CnC::debug::collect_scheduler_statistics( *this ); } }; my_context g_c; int FindPrimes::operator()( int n ) const { int factor = 3; while ( n % factor ) factor += 2; if (factor == n) g_c.m_primes.put(n, n); return CnC::CNC_Success; } int main(int argc, char* argv[]) { bool verbose = false; int n = 0; int number_of_primes = 0; if (argc == 2) { n = atoi(argv[1]); } else if (argc == 3 && 0 == strcmp("-v", argv[1])) { n = atoi(argv[2]); verbose = true; } else { fprintf(stderr,"Usage: primes [-v] n\n"); return -1; } printf("Determining primes from 1-%d \n",n); tbb::tick_count t0 = tbb::tick_count::now(); CnC::parallel_for( 3, n+1, 2, FindPrimes(), CnC::pfor_tuner< false >() ); tbb::tick_count t1 = tbb::tick_count::now(); number_of_primes = (int)g_c.m_primes.size() + 1; printf("Found %d primes in %g seconds\n", number_of_primes, (t1-t0).seconds()); if (verbose) { printf("%d\n", 2); for (CnC::item_collection<int,int>::const_iterator cii = g_c.m_primes.begin(); cii != g_c.m_primes.end(); cii++) { printf("%d\n", cii->first); // kludge } } }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifndef blackscholes_H_ALREADY_INCLUDED #define blackscholes_H_ALREADY_INCLUDED #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <vector> #include <memory> typedef std::shared_ptr< std::vector< OptionData > > option_vector_type; typedef std::shared_ptr< std::vector< fptype > > price_vector_type; CNC_BITWISE_SERIALIZABLE( OptionData ); // Forward declaration of the context class (also known as graph) struct blackscholes_context; // The step classes struct Compute { int execute( const int &, blackscholes_context & ) const; }; struct bs_tuner : public CnC::step_tuner<>, public CnC::vector_tuner { int compute_on( const int tag ) const { return tag % numProcs(); } int compute_on( const int tag, blackscholes_context & ) const { return compute_on( tag ); } int consumed_by( const int tag ) const { return tag; } int consumed_on( const int tag ) const { return compute_on( consumed_by( tag ) ); } int getcount( const int tag ) { return 1; } }; // The context class struct blackscholes_context : public CnC::context< blackscholes_context > { // Step collections CnC::step_collection< Compute, bs_tuner > compute; // Item collections CnC::item_collection< int, option_vector_type, bs_tuner > opt_data; CnC::item_collection< int, price_vector_type, bs_tuner > prices; // Tag collections CnC::tag_collection< int > tags; int m_vs; // The context class constructor blackscholes_context( int vs = 0) : CnC::context< blackscholes_context >(), // Initialize each step collection compute( *this ), // Initialize each item collection opt_data( *this ), prices( *this ), // Initialize each tag collection tags( *this ), m_vs( vs ) { // Prescriptive relations tags.prescribes( compute, *this ); compute.produces( prices ); opt_data.set_max( m_vs ); prices.set_max( m_vs ); } #ifdef _DIST_ void serialize( CnC::serializer & ser ) { ser & m_vs; if( ser.is_unpacking() ) { opt_data.set_max( m_vs ); prices.set_max( m_vs ); } } #endif }; #endif // blackscholes_H_ALREADY_INCLUDED
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifndef cholesky_H_ALREADY_INCLUDED #define cholesky_H_ALREADY_INCLUDED #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <memory> // Forward declaration of the context class (also known as graph) struct cholesky_context; // The step classes struct S1_compute { int execute( const int & t, cholesky_context & c ) const; }; struct S2_compute { int execute( const pair & t, cholesky_context & c ) const; }; struct S3_compute { int execute( const triple & t, cholesky_context & c ) const; }; /////////////// static void mark( int p, bool * d ) { if( d[p] == false ) { d[p] = true; } } struct cholesky_tuner : public CnC::step_tuner<>, public CnC::hashmap_tuner { cholesky_tuner( cholesky_context & c, int p = 0, int n = 0, dist_type dt = BLOCKED_ROWS ) : m_context( c ), m_p( p ), m_n( n ), m_div( (((p*p) / 2) + 1) / numProcs() ), // m_div( p ? (((p*(p+1))/2)+((2*numProcs())/2))/(2*numProcs()) : 0 ), m_dt( dt ) { if( myPid() == 0 ) { switch( dt ) { default: case BLOCKED_ROWS : std::cerr << "Distributing BLOCKED_ROWS\n"; break; case ROW_CYCLIC : std::cerr << "Distributing ROW_CYCLIC\n"; break; case COLUMN_CYCLIC : std::cerr << "Distributing COLUMN_CYCLIC\n"; break; case BLOCKED_CYCLIC : std::cerr << "Distributing BLOCKED_CYCLICS\n"; break; } } } inline static int compute_on( const dist_type dt, const int i, const int j, const int n, const int s ) { switch( dt ) { default: case BLOCKED_ROWS : return ( ((j*j)/2 + 1 + i ) / s ) % numProcs(); break; case ROW_CYCLIC : return j % numProcs(); break; case COLUMN_CYCLIC : return i % numProcs(); break; case BLOCKED_CYCLIC : return ( (i/2) * n + (j/2) ) % numProcs(); break; } } // step-bits int compute_on( const int tag, cholesky_context & /*arg*/ ) const { return compute_on( m_dt, tag, tag, m_n, m_div ); } int compute_on( const pair & tag, cholesky_context & /*arg*/ ) const { return compute_on( m_dt, tag.first, tag.second, m_p, m_div ); } int compute_on( const triple & tag, cholesky_context & /*arg*/ ) const { return compute_on( m_dt, tag[2], tag[1], m_p, m_div ); } // item-bits typedef triple tag_type; int get_count( const tag_type & tag ) const { int _k = tag[0], _i = tag[2]; if( _k == _i+1 ) return CnC::NO_GETCOUNT; // that's our result return ( _k > 0 && _k > _i ) ? ( m_p - _k ) : 1; } // First we determine which *steps* are going to consume this item. // Then we use compute_on to determine the *processes* to which the item needs to go. // Mostly the two steps are formulated in one line. // We avoid duplicate entries by using a helper mechanism "mark" std::vector< int > consumed_on( const tag_type & tag ) const { int _k = tag[0], _j = tag[1], _i = tag[2]; if( _i == _j ) { // on diagonal if( _i == _k ) return std::vector< int >( 1, compute_on( _k, m_context ) ); // S1 only if( _k == m_p ) return std::vector< int >( 1, 0 ); // the end } if( _i == _k ) return std::vector< int >( 1, compute_on( pair( _k, _j ), m_context ) ); // S2 only bool * _d; _d = new bool[numProcs()]; memset( _d, 0, numProcs() * sizeof( *_d ) ); if( _i == _k-1 ) { if( _i == _j ) { // on diagonal on S2 for( int j = _k; j < m_p; ++j ) { mark( compute_on( pair( _k - 1, j ), m_context ), _d ); } } else { // S3 for( int j = _j; j < m_p; ++j ) { for( int i = _k; i <= _j; ++i ) { mark( compute_on( triple( _k-1, j, i ), m_context ), _d ); } } } } mark( compute_on( triple( _k, _j, _i ), m_context ), _d ); std::vector< int > _v; _v.reserve( numProcs()/2 ); if( _d[myPid()] ) _v.push_back( myPid() ); for( int i = 0; i < numProcs(); ++i ) { if( _d[i] && i != myPid() ) _v.push_back( i ); } delete [] _d; return _v; } #ifdef _DIST_ void serialize( CnC::serializer & ser ) { ser & m_p & m_div & m_n & m_dt; } #endif private: cholesky_context & m_context; int m_p; int m_n; int m_div; dist_type m_dt; }; // The context class struct cholesky_context : public CnC::context< cholesky_context > { // tuners cholesky_tuner tuner; // Step Collections CnC::step_collection< S1_compute, cholesky_tuner > sc_s1_compute; CnC::step_collection< S2_compute, cholesky_tuner > sc_s2_compute; CnC::step_collection< S3_compute, cholesky_tuner > sc_s3_compute; // Item collections CnC::item_collection< triple, tile_const_ptr_type, cholesky_tuner > Lkji; int p,b; // Tag collections CnC::tag_collection< int > control_S1; CnC::tag_collection< pair > control_S2; CnC::tag_collection< triple > control_S3; // The context class constructor cholesky_context( int _b = 0, int _p = 0, int _n = 0, dist_type dt = BLOCKED_ROWS ) : CnC::context< cholesky_context >(), // init tuners tuner( *this, _p, _n, dt ), // init step colls sc_s1_compute( *this, tuner, "Cholesky" ), sc_s2_compute( *this, tuner, "Trisolve" ), sc_s3_compute( *this, tuner, "Update" ), // Initialize each item collection Lkji( *this, "Lkji", tuner ), p( _p ), b( _b ), // Initialize each tag collection control_S1( *this, "S1" ), control_S2( *this, "S2" ), control_S3( *this, "S3" ) { // Prescriptive relations control_S1.prescribes( sc_s1_compute, *this ); control_S2.prescribes( sc_s2_compute, *this ); control_S3.prescribes( sc_s3_compute, *this ); // producer/consumer relations sc_s1_compute.consumes( Lkji ); sc_s1_compute.produces( Lkji ); sc_s2_compute.consumes( Lkji ); sc_s2_compute.produces( Lkji ); sc_s3_compute.consumes( Lkji ); sc_s3_compute.produces( Lkji ); #if 0 CnC::debug::trace_all( *this ); #endif } #ifdef _DIST_ void serialize( CnC::serializer & ser ) { ser & p & b & tuner; } #endif }; #endif // cholesky_H_ALREADY_INCLUDED
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <cnc/reduce.h> #include <cassert> #include <string> #include <sstream> #include <fstream> // Count the number of occurances of given strings in a file. An // example of using a pre-defined CnC::graph, e.g. a reduction. // // The file is read line by line (on the host). A computation unit is // tagged by the pair (line-id/block, search-string). Such a // computation gets the line/block to search in. It looks for the // string and puts the number of matches into a collection. The // collection is part of a reduction. // // The input to the reduction is counts_per_block with the matched // string as key/above pair as its tag. The selector (a lambda!) maps // the input-pair to the string that is matched. The reduction also // takes "counts" which provides the number of participating elements // per reduction (for each string one reduction!). The reduction's // output is the overall count per string. // // This is not optimized in any way. To make this fast, we need to // adjust the block size and read in parallel. Additionally, // std::strings are used in a very memory-unfriendly and // copy-intensive manner. // // On distributed memory an intelligent distribution plan // would probably be helpful. typedef std::pair< int, std::string > tag_type; struct cw_context; struct counter { int execute( const tag_type & t, cw_context & ctxt ) const; }; struct cw_context : public CnC::context< cw_context > { // the prescribing tags CnC::tag_collection< tag_type > tags; // holds the lines/blocks of the file(s) CnC::item_collection< int, std::string > blocks; // for each pair(block,string) we put the number of occurances of string found in block CnC::item_collection< tag_type, size_type > counts_per_block; // the final number of occurances per string CnC::item_collection< std::string, size_type > counts; // the number of items (e.g. counts per string) participating in a reduction CnC::item_collection< std::string, size_type > red_counts; CnC::step_collection< counter > steps; // our reduction is provided as a graph, see constructor for its instantiation&wiring CnC::graph * reduce; cw_context() : tags( *this, "tags" ), blocks( *this, "blocks" ), counts_per_block( *this, "counts_per_block" ), counts( *this, "counts" ), red_counts( *this, "red_counts" ), steps( *this, "counter" ), reduce( NULL ) { // here we wire the graph/reduction reduce = CnC::make_reduce_graph( *this, // context "reduce", // name counts_per_block, // input collection red_counts, // number of items per reduction counts, // the final result for each reduction std::plus<size_type>(), // the reduction operation size_type(0), // identity element // we use a lambda as the selector // it maps the item to the reduction identified by t.second (the string) // e.g. it reduces over all blocks []( const tag_type & t, std::string & _s )->bool{_s=t.second;return true;} ); tags.prescribes( steps, *this ); steps.consumes( blocks ); steps.produces( counts_per_block ); //CnC::debug::trace( *reduce, 3 ); } ~cw_context() { delete reduce; } }; // get block/line (tag.first) and determine number of occurances of string (tag.second) // put result into counts_per_block int counter::execute( const tag_type & t, cw_context & ctxt ) const { std::string _str; ctxt.blocks.get( t.first, _str ); const size_type _sz = _str.size(); size_type _pos = -1; size_type _cnt = 0; while( ( _pos = _str.find( t.second, _pos+1 ) ) < _sz ) ++_cnt; // always to results, even if 0 // this allows us to determine the number of reduced items in main ctxt.counts_per_block.put( t, _cnt ); return 0; } int main( int argc, char * argv[]) { #ifdef _DIST_ CnC::dist_cnc_init< cw_context > _dc; #endif if( argc < 3 ) { std::cerr << "expected arguments: <file> <word1> [<word2>...]\n"; exit(1); } // create the context cw_context ctxt; std::ifstream _file( argv[1] ); std::string _block; int _id = 0; // read line line while( std::getline( _file, _block ) ) { ctxt.blocks.put( _id, _block ); int i = 1; // for each line, put control for each searched string while( ++i < argc ) { ctxt.tags.put( std::make_pair( _id, argv[i] ) ); } ++_id; } // reduction needs number of reduced items // as we always put a number in the step, we know its #blocks // _id holds #blocks int i = 1; while( ++i < argc ) ctxt.red_counts.put( argv[i], _id ); ctxt.wait(); std::cout << "done" << std::endl; std::cout << ctxt.counts.size() << " " << ctxt.blocks.size() << std::endl; // iterate and write out all keys/value pairs for( auto i = ctxt.counts.begin(); i != ctxt.counts.end(); ++i ) { std::cout << i->first << " \t " << *i->second << std::endl; } ctxt.wait(); std::cout << ctxt.counts.size() << " " << ctxt.blocks.size() << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <cnc/reduce.h> #include <cassert> #include <string> #include <sstream> #include <algorithm> #include <fstream> // Count the number of occurances of all words in a file. An example // of using a pre-defined CnC::graph, e.g. a reduction. // // The file is read line by line (on the host). A computation unit is // tagged by the line-id/block. Such a computation gets the // line/block to search in. It reads word by word and puts an item // into an item-collection "counts_per_block". To make the tag unique, // its a triple (block/line, word#, word) [case-insensitive]. The // data itself is just a dummy. The collection is part of a reduction // which will finally compute the number of occurances per word. // // The input to the reduction is counts_per_block. The selector (a // lambda!) maps the input-triple to the word that is matched. The // reduction also takes "counts". As we don't know which words we will // find and count, there is no way to know the number of participating // items per reduction. Instead we have to indicate when all lines // have been processed. After a first call to wait, we know no more // computation is done, we then call reduction::flush to indicate that // no more input arrives. A second wait() makes sure that the // reduction completes before we access the result. The reduction's // output is the overall count for ever word in the file. // // As an opimization, we assign a get-count of 0 to all words we put. // In this example the words are used for nothing else than the // reduction. With a get-couhjnt of 0 the items will be passed to the // reduction but never really stored. // // This is not optimized in any way. To make this fast, we need to // adjust the block size and read in parallel. Additionally, // std::strings are used in a very memory-unfriendly and // copy-intensive manner. // // On distributed memory an intelligent distribution plan would // probably be helpful. typedef long long int id_type; typedef std::pair< id_type, const std::string > tag_type; struct cw_context; struct counter { int execute( const int & t, cw_context & ctxt ) const; }; struct gc0_tuner : public CnC::hashmap_tuner { template< typename Tag > int get_count( const Tag & ) const { return 0; } }; struct cw_context : public CnC::context< cw_context > { // prescribes our computation CnC::tag_collection< int > tags; // one input line per item CnC::item_collection< int, std::string > blocks; // every word instance we find we put in here with a unique tag CnC::item_collection< tag_type, size_type, gc0_tuner > counts_per_block; // we don't really need this, we use flush CnC::item_collection< std::string, int > cnt; // the resulting count per word CnC::item_collection< std::string, size_type > counts; // extracing words CnC::step_collection< counter > steps; // the reduction sub-graph CnC::graph * reduce; cw_context() : tags( *this, "tags" ), blocks( *this, "blocks" ), counts_per_block( *this, "counts_per_block" ), cnt( *this, "cnt" ), counts( *this, "counts" ), steps( *this, "counter" ), reduce( NULL ) { reduce = CnC::make_reduce_graph( *this, "reduce", counts_per_block, //input collection cnt, // counts per reduce, not used counts, // result std::plus<size_type>(), // reduce operation size_type(0), // identity element // we use a lambda as the selector // it maps the item to the reduction identified by t.second (the string) // e.g. it reduces over all blocks []( const tag_type & t, std::string& _s )->bool{_s=t.second;return true;} ); tags.prescribes( steps, *this ); steps.consumes( blocks ); steps.produces( counts_per_block ); // CnC::debug::trace_all( *this ); //CnC::debug::trace( steps ); // CnC::debug::trace( *reduce, 2 ); } ~cw_context() { delete reduce; } }; // get the line, read word by word and put each instance // with unique tag into counts_per_block, which is the // input collection of the reduction int counter::execute( const int & t, cw_context & ctxt ) const { std::string _str; ctxt.blocks.get( t, _str ); std::istringstream iss( _str ); std::string _word; id_type _id = (t << 16); while( iss >> _word ) { // let's a do a case-insensitive matching std::transform(_word.begin(), _word.end(), _word.begin(), ::tolower); ctxt.counts_per_block.put( tag_type(_id, _word), 1 ); ++_id; } return 0; } int main( int argc, char * argv[]) { #ifdef _DIST_ CnC::dist_cnc_init< cw_context > _dc; #endif if( argc < 2 ) { std::cerr << "expected arguments: <file>\n"; exit(1); } // create the context cw_context ctxt; std::ifstream _file( argv[1] ); std::string _block; int _id = 0; tbb::tick_count startTime = tbb::tick_count::now(); // read line line while( std::getline( _file, _block ) ) { // put each line ctxt.blocks.put( _id, _block ); // and control to start searching/matching ctxt.tags.put( _id ); ++_id; } // A reduction needs number of reduced items. // We do not know the count in advance. // We do not even know which words are going to show up. // We have to wait until processing is done // and then tell the reduction it can do the final pass ctxt.wait(); ctxt.reduce->flush(); ctxt.wait(); tbb::tick_count endTime = tbb::tick_count::now(); std::cout << ctxt.counts.size() << std::endl; // iterate and write out all keys/value pairs with counts > 999 for( auto i = ctxt.counts.begin(); i != ctxt.counts.end(); ++i ) { if( *i->second > 999 ) std::cout << i->first << " \t " << *i->second << std::endl; } std::cout << "Time: " << (endTime-startTime).seconds() << "s.\n"; return 0; }
// ******************************************************************************* // Copyright (c) 2013-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** // ******************************************************************************* #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <cnc/reduce.h> #include <cassert> #include <string> #include <sstream> #include <algorithm> #include <fstream> #include <cwctype> /** Map-Reduce for CnC. The mapreduce_context provides a functionality similar to hadoop/mapreduce. At instantiation time it accepts the map- and reduce operations. Input data is accepted by putting files into its read_data collection. A call to wait() makes sure all mapreduce operations are done. Output data is stored in the "results" collection and can then be accessed through get-calls or iterators. Like in common mapreduce frameworks the map operation works on streams - not on CnC entities. Also like in mapreduce frameworks the result of the map is stored in a collection (of course here we use a CnC item collection). It supports an easy way to distribute computation on distributed memory. By appending':<processid>' to the input filenames it will execute the corresponding map operation on the given process '<processid>'. The reduction part is automatically distributed. Internally we use a step for reading files, a step for mapping and a reduction graph for the reduce part. \todo write a driver for HPFS which provides the distribution information (appending':<processid>') \todo what about making better use of blocks (not entire files)? \todo what about making this a CnC::graph? **/ template< typename MapOp, typename ReduceOp > struct mapreduce_context : public CnC::context< mapreduce_context< MapOp, ReduceOp > > { typedef std::pair< const std::string, long long int > map_tag_type; typedef typename MapOp::result_type result_type; typedef typename MapOp::key_type key_type; typedef std::string char_ptr_type; // the step which executes the map operation struct mapper { int execute( const map_tag_type & t, mapreduce_context< MapOp, ReduceOp > & ctxt ) const; }; // the step which reads individual files struct reader { int execute( const std::string & file, mapreduce_context< MapOp, ReduceOp > & ctxt ) const; }; // a generic tuner providing constant get-count // and executes locally (e.g. whereever the step gets prescribed) template< int GC > struct gc_tuner : public CnC::hashmap_tuner { template< typename Tag > int get_count( const Tag & ) const { return GC; } template< typename Tag > int consumed_on( const Tag & ) const { return CnC::CONSUMER_LOCAL; } template< typename Tag > int produced_on( const Tag & ) const { return CnC::PRODUCER_LOCAL; } }; // tuner for reading files, distributes file-reads struct read_tuner : public CnC::step_tuner< false > { // we analyze the input string and search for ':<pid>' prefix. // if found, the corresponding step will be executed on process <pid>. // Otherwise we distribute randomly. int compute_on( const std::string & file, mapreduce_context< MapOp, ReduceOp > & ) const { auto _col = file.rfind( ':' ); if( _col == std::string::npos ) { cnc_hash< std::string > _h; int _x = _h( file ) % CnC::tuner_base::numProcs(); return _x; } // manually convert to int (faster) const char * _p = file.c_str() + _col + 1; int _x = 0; while( *_p >= '0' && *_p <= '9' ) { _x = ( _x * 10 ) + ( *_p - '0' ); ++_p; } return _x % CnC::tuner_base::numProcs(); } }; // the mapping stays local struct map_tuner : public CnC::step_tuner< false > { int compute_on( const map_tag_type &, mapreduce_context< MapOp, ReduceOp > & ) const { return CnC::COMPUTE_ON_LOCAL; } }; // prescribes reading files CnC::tag_collection< std::string > read_tags = { *this, "read_tags" }; // need recent g++ or icpc // prescribes our computation CnC::tag_collection< map_tag_type > map_tags = { *this, "map_tags" }; // need recent g++ or icpc // one input line per item CnC::item_collection< map_tag_type, char_ptr_type, gc_tuner< 1 > > blocks = { *this, "blocks" }; // need recent g++ or icpc // every word instance we find we put in here with a unique tag CnC::item_collection< std::string, result_type, gc_tuner< 0 > > map_out = { *this, "map_out" }; // need recent g++ or icpc // we don't really need this, we use flush CnC::item_collection< key_type, int > cnt = { *this, "cnt" }; // need recent g++ or icpc // the results CnC::item_collection< key_type, result_type/*, OTuner*/ > results = { *this, "results" }; // need recent g++ or icpc // reading files CnC::step_collection< reader, read_tuner > read_steps = { *this, "reader" }; // need recent g++ or icpc // extracing words CnC::step_collection< mapper, map_tuner > map_steps = { *this, "mapper" }; // need recent g++ or icpc // the reduction sub-graph CnC::graph * reduce; // our private instance/copy of the map operation MapOp m_mapOp; // our private instance/copy of the reduce operation ReduceOp m_reduceOp; // we need a default constructor on distributed memory // the combination with serialize must match what the parametrized constructor does. mapreduce_context() : reduce( NULL ), m_mapOp(), m_reduceOp() { read_tags.prescribes( read_steps, *this ); map_tags.prescribes( map_steps, *this ); read_steps.produces( blocks ); map_steps.consumes( blocks ); map_steps.produces( map_out ); } // this is the cosntructor the user actually sees/uses: // it provides the map- and reduce operations mapreduce_context( const MapOp & mo, const ReduceOp & ro ) : mapreduce_context< MapOp, ReduceOp >() { m_mapOp = mo; m_reduceOp = ro; init_reduce(); } // initing the reduce graph // called by serialize (unpack) and the programmer-used constructor void init_reduce() { delete reduce; reduce = CnC::make_reduce_graph( *this, "reduce", map_out, //input collection cnt, // counts per reduce, not used results, // result m_reduceOp, // reduce operation size_type(0), // identity element // we use a lambda as the selector // it maps the item to the reduction identified by t.second (the string) // e.g. it reduces over all blocks []( const std::string & t, std::string& _s )->bool{_s=t;return true;} ); } ~mapreduce_context() { delete reduce; } // we overwrite the normal wait to make it even easier // in some cases this might not be desired, but we don't care for now. void wait() { // A reduction needs number of reduced items. // We do not know the count in advance. // We do not even know which words are going to show up. // We have to wait until processing is done // and then tell the reduction it can do the final pass. CnC::context< mapreduce_context< MapOp, ReduceOp > >::wait(); reduce->flush(); CnC::context< mapreduce_context< MapOp, ReduceOp > >::wait(); } #ifdef _DIST_ void serialize( CnC::serializer & ser ) { ser & m_mapOp & m_reduceOp; if( ser.is_unpacking() ) init_reduce(); } #endif }; // function templates are easier to use the template classes // let's define a function which returns the right context type by // just providing the operations. template< typename MapOp, typename ReduceOp > mapreduce_context< MapOp, ReduceOp > * make_mapreduce( const MapOp & mo, const ReduceOp & ro ) { return new mapreduce_context< MapOp, ReduceOp >( mo, ro ); } // get the line, read word by word and put each instance // with unique tag into map_out, which is the // input collection of the reduction template< typename MapOp, typename ReduceOp > int mapreduce_context< MapOp, ReduceOp >::mapper::execute( const map_tag_type & t, mapreduce_context< MapOp, ReduceOp > & ctxt ) const { char_ptr_type _str; ctxt.blocks.get( t, _str ); std::istringstream iss( _str ); ctxt.m_mapOp( iss, ctxt.map_out ); return 0; } // read the given file and trigger map operations template< typename MapOp, typename ReduceOp > int mapreduce_context< MapOp, ReduceOp >::reader::execute( const std::string & file, mapreduce_context< MapOp, ReduceOp > & ctxt ) const { const int MIN_BLK_SZ = 1024; std::string _fn( file ); auto _col = file.rfind( ':' ); if( _col != std::string::npos ) _fn.resize( _col ); std::ifstream _file( _fn ); if( ! _file ) { std::cerr << "Could not open file " << _fn << std::endl; return CnC::CNC_Failure; } long long int _id = 0; int _first = 0; char_ptr_type _block; //( new char[MIN_BLK_SZ], std::default_delete<char[]>() ); _block.resize( MIN_BLK_SZ ); // we read blocks of data // as a block might be in the middle of a word, we need some extra stuff // e.g. we find the last whitespace to end the block // and copy the rest to the next block do { char * _b = const_cast< char * >( _block.c_str() ); _file.read( _b + _first, MIN_BLK_SZ ); int _cnt = _first + _file.gcount(); _block.resize( _cnt ); if( _cnt > 0 ) { // we'll miss the last word if the file doesn't end with a whitespace. auto _last = _block.find_last_of( " \t\n" ); _first = _cnt - _last - 1; std::string _tmp; if( _file ) { _tmp.resize( MIN_BLK_SZ + _first ); if( _first > 0 ) _tmp.replace( 0, _first, _block, _last + 1, _first ); // _tmp.resize( MIN_BLK_SZ + _first ); } _block.resize( _last ); auto _btag = std::make_pair( file, _id ); // put each line ctxt.blocks.put( _btag, _block ); // and control to start searching/matching ctxt.map_tags.put( _btag ); _block = _tmp;//.reset( _tmp, std::default_delete<char[]>() ); ++_id; } } while( _file ); return 0; }
// ******************************************************************************* // Copyright (c) 2013-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** // ******************************************************************************* #include "mapreduce.h" /* Count the number of occurances of all words in a file. An example of using mapreduce_context. It's bascially the same functionality as what's done in count_all_words but using the mapreduce abstraction. Please see mapreduce.h and count_all_words.cpp for details. */ // we use std::plus as the reduce operation namespace std { template< typename T > inline CnC::bitwise_serializable serializer_category( const std::plus< T > * ) { return CnC::bitwise_serializable(); } } // our map function (functor) // reads given stream word by word and counts all occuring words // by simply putting 1 for each word struct count { typedef std::string key_type; typedef size_type result_type; template< typename IStream, typename OutCollection > inline void operator()( IStream & iss, OutCollection & out ) const { std::string _word; while( iss >> _word ) { // let's a do a case-insensitive matching std::transform(_word.begin(), _word.end(), _word.begin(), ::tolower); out.put( _word, 1 ); } } }; CNC_BITWISE_SERIALIZABLE( count ); int main( int argc, char * argv[]) { typedef mapreduce_context< count, std::plus<size_type> > caw_context; #ifdef _DIST_ CnC::dist_cnc_init< caw_context > _dc; #endif if( argc < 2 ) { std::cerr << "expected arguments: <file1>[ <file2> ...]\n"; exit(1); } // create the context caw_context * mapred = make_mapreduce( count(), std::plus<size_type>() ); tbb::tick_count startTime = tbb::tick_count::now(); for( int i = 1; i<argc; ++i ) { mapred->read_tags.put( argv[i] ); // reader_execute( argv[i], mapred ); } mapred->wait(); tbb::tick_count endTime = tbb::tick_count::now(); std::cout << mapred->results.size() << std::endl; // iterate and write out all keys/value pairs with counts > 999 for( auto i = mapred->results.begin(); i != mapred->results.end(); ++i ) { // if( *i->second > 999 ) std::cout << i->first << " \t " << *i->second << std::endl; } delete mapred; std::cout << "Time: " << (endTime-startTime).seconds() << "s.\n"; return 0; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <cnc/reduce.h> #include <cassert> /* Demo for connecting 2 dependent graphs (e.g. reductions). The input to the program are values ina 2d-space (a). The output is the sum of all values (sum2). The first reduction computes the sum for each row (sum1) The second takes this output and sums it to the final value (sum2). There is no barrier and only a single wait (ignore whats enclosed in #ifdef _CNC_TESTING_ ... #endif). We just use sum1 as the output for the first reduction (red1) and also as the input to the second (red2). Evaluation executes fully asynchronously. Additionally, the selector ignores certain values when summing the rows. Last but not least we also increase the stress on the runtime by providing the counts per reduction at different stages (early, mid, late). */ // size of "matrix" const int N = 32; // threashold for selecting candidates for row-sum const int MX = 24; // describes our 2d space typedef std::pair< int, int > tag_type; // selector performing different selections dependening on reduction/tag struct selector { // functor, assign second as reduction-id // return false if first >= MX (we only reduce items with tag->first < MX) template< typename T, typename S, typename R > bool operator()( const std::pair< T, S > & t, R & r ) const { if( t.first >= MX ) return false; r = t.second; return true; } // here we select all and return singleton 0 template< typename T, typename R > bool operator()( const T & t, R & r ) const { r = 0; return true; } }; // the step just puts the value to the "matrix" // in real life this might first do some meaningful computation struct putter { // simply puts 1 with same tag to item-coll template< typename T, typename C > int execute( const T & t, C & ctxt ) const { ctxt.a.put( t, 1 ); // in some cases we put the count here (just testing) if( t.first >= MX && t.first == t.second ) ctxt.cnt1.put( t.first, MX ); return 0; } }; struct reduce_context : public CnC::context< reduce_context > { CnC::tag_collection< tag_type > tags; // control tags for our step CnC::item_collection< tag_type, int > a; // the matrix CnC::item_collection< int, int > cnt1, cnt2; // provides number of participating items per reduction CnC::item_collection< int, int > sum1, sum2; // outputs of reductions CnC::step_collection< putter > sc; // the steps CnC::graph * red1, * red2; // out 2 sub-graphs: 2 reductions reduce_context() : tags( *this, "tags" ), a( *this, "a" ), cnt1( *this, "cnt1" ), cnt2( *this, "cnt2" ), sum1( *this, "sum1" ), sum2( *this, "sum2" ), sc( *this, "step" ), red1( NULL ), red2( NULL ) { tags.prescribes( sc, *this ); sc.produces( a ); // first reduction inputs a and outputs sum1 red1 = CnC::make_reduce_graph( *this, "red_row", a, // input collection: our matrix cnt1, // count of items per reduction sum1, // output collection: sum per row std::plus<int>(), // reduction operation '+' 0, //identity element selector() ); // selector // second reduction inputs sum1 and outputs sum2 red2 = CnC::make_reduce_graph( *this, "red_red", sum1, // the input collection is the output of red1 cnt2, // count of items per reduction sum2, // the final output: sum of all values std::plus<int>(), // reduction operation '+' 0, //identity element selector() ); // selector #ifdef _CNC_TESTING_ CnC::debug::trace( sum1, 1 ); CnC::debug::trace( *red1, 2 ); CnC::debug::trace( *red2, 2 ); #endif } ~reduce_context() { delete red1; } }; int main() { #ifdef _DIST_ CnC::dist_cnc_init< reduce_context > _dc; #endif // create the context reduce_context ctxt; // put the count for some reduction before eveything else for( int i = 2; i < MX; ++i ) ctxt.cnt1.put( i, MX ); // put control tags for( int i = 0; i < N; ++i ) { for( int j = N-1; j >= 0; --j ) ctxt.tags.put( tag_type( i, j ) ); } #ifdef _CNC_TESTING_ // wait for all reductions except 3 ctxt.wait(); #endif ctxt.cnt1.put( 0, MX ); ctxt.cnt2.put(0,N); #ifdef _CNC_TESTING_ // wait for all reductions except one to complete ctxt.wait(); #endif // put the count for one reduction after everything else ctxt.cnt1.put( 1, MX ); // wait for safe state ctxt.wait(); // iterate, check and write out all keys/value pairs auto _sz = ctxt.sum1.size(); if( _sz != N ) { std::cerr << "#reductions is " << _sz << ", expected " << N << std::endl; return 1239; } for( auto i = ctxt.sum1.begin(); i != ctxt.sum1.end(); ++i ) { if( *i->second != MX ) { std::cerr << "cnt1 of " << i->first << " is " << *i->second << ", expected " << MX << std::endl; return 1234; } } int n = 0; ctxt.sum2.get(0, n ); if( n != N*MX ) { std::cerr << "reduce count is " << n << ", expected " << N*MX << std::endl; return 1236; } std::cout << "Success with " << n << std::endl; return 0; }
//******************************************************************************** // Copyright (c) 2013-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** /* Demonstration of a hidden graph that's active outside the on_put callbacks. Using leave_quiescence and enter_quiescence to communicate its internal state. Environment puts data that the graph operates on. The graph's computation is just sleeping random time, but when done it puts control and data that a consumer step outside the hidden graph uses. For distribution we use tuner::consumed_on to assign the input data to remote processes. */ #ifdef _DIST_ # include <cnc/dist_cnc.h> #else # include <cnc/cnc.h> #endif #include <cnc/debug.h> #include <cassert> #include <ctime> #include <tbb/concurrent_queue.h> #include <thread> // std::thread #include <chrono> // std::chrono::milliseconds // This is our hidden graph. It is parametrized by 3 collections: the // input data colleciton, an output tag collection and a output data // collection. // // It sets its state to active and registers a on_put callback which // stores the input data in an internal queue. // // An internal thread pops entries from the queue, sleeps for a while // and then puts a control tag ana corresponding data item. When the // thread has processed 7 entries it reports quiescence and exits. // // The environment only puts data, no control. Without quiescence // reporting the wait call could return immediately as no steps are // prescribed. AS the hidden graph sets its state to active right at // the beginning, the wait() call will not succeed until the graph // signaled its quiescence. template< typename IC_IN, typename IC_OUT, typename TC_OUT > class hidden_graph : public CnC::graph { private: typedef tbb::concurrent_bounded_queue< typename IC_IN::data_type > blocking_queue; // the callback for incoming data just pushes the items on our internal queue struct on_data : public IC_IN::callback_type { on_data( blocking_queue & q ) : m_q(q) {} void on_put( const typename IC_IN::tag_type & /*tag*/, const typename IC_IN::data_type & val ) { m_q.push( val ); }; blocking_queue & m_q; }; // we start a thread which takes data from our internal queue and works on it. // it enters quiescence and exits after processing 7 items // this is just a demo, so our work is simply sleepings struct observe_and_compute { void operator()( blocking_queue * queue, IC_OUT * dataout, TC_OUT * tagout, const hidden_graph< IC_IN, IC_OUT, TC_OUT > * graph ) { typename IC_IN::data_type _item; typename TC_OUT::tag_type _tag(CnC::tuner_base::myPid()*1000); do { // get next input queue->pop( _item ); // and do some work, we just sleep, but it could be anything // ideally of course this would use TBB tasks int _tm = rand() % 1111; std::this_thread::sleep_for( std::chrono::milliseconds(_tm) ); // now produce our output, to make it a little more intersting let's make it conditional if( _tm % 4 ) { dataout->put( _tag, _item + _tm ); tagout->put( _tag ); ++_tag; } } while( _tag < CnC::tuner_base::myPid()*1000+7 ); // we are done, before exiting we must inform the runtime graph->enter_quiescence(); CnC::Internal::Speaker oss; oss << "done observe_and_compute"; } }; public: // the constructor tells the runtime that the graph is not quiescent // it also starts the thread and registers the on_put callback template< typename Ctxt > hidden_graph( CnC::context< Ctxt > & ctxt, const std::string & name, IC_IN & ic1, IC_OUT & ic2, TC_OUT & tc ) : CnC::graph( ctxt, name ), m_input( ic1 ), m_dataout( ic2 ), m_tagout( tc ), m_queue(), m_thread( observe_and_compute(), &m_queue, &m_dataout, &m_tagout, this ) { // we started a thread, we are not quiescent until it exits this->leave_quiescence(); // register the callback m_input.on_put( new on_data( m_queue ) ); } virtual ~hidden_graph() { m_thread.join(); } private: IC_IN & m_input; // here we expect the input to arrive IC_OUT & m_dataout; // here we put our output data TC_OUT & m_tagout; // here we put the control tags blocking_queue m_queue; // the queue where we push the data and the threads pops data from std::thread m_thread; // our internal thread }; // a template function is much easier to use than a template class with many template arguments. template< typename Ctxt, typename IC_IN, typename IC_OUT, typename TC_OUT > hidden_graph< IC_IN, IC_OUT, TC_OUT > * make_hgraph( CnC::context< Ctxt > & ctxt, const std::string & name, IC_IN & ic1, IC_OUT & ic2, TC_OUT & tc ) { return new hidden_graph< IC_IN, IC_OUT, TC_OUT >( ctxt, name, ic1, ic2, tc ); } struct hg_context; // here we consume the values produced by the hidden graph struct consume { int execute( const int tag, hg_context & ctxt ) const; }; // our tuner makes sure the data gets distributed // as it gets produced in the env, we must define the consumed_on method struct hg_tuner : public CnC::hashmap_tuner { int consumed_on( const int & tag ) const { return tag % numProcs(); } }; // our actual "global" context struct hg_context : public CnC::context< hg_context > { CnC::step_collection< consume > consumer; CnC::item_collection< int, int, hg_tuner > input_data; CnC::item_collection< int, int > processed_data; CnC::item_collection< int, int > result_data; CnC::tag_collection< int > consumer_tags; CnC::graph * hgraph; hg_context() : consumer( *this, "consumer" ), input_data( *this, "input_data" ), processed_data( *this, "processed_data" ), result_data( *this, "result_data" ), consumer_tags( *this, "consumer_tags" ) { consumer_tags.prescribes( consumer, *this ); consumer.consumes( processed_data ); consumer.produces( result_data ); hgraph = make_hgraph( *this, "hidden_graph", input_data, processed_data, consumer_tags ); // CnC::debug::trace_all( *this ); } }; // the consumer just gets the data and puts something into result int consume::execute( const int tag, hg_context & ctxt ) const { int _val; ctxt.processed_data.get( tag, _val ); ctxt.result_data.put( tag+tag, _val*_val ); return 0; } // the env only puts data and waits for completion int main() { srand( 11 ); #ifdef _DIST_ CnC::dist_cnc_init< hg_context > _dc; #endif hg_context _ctxt; for( int i = 0; i < 444; ++i ) { _ctxt.input_data.put( i, rand() ); } _ctxt.wait(); // we use srand, so the number of generated tuples should always be identical if( _ctxt.result_data.size() != 7 * CnC::tuner_base::numProcs() ) { std::cerr << "Error: expected " << (7 * CnC::tuner_base::numProcs()) << " items, found " << _ctxt.result_data.size() << std::endl; exit(11); } return 0; }
/* ******************************************************************************* * Copyright (c) 2013-2014, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ********************************************************************************/ /* Showcasing ths SPMD style interface to CnC. We'll alternate between MPI and CnC phases. Additionally we'll let different groups of processes concurrently operate on separate CnC phases (we'll use the same context, but different instances of it). This is an "ariticial" example, we don't really compute anything meaningful. The MPI-phase is a simple All_to_all communication. The CnC phase prescribes a set of steps which simple take an input and produce one output. We use the control tag as the key of the output item. We use tag/F as the key of the input item, as tags are integers this makes F many steps depend on the same item instance. */ #include <mpi.h> #ifdef _DIST_ #include <cnc/dist_cnc.h> #else #include <cnc/cnc.h> #endif #include <cnc/debug.h> struct my_context; // our simple step struct MyStep { int execute( int n, my_context & c ) const; }; const int N = 480; // number of step-instances, will create control tags 0-N const int F = 6; // number of steps reading the same item // include the tuner, needed for testing (only) #include "mpicnc.h" // The CnC context defines a trivial graph with 3 collections. // Nothing specific to our MPI/CNC/SPMD combo struct my_context : public CnC::context< my_context > { CnC::step_collection< MyStep, my_tuner > m_steps; CnC::tag_collection<int> m_tags; CnC::item_collection<int, int, my_tuner> m_items; my_context() : CnC::context< my_context >(), m_steps( *this ), m_tags( *this ), m_items( *this ) { m_tags.prescribes( m_steps, *this ); m_steps.consumes( m_items ); m_steps.produces( m_items ); CnC::debug::trace( m_items ); //CnC::debug::trace_all( *this ); } }; // ok, let's get one item and put another one. // we don't care what the data is. int MyStep::execute( int t, my_context & c ) const { int item = 11; // tags are integers; so t/F returns the same value for multiple values of t(ag) if( t ) c.m_items.get( t/F, item ); c.m_items.put( t, t+item*item ); return CnC::CNC_Success; } // This is our CnC phase. // We instantiate the dist_cnc_init object, put a number of tags and wait. // The function can be called with different parameters. We need the // communicator in any case, but of course it can be MPI_Comm_World. // If dist_env is true we do it the SPMD-way: every process, not only // root, executes everything. If dist_env is false only the root // executes things outside the dist_cnc_init constructor. void cnc_phase( MPI_Comm mc, bool dist_env ) { #ifdef _DIST_ // Init distribution infrastructure (RAII), most likely behaves like a barrier // if dist_env==false client processes get blocked in here forever. CnC::dist_cnc_init< my_context > dc_init( mc, dist_env ); #endif // as we provided a communicator to dc_init, all processes // continue here (by default only rank 0 gets here) std::cout << "Let's go\n"; // now let's get MPI rank etc. int rank = 0, numranks = 0; MPI_Comm_rank(mc,&rank); MPI_Comm_size(mc,&numranks); // The environment code should be executed on rank 0 and // on all client processes of a distributed environment. // We might have more than one rank 0 // if different communicators are in use. // Multiple environments are semantically tricky, combined with // CnC they make sense if the MPI phase creates distributed // data to be used by CnC. We mimic this by distributing // the control-puts across all processes of a distributed env. if( rank == 0 || dist_env ) { // create the context, the runtime will distribute it under the hood my_context c; // dist_env requires a sync, e.g. a barrier if( dist_env ) MPI_Barrier( mc ); // start tag, 0 if root int s = rank; // in a dist env each process puts a subset of tags // we do this by setting the loop-increment to the number // of processes in the env int inc = dist_env ? numranks : 1; // Simply put all (owned) control tags for (int number = s; number < N; number += inc) { c.m_tags.put(number); } // everyone in the env waits for quiescence c.wait(); std::cout << "done\n"; } // here the CnC distribution infrastructure gets shut down // when the dist_cnc_init object gets destructed (RAII) } int main( int argc, char *argv[] ) { int numranks, rank; int p; MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p ); if( p != MPI_THREAD_MULTIPLE ) std::cerr << "Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl; MPI_Comm_size(MPI_COMM_WORLD,&numranks); MPI_Comm_rank(MPI_COMM_WORLD,&rank); if( rank == 0 ) { system("ldd /home/fschlimb/cnc/trunk/distro/tests_runtime/tests-linux-intel64/distmpicnc"); } // Let's split COMM_WORLD into 2 MPI_Comm's MPI_Comm newcomm; int color = rank%2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); // let's not only iterate through alternating CnC/MPI phases, but // also let 2 CnC "instances" work concurrently (each has its own // MPICommunicator) for( int i = 0; i < 2; ++i ) { // you can loop as often as you like // Now let's do some CnC, all processes execute this, but on different MPI_Comm's. // One communicator is executed as a dist_env (color==0) // the other as a "normal" single-env on its root (color==1) cnc_phase( newcomm, color == 1 ); // And some MPI in between // All procs do this together on MPI_COMM_WORLD int res; MPI_Allreduce(&color, &res, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); } // Done: collectively shut down MPI MPI_Finalize(); if( rank == 0 ) std::cerr << "\n\nFinalized and out.\nThis should be the last message.\n"; }
//******************************************************************************** // Copyright (c) 2007-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ** // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ** // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ** // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ** // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ** // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ** // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ** // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ** // THE POSSIBILITY OF SUCH DAMAGE. ** //******************************************************************************** /* this version differs from the plain version - it uses smart-pointersm, which avoids data-copies and allows distributed allocation of the tile-array; performance seems to suffer a bit from this, but memory footprint is cut to half - in dist-mode, the array/matrix is generated distributedly, e.g. each process generates what it owns; input to the CnC graph is also done distributedly Mimicking SPMD-style interaction with MPI codes. Search for "FOR_ME" and "_DIST_" to look at these changes. */ #ifdef _DIST_ #include <mpi.h> #include <cnc/dist_cnc.h> #include <cnc/internal/dist/distributor.h> #else #include <cnc/cnc.h> #endif #include "cnc/debug.h" #include <utility> #include <iostream> #include <stdio.h> #include <stdlib.h> #include <time.h> #include <math.h> #include <string.h> #include <cassert> #include <memory> #ifdef WIN32 #include <windows.h> #include <psapi.h> #else #include <sys/resource.h> #endif #include "tbb/tick_count.h" #include "tbb/atomic.h" #include "../tile.h" using namespace CnC; class tile_array; struct tile_tag { int m_i0, m_i1, m_i2, m_dim; tile_tag( int dim, int i0, int i1, int i2 ) : m_i0(i0), m_i1(i1), m_i2(i2), m_dim(dim) {}; bool operator==( const tile_tag & t ) const { return m_dim == t.m_dim && m_i0 == t.m_i0 && m_i1 == t.m_i1 && m_i2 == t.m_i2; } #ifdef _DIST_ tile_tag() : m_i0( -1 ), m_i1( -1 ), m_i2( -1 ), m_dim( 0 ) {} void serialize( CnC::serializer & ser ) { ser & m_i0 & m_i1 & m_i2 & m_dim; } #endif }; template <> struct cnc_hash< tile_tag > { size_t operator()(const tile_tag& tt) const { unsigned int h = (int)tt.m_dim; unsigned int high = h & 0xf8000000; h = h << 5; h = h ^ (high >> 27); h = h ^ tt.m_i0; unsigned int high1 = h & 0xf8000000; h = h << 5; h = h ^ (high1 >> 27); h = h ^ tt.m_i1; unsigned int high2 = h & 0xf8000000; h = h << 5; h = h ^ (high2 >> 27); h = h ^ tt.m_i2; return size_t(h); } }; namespace std { std::ostream & cnc_format( std::ostream& os, const tile_tag &tt ) { return os << "(" << tt.m_dim << ":" << tt.m_i0 << "," << tt.m_i1 << "," << tt.m_i2 << ")"; } } struct my_context; struct compute_inverse { int execute( const tile_tag & t, my_context & c ) const; }; //#define NO_CONSUMED_ON 1 struct my_tuner : public CnC::step_tuner<>, public CnC::hashmap_tuner { typedef tile_tag tag_type; int PROC_FOR_TILE( int _x, int _y ) const { return ( ( ( ( (_y) * m_tnx + 31 ) + (_x) ) ) % m_nP ); } int COMPUTE_ON( int _i, int _j ) const { return ( PROC_FOR_TILE( (_i) / m_tsx, (_j) / m_tsy ) ); } template< class dependency_consumer > void depends( const tile_tag & tag, my_context & c, dependency_consumer & dC ) const; my_tuner( int ntiles = 0 ); int compute_on( const tile_tag & tag, my_context & ) const; int get_count( const tag_type & tag ) const; #ifdef NO_CONSUMED_ON int consumed_on( const tag_type & tag ) const { return CnC::CONSUMER_UNKNOWN; } #else const std::vector< int > & consumed_on( const tile_tag & tag ) const; #endif int produced_on( const tile_tag & tag ) const; int m_nP; int m_tnx, m_tny, m_tsx, m_tsy; std::vector< std::vector< int > > m_rows, m_cols, m_rowcols, m_procs; std::vector< int > m_all; void serialize( CnC::serializer & ser ) { ser & m_nP & m_tnx & m_tny & m_tsx & m_tsy & m_rows & m_cols & m_rowcols & m_procs & m_all; assert( m_nP > 0 && m_nP == (int)m_procs.size() ); } }; typedef std::shared_ptr< const tile > const_tile_type; typedef std::shared_ptr< tile > tile_type; struct my_context : public CnC::context< my_context > { my_tuner m_tuner; CnC::step_collection< compute_inverse, my_tuner > m_steps; CnC::item_collection< tile_tag, const_tile_type, my_tuner > m_tiles; CnC::tag_collection< tile_tag > m_tags; my_context( int nt = 0 ) : CnC::context< my_context >(), m_tuner( nt ), m_steps( *this, "mi", compute_inverse(), m_tuner ), m_tiles( *this, m_tuner ), m_tags( *this ) { m_tags.prescribes( m_steps, *this ); #if 0 CnC::debug::trace( m_tiles, 3 ); // CnC::debug::trace( m_tags, 3 ); // CnC::debug::trace( m_steps, 3 ); #endif CnC::debug::collect_scheduler_statistics(*this); } virtual void serialize( CnC::serializer & ser ) { ser & m_tuner; } }; #ifdef _DIST_ // returns if the given tile (x,y) is owned by my (r) # define FOR_ME( _c, _r, _x, _y ) ((_r) == _c.m_tuner.COMPUTE_ON( _x, _y )) // returns if the given tile (x,y) is owned by my (r)m always true if r==0 # define FOR_ME_OR_0( _c, _r, _x, _y ) (((_r)==0) || FOR_ME( _c, _r, _x, _y )) #else # define FOR_ME( _c, _r, _x, _y ) true # define FOR_ME_OR_0( _c, _r, _x, _y ) true #endif class tile_array { int m_dim; int m_size; const_tile_type * m_tiles; public: int dim() const { return m_dim; } int size() const { return m_size; } tile_array( int size = 0 ) : m_dim((size + TILESIZE - 1)/TILESIZE), // Size/TILESIZE rounded up m_size(size), m_tiles( NULL ) { if( m_dim ) m_tiles = new const_tile_type[m_dim*m_dim]; } ~tile_array() { delete[] m_tiles; } tile_array(const tile_array& t) { m_size = t.m_size; m_dim = t.m_dim; int sz = m_dim*m_dim; m_tiles = new const_tile_type[sz]; for( int i = 0; i < sz; ++i ) { m_tiles[i] = t.m_tiles[i]; } } tile_array& operator=(const tile_array& t) { if (this != &t) { int sz = t.m_dim*t.m_dim; if( m_dim != t.m_dim ) { delete[] m_tiles; m_tiles = new const_tile_type[sz]; m_size = t.m_size; m_dim = t.m_dim; } for( int i = 0; i < sz; ++i ) { m_tiles[i] = t.m_tiles[i]; } } return *this; } void dump( double epsilon = 1e-12 ) const { for (int i = 0; i < m_dim; i++ ) { for (int j = 0; j < m_dim; j++ ) { std::cout << "(" << i << "," << j << ")" << std::endl; m_tiles[m_dim*i+j]->dump(epsilon); } std::cout << std::endl; } } int generate_matrix( int dimension, my_context & c ) { #ifdef _DIST_ // need rank determine owned fraction of the matrix (FOR_ME) int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank); #endif printf("Floating point elements per matrix: %i x %i\n", dimension, dimension); printf("Floating point elements per tile: %i x %i\n", TILESIZE, TILESIZE); if( m_size != dimension ) { delete[] m_tiles; m_size = dimension; m_dim = (m_size + TILESIZE - 1)/TILESIZE; // Size/TILESIZE rounded up m_tiles = new const_tile_type[m_dim*m_dim]; } printf("tiles per matrix: %i x %i\n", m_dim, m_dim); int dim = m_dim; int size = m_size; std::cout << "dim(" << dim << ") size(" << size << ")" << std::endl; double e = 0.0; for (int I = 0; I < dim; I++) { for (int J = 0; J < dim; J++) { if( FOR_ME_OR_0( c, rank, I, J ) ) { // for the identity check we need the entire matrix on 0 srand( I*m_dim+J ); int ii = I * TILESIZE;; tile_type _tile = std::make_shared< tile >(); for (int i = 0; i < TILESIZE; i++) { int jj = J * TILESIZE; for (int j = 0; j < TILESIZE; j++) { if ((ii < size)&(jj < size)) e = double(rand())/RAND_MAX; else if (ii == jj) e = 1; // On-diagonal padding else e = 0; // Off-diagonal padding // std::cout << "m[" << ii << "," << jj << "(" << I << "," << J << "," << i << "," << j << ")]=" << e << std::endl; _tile->set(i,j,e); jj++; } ii++; } // std::cerr << rank << " generated tile " << I << "," << J << std::endl; m_tiles[dim*I + J] = _tile; } } } return m_dim; } int identity_check( double epsilon = MINPIVOT ) const { int ecount = 0; for (int i = 0; i < m_dim; i++ ) { for (int j = 0; j < m_dim; j++ ) { int tcount = 0; const_tile_type t = m_tiles[m_dim*i+j]; tcount = (i == j) ? t->identity_check(epsilon) : t->zero_check(epsilon); if (tcount == 0 ) continue; std::cout << "problem in tile(" << i << "," << j << ")" << std::endl; ecount += tcount; } } return ecount; } bool equal( const tile_array &b ) const { if (b.m_dim != m_dim) return false; for (int i = 0; i < m_dim; i++ ) { for (int j = 0; j < m_dim; j++ ) { const_tile_type t = m_tiles[m_dim*i+j]; if (!t->equal( *b.m_tiles[m_dim*i+j] )) return false; } } return true; } // c = this * b tile_array multiply(const tile_array &b) const { tile_array c(m_size); for (int i = 0; i < m_dim; i++) { for (int j = 0; j < m_dim; j++) { tile_type t = std::make_shared< tile >(); t->zero(); for (int k = 0; k < m_dim; k++) { t->multiply_add_in_place(*m_tiles[m_dim*i+k], *b.m_tiles[m_dim*k+j]); } c.m_tiles[m_dim*i+j] = t; } } return c; } tile_array inverse() { tile_array b = *this; int dim = m_dim; for (int n = 0; n < dim; n++) { const_tile_type pivot_inverse = std::make_shared< const tile >( C_INVERSE, *b.m_tiles[dim*n+n] ); b.m_tiles[dim*n+n] = pivot_inverse; for (int j = 0; j < dim; j++) { if (j == n) continue; const tile& tnj = *b.m_tiles[dim*n+j]; b.m_tiles[dim*n+j] = std::make_shared< const tile >( C_MULTIPLY, *pivot_inverse, tnj ); } for (int i = 0; i < dim; i++) { if (i == n) continue; const_tile_type tin = b.m_tiles[dim*i+n]; b.m_tiles[dim*i+n] = std::make_shared< const tile >( C_MULTIPLY_NEGATE, *tin, *pivot_inverse ); for (int j = 0; j < dim; j++) { if (j == n) continue; const_tile_type tnj = b.m_tiles[dim*n+j]; tile_type tmp = std::make_shared< tile >( *b.m_tiles[dim*i+j] ); tmp->multiply_subtract_in_place(*tin, *tnj); b.m_tiles[dim*i+j] = tmp; } } } return b; } tile_array inverse_cnc( my_context & c ) { // all participating processes execute this int rank = 0; #ifdef _DIST_ // need rank determine owned fraction of the matrix (FOR_ME) MPI_Comm_rank(MPI_COMM_WORLD,&rank); #endif // copy what process owns from local array to item-collection for (int i = 0; i < m_dim; i++) { for (int j = 0; j < m_dim; j++) if( FOR_ME( c, rank, i, j ) ) { tile_tag t( m_dim, 0, i, j); c.m_tiles.put( t, m_tiles[m_dim*i+j] ); assert( m_tiles[m_dim*i+j].get() ); } } // put control tags (only what this process is reponsible for) for (int i = 0; i < m_dim; i++) { for (int j = 0; j < m_dim; j++) if( FOR_ME( c, rank, i, j ) ) { c.m_tags.put( tile_tag( m_dim, 0, i, j) ); } } // "wait" is an implicit barrier, // no process returns until entire dsitributed graph evaluation reached quiesence c.wait(); // now write back data from item-collection to local arry tile_array b(m_size); // c.m_tiles.size(); // not dist-env-ready for (int i = 0; i < m_dim; i++) { for (int j = 0; j < m_dim; j++) if( FOR_ME_OR_0( c, rank, i, j ) ) {// we need all output on rank 0 for verification const_tile_type _tmp; c.m_tiles.get( tile_tag( m_dim, m_dim, i, j), _tmp ); b.m_tiles[m_dim*i+j] = _tmp; } } return b; } }; int compute_inverse::execute( const tile_tag & tag, my_context & c ) const { int n = tag.m_i0; int i = tag.m_i1; int j = tag.m_i2; tile_tag out_tag( tag.m_dim, n+1, i, j ); if (i == n && j == n ) { const_tile_type tnn; c.m_tiles.get( tag, tnn ); // tile out_nij = tnn.inverse(); c.m_tiles.put( out_tag, std::make_shared< const tile >( C_INVERSE, *tnn ) );//out_nij ); } else if ( i == n ) { const_tile_type tnj; c.m_tiles.get( tag, tnj ); assert( tnj.get() ); const_tile_type tn1nn; c.m_tiles.get( tile_tag( tag.m_dim, n+1, n, n ), tn1nn ); // tile out_nij = tn1nn.multiply(tnj); c.m_tiles.put( out_tag, std::make_shared< const tile >( C_MULTIPLY, *tn1nn, *tnj ) ); } else if ( j == n ) { const_tile_type tin; c.m_tiles.get( tag, tin ); const_tile_type tn1nn; c.m_tiles.get( tile_tag( tag.m_dim, n+1, n, n ), tn1nn ); // tile out_nij = tin.multiply_negate(tn1nn); c.m_tiles.put( out_tag, std::make_shared< const tile >( C_MULTIPLY_NEGATE, *tin, *tn1nn ) );//out_nij ); } else { const_tile_type tij; c.m_tiles.get( tag, tij ); const_tile_type tnin; c.m_tiles.get( tile_tag( tag.m_dim, n, i, n ), tnin ); const_tile_type tn1nj; c.m_tiles.get( tile_tag( tag.m_dim, n+1, n, j ), tn1nj ); tile_type tmp = std::make_shared< tile >( *tij ); tmp->multiply_subtract_in_place( *tnin, *tn1nj ); assert( tmp.get() ); c.m_tiles.put( out_tag, tmp ); // tile out_nij = tij.multiply_subtract( tnin, tn1nj ); // b.m_tiles[dim*i+j] = tmp; // c.m_tiles.put( out_tag, std::make_shared< const tile >( tij->multiply_subtract( *tnin, *tn1nj) ) );//out_nij ); } if ( (n+1) < tag.m_dim ) { c.m_tags.put( out_tag ); } return CnC::CNC_Success; } int my_tuner::get_count( const tag_type & tt ) const { int dim = tt.m_dim; int n = tt.m_i0; if( dim == n ) return CnC::NO_GETCOUNT; int i = tt.m_i1; int j = tt.m_i2; int count = 1; if (i == (n-1) && j == (n-1)) count += (dim-1) + (dim-1); if (i == (n-1) && !(j == n-1)) count += dim-1; if (j == n && !(i == n)) count += dim-1; return count; } my_tuner::my_tuner( int ntiles ) : m_tnx( 0 ), m_tny( 0 ), m_tsx( 1 ), m_tsy( 1 ), m_rows(), m_cols(), m_rowcols(), m_procs(), m_all( 1, CnC::CONSUMER_ALL ) { m_nP = numProcs(); #ifdef _DIST_ int _np = m_nP * 1; assert( _np == 1 || ( ntiles * ntiles ) % _np == 0 ); if( _np > 1 && ntiles ) { // compute grid dimension for partitioning m_tnx = 2, m_tny = _np / 2; while( m_tnx < m_tny && m_tny % 2 == 0 ) { m_tnx *= 2; m_tny = _np / m_tnx; } assert( m_tnx * m_tny == _np ); m_tsx = ntiles / m_tnx; m_tsy = ntiles / m_tny; std::cerr << m_tnx << "x" << m_tny << " tiles of " << m_tsx << "x" << m_tsy << std::endl; assert( m_tnx * m_tsx == ntiles && m_tny * m_tsy == ntiles ); #ifndef NO_CONSUMED_ON // setup vectors to store info for consumed_on m_rows.resize( m_tny ); m_cols.resize( m_tnx ); m_rowcols.resize( m_tnx * m_tny ); for( int i = 0; i < m_tnx; ++i ) { m_cols[i].resize( m_tny ); for( int j = 0; j < m_tny; ++j ) { m_rowcols[j*m_tnx+i].resize( m_tnx+m_tny ); } } for( int j = 0; j < m_tny; ++j ) { m_rows[j].resize( m_tnx ); for( int i = 0; i < m_tnx; ++i ) { m_rows[j][i] = m_cols[i][j] = PROC_FOR_TILE( i, j ); for( int x = 0; x < m_tnx; ++x ) m_rowcols[j*m_tnx+i][x] = PROC_FOR_TILE( x, j ); for( int y = 0; y < m_tny; ++y ) m_rowcols[j*m_tnx+i][m_tnx+y] = PROC_FOR_TILE( i, y ); m_rowcols[j*m_tnx+i][i] = m_rowcols[j*m_tnx+i].back(); m_rowcols[j*m_tnx+i].pop_back(); } } m_procs.resize( m_nP ); for( int i = 0; i < m_nP; ++i ) m_procs[i].resize( 1, i ); assert( m_nP > 0 && (int)m_procs.size() == m_nP ); #endif } #endif } int my_tuner::compute_on( const tile_tag & tag, my_context & ) const { return COMPUTE_ON( tag.m_i1, tag.m_i2 ); } int my_tuner::produced_on( const tile_tag & tag ) const { return ( tag.m_i0 > 0 ? COMPUTE_ON( tag.m_i1, tag.m_i2 ) : 0 ); } template< class dependency_consumer > void my_tuner::depends( const tile_tag & tag, my_context & c, dependency_consumer & dC ) const { int n = tag.m_i0; int i = tag.m_i1; int j = tag.m_i2; dC.depends( c.m_tiles, tag ); //, PROD( tag.m_dim, n-1, i, j ) ); if (i == n && j == n ) { } else if ( i == n ) { dC.depends( c.m_tiles, tile_tag( tag.m_dim, n+1, n, n ) ); } else if ( j == n ) { dC.depends( c.m_tiles, tile_tag( tag.m_dim, n+1, n, n ) ); } else { dC.depends( c.m_tiles, tile_tag( tag.m_dim, n, i, n ) ); dC.depends( c.m_tiles, tile_tag( tag.m_dim, n+1, n, j ) ); } } #ifndef NO_CONSUMED_ON const std::vector< int > & my_tuner::consumed_on( const tile_tag & tag ) const { assert( 0 < m_procs.size() ); int n = tag.m_i0; int i = tag.m_i1; int j = tag.m_i2; if( n - 1 == i && ( i == j || n == j ) ) { return m_rowcols[(j/m_tsy)*m_tnx+(i/m_tsx)]; } else if( n - 1 == i ) { return m_rows[j/m_tsy]; } else if( n == j ) { return m_cols[i/m_tsx]; } return m_procs[COMPUTE_ON( i, j )]; } #endif void report_memory() { std:: cout << "tiles created " << tiles_created << " tiles deleted " << tiles_deleted << " tiles remaining " << tiles_created - tiles_deleted << std::endl; tiles_created = 0; tiles_deleted = 0; static int lastr = 0; #ifdef WIN32 HANDLE self; PROCESS_MEMORY_COUNTERS pmc; SIZE_T resident = 0; self = GetCurrentProcess(); if (GetProcessMemoryInfo(self, &pmc, sizeof(pmc))) { resident = pmc.WorkingSetSize; } CloseHandle(self); #else FILE *f = fopen("/proc/self/statm", "r"); int total, resident, share, trs, drs, lrs, dt; if( fscanf(f,"%d %d %d %d %d %d %d", &total, &resident, &share, &trs, &drs, &lrs, &dt) != 7 ) std::cerr << "error reading /proc/self/statm\n"; #endif std:: cout << "resident memory MB " << double(resident*4096)/1000000 << " increment MB " << double((resident-lastr)*4096)/1000000 << std::endl; lastr = resident; } void report_time( const char * mode, int msz, double time ) { std::cout << mode << " Total Time: " << time << " sec" << std::endl; float Gflops = ((float)2*msz*msz*msz)/((float)1000000000); if (Gflops >= .000001) printf("Floating-point operations executed: %f billion\n", Gflops); if (time >= .001) printf("Floating-point operations executed per unit time: %6.2f billions/sec\n", Gflops/time); } int main(int argc, char *argv[]) { if (!(argc == 2 && 0 != atoi(argv[1]))) { std::cout << "Usage: matrix_inverse dim" << std::endl; return -1; } int sz = atoi(argv[1]); int tdim = (sz + TILESIZE - 1)/TILESIZE; #ifdef _DIST_ // init MPI to mimick SPMD-style interactino with CnC through MPI int p; //!! FIXME passing NULL ptr breaks mvapich1 mpi implementation MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p ); if( p != MPI_THREAD_MULTIPLE ) std::cerr << "Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl; int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD,&rank); { // need a scope for dist_init // init CnC here so that we have the context/tuner // the tuner defines the distribution strategy, which we use when generating the matrix CnC::dist_cnc_init< my_context > dc_init( MPI_COMM_WORLD, true ); { // need a scope for the context #endif tile_array in_array; struct my_context c( tdim ); std::cout << "Generating matrix of size " << argv[1] << std::endl; // all processes do this, we are SPMD! tdim = in_array.generate_matrix(sz, c); report_memory(); // invert serially { std::cout << "Invert serially" << std::endl; tbb::tick_count t0 = tbb::tick_count::now(); #ifndef _DIST_ tile_array out_array = in_array.inverse(); #endif tbb::tick_count t1 = tbb::tick_count::now(); report_time( "Serial", sz, (t1-t0).seconds() ); report_memory(); #ifndef _DIST_ tile_array test = in_array.multiply(out_array); test.identity_check(1e-5); #endif } // end of scope releases out_array and test #ifdef _DIST_ // we need a barrier between CnC context creation and putting stuff MPI_Barrier( MPI_COMM_WORLD ); #endif report_memory(); { std::cout << "Invert CnC steps" << std::endl; //debug::set_num_threads(1); tbb::tick_count t2 = tbb::tick_count::now(); tile_array out_array2 = in_array.inverse_cnc(c); tbb::tick_count t3 = tbb::tick_count::now(); report_time( "CnC", out_array2.size(), (t3-t2).seconds() ); report_memory(); #ifdef _DIST_ // only rank 0 has the full input matrix for the verification if( rank == 0 ) #endif { tile_array test2 = in_array.multiply(out_array2); test2.identity_check(1e-5); } // end of scope releases test2 } // end of scope releases out_array2 report_memory(); #ifdef _DIST_ MPI_Barrier( MPI_COMM_WORLD ); } } MPI_Finalize(); #endif return 0; }
//******************************************************************************** // Copyright (c) 2014-2014 Intel Corporation. All rights reserved. ** // ** // Redistribution and use in source and binary forms, with or without ** // modification, are permitted provided that the following conditions are met: ** // * Redistributions of source code must retain the above copyright notice, ** // this list of conditions and the following disclaimer. ** // * Redistributions in binary form must reproduce the above copyright ** // notice, this list of conditions and the following disclaimer in the ** // documentation and/or other materials provided with the distribution. ** // * Neither the name of Intel Corporation nor the names of its contributors ** // may be used to endorse or promote products derived from this software ** // without specific prior written permission. ** // ** // This software is provided by the copyright holders and contributors "as is" ** // and any express or implied warranties, including, but not limited to, the ** // implied warranties of merchantability and fitness for a particular purpose ** // are disclaimed. In no event shall the copyright owner or contributors be ** // liable for any direct, indirect, incidental, special, exemplary, or ** // consequential damages (including, but not limited to, procurement of ** // substitute goods or services; loss of use, data, or profits; or business ** // interruption) however caused and on any theory of liability, whether in ** // contract, strict liability, or tort (including negligence or otherwise) ** // arising in any way out of the use of this software, even if advised of ** // the possibility of such damage. ** //******************************************************************************** /* Attach a database to an item-table. We create a very simple graph with one step and one item-collection. It doesn't do anything sensible, just a demo. Of course you can write your own application which uses as many DB-attached item_collections as you want. Each item-coll keeps its connection and config separately. We use our sql_tuner to attach to the DB. It uses our wrapper sql_item_table. The sql_config allows configuring the DB (port, table, layout etc) as well as how we actually store (or not) the data in there (by providing SQL statements). E.g. we might pre-fill the collection at start-up with what's already in the table; or we might want to delete everything when attaching to it. The program uses mysql and assumes a user "cnc" with pw "cnc" and a DB "test". You can easily adjust with the SQL config below. Setup your mysql accordingly. */ #include <cnc/cnc.h> #include "mysql_tuner.h" struct my_context; struct my_sqlstep { int execute( int, my_context & ctxt ) const; }; struct my_context : public CnC::context< my_context > { // we use the sql_tuner to attach the item-col to a DB sql_tuner m_tuner; CnC::tag_collection< int > m_tags; CnC::item_collection< int, int, sql_tuner > m_items; CnC::step_collection< my_sqlstep > m_steps; my_context() // the argument to the tuner is a config for the DB : m_tuner( {"tcp://127.0.0.1:3306", // port "cnc", "cnc", // user, password "test", // DB/schema, "DROP TABLE IF EXISTS items;", // drop statement, set to "" to keep table "CREATE TABLE If NOT EXISTS items (id INT, value INT);", // table creation statement, set to "" to not create table "SELECT id, value FROM items;", // read full table statement, set to "" to not read existing entries "DELETE * FROM items;", // clear statements "SELECT value FROM items WHERE id=?;", // get statement "INSERT INTO items( id, value ) VALUES ( ?, ? );", // put statement, set to "" for not putting things into DB "DELETE FROM items WHERE id=?;", // erase statement, set to "" to keep things in table upon collection.reset() }), m_tags( *this, "tags" ), m_items( *this, "items", m_tuner ), m_steps( *this, "steps" ) { m_tags.prescribes( m_steps, *this ); m_steps.consumes( m_items ); m_steps.produces( m_items ); } }; // our step simple gets a piece of data and puts another one int my_sqlstep::execute( int t, my_context & ctxt ) const { int r; ctxt.m_items.get( t-1, r ); ctxt.m_items.put( t, t+r ); return 0; } // we simply create the graph, put the control tags and the initial data. // "mysql -u cnc -p test -pcnc -e "select * from items;" shows what has been stored to the table int main(void) { my_context _ctxt; _ctxt.m_items.put( -1, -1 ); for( int i = 0; i<100; ++i ) { _ctxt.m_tags.put( i ); } _ctxt.wait(); for( int i = 0; i<10; ++i ) { int _ir = -1; _ctxt.m_items.get( i, _ir ); std::cout << _ir << " for " << i << std::endl; } return 0; }