CnC: Samples used in this documentation

samples/fib/fib_tutorial/fibTutorial.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************

// compute fibonacci numbers
//

#define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB

#include <cnc/cnc.h>

// let's use a large type to store fib numbers
typedef unsigned long long fib_type;
// forward declaration
struct fib_context;

// declaration of compute step class
struct fib_step
{
    // declaration of execute method goes here
    int execute( const int & tag, fib_context & c ) const;
};

// this is our context containing collections and defining their depndencies
struct fib_context : public CnC::context< fib_context > // derive from CnC::context
{
    // the step collection for the instances of the compute-kernel
    CnC::step_collection< fib_step >       m_steps;
    // item collection holding the fib number(s)
    CnC::item_collection< int, fib_type >  m_fibs;
    // tag collection to control steps
    CnC::tag_collection< int >             m_tags;

    // constructor
    fib_context();
};

fib_context::fib_context()
        : CnC::context< fib_context >(),
          // pass context to collection constructors
          m_steps( *this ),
          m_fibs( *this ),
          m_tags( *this )
{
    // prescribe compute steps with this (context) as argument
    m_tags.prescribes( m_steps, *this );
    // step consumes m_fibs
    m_steps.consumes( m_fibs );
    // step also produces m_fibs
    m_steps.produces( m_fibs );
}

// the actual step code computing the fib numbers goes here
int fib_step::execute( const int & tag, fib_context & ctxt ) const
{
    switch( tag ) {
        case 0 : ctxt.m_fibs.put( tag, 0 ); break;
        case 1 : ctxt.m_fibs.put( tag, 1 ); break;
        default : 
            // get previous 2 results
            fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 );
            fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 );
            // put our result
            ctxt.m_fibs.put( tag, f_1 + f_2 );
    }
    return CnC::CNC_Success;
}

int main( int argc, char* argv[] )
{
    int n = 42;
    // eval command line args
    if( argc < 2 ) {
        std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl;
    } else n = atol( argv[1] );

    // create context
    fib_context ctxt; 

    // put tags to initiate evaluation
    for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i );

    // wait for completion
    ctxt.wait(); 

    // get result
    fib_type res2;
    ctxt.m_fibs.get( n, res2 );

    // print result
    std::cout << "fib (" << n << "): " << res2 << std::endl;

    return 0;
}

samples/fib/fib_trace/fib_trace.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************

// compute fibonacci numbers
//

#define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB

// let's use a large type to store fib numbers
typedef unsigned long long fib_type;

#include "fib.h"

// the actual step code computing the fib numbers goes here
int fib_step::execute( const int & tag, fib_context & ctxt ) const
{
    switch( tag ) {
        case 0 : ctxt.m_fibs.put( tag, 0 ); break;
        case 1 : ctxt.m_fibs.put( tag, 1 ); break;
        default : 
            // get previous 2 results
            fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 );
            fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 );
            // put our result
            ctxt.m_fibs.put( tag, f_1 + f_2 );
    }
    return CnC::CNC_Success;
}

int main( int argc, char* argv[] )
{
    int n = 42;
    // eval command line args
    if( argc < 2 ) {
        std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl;
    } else n = atol( argv[1] );

    // create context
    fib_context ctxt; 

    // enable debug output for steps
    CnC::debug::trace( ctxt.m_steps );
    // also enable debug output for our items
    CnC::debug::trace( ctxt.m_fibs );

    // put tags to initiate evaluation
    for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i );

    // wait for completion
    ctxt.wait(); 

    // get result
    fib_type res2;
    ctxt.m_fibs.get( n, res2 );

    // print result
    std::cout << "fib (" << n << "): " << res2 << std::endl;

    return 0;
}

samples/fib/fib_trace/fib.h

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************
#ifndef fib_H_ALREADY_INCLUDED
#define fib_H_ALREADY_INCLUDED

#include <cnc/cnc.h>
#include <cnc/debug.h>

// Forward declaration of the context class (also known as graph)
struct fib_context;

// The step classes

struct fib_step
{
    int execute( const int & t, fib_context & c ) const;
};

// The context class
struct fib_context : public CnC::context< fib_context >
{
    // the step collection for the instances of the compute-kernel
    CnC::step_collection< fib_step >      m_steps;
    // Item collections
    CnC::item_collection< int, fib_type > m_fibs;
    // Tag collections
    CnC::tag_collection< int >            m_tags;

    // The context class constructor
    fib_context()
        : CnC::context< fib_context >(),
          // Initialize each step collection
          m_steps( *this, "fib_step" ),
          // Initialize each tag collection
          m_fibs( *this, "fibs" ),
          // Initialize each item collection
          m_tags( *this, "tags" )
    {
        // prescribe compute steps with this (context) as argument
        m_tags.prescribes( m_steps, *this );
        // step consumes m_fibs
        m_steps.consumes( m_fibs );
        // step also produces m_fibs
        m_steps.produces( m_fibs );
    }
};

#endif // fib_H_ALREADY_INCLUDED

samples/fib/fib_stats/fib_stats.cpp

//********************************************************************************
// Copyright (c) 2010-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************

// compute fibonacci numbers
//

#define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB

// let's use a large type to store fib numbers
typedef unsigned long long fib_type;

#include "fib.h"

// the actual step code computing the fib numbers goes here
int fib_step::execute( const int & tag, fib_context & ctxt ) const
{
    switch( tag ) {
        case 0 : ctxt.m_fibs.put( tag, 0 ); break;
        case 1 : ctxt.m_fibs.put( tag, 1 ); break;
        default : 
            // get previous 2 results
            fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 );
            fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 );
            // put our result
            ctxt.m_fibs.put( tag, f_1 + f_2 );
    }
    return CnC::CNC_Success;
}

int main( int argc, char* argv[] )
{
    int n = 42;
    // eval command line args
    if( argc < 2 ) {
        std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl;
    } else n = atol( argv[1] );

    // create context
    fib_context ctxt; 

    // show scheduler statistics when done
    CnC::debug::collect_scheduler_statistics( ctxt );

    // put tags to initiate evaluation
    for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i );

    // wait for completion
    ctxt.wait(); 

    // get result
    fib_type res2;
    ctxt.m_fibs.get( n, res2 );

    // print result
    std::cout << "fib (" << n << "): " << res2 << std::endl;

    return 0;
}

samples/fib/fib_stats/fib.h

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************
#ifndef fib_H_ALREADY_INCLUDED
#define fib_H_ALREADY_INCLUDED

#include <cnc/cnc.h>
#include <cnc/debug.h>

// Forward declaration of the context class (also known as graph)
struct fib_context;

// The step classes

struct fib_step
{
    int execute( const int & t, fib_context & c ) const;
};

// The context class
struct fib_context : public CnC::context< fib_context >
{
    // the step collection for the instances of the compute-kernel
    CnC::step_collection< fib_step >      m_steps;
    // Item collections
    CnC::item_collection< int, fib_type > m_fibs;
    // Tag collections
    CnC::tag_collection< int >            m_tags;

    // The context class constructor
    fib_context()
        : CnC::context< fib_context >(),
          // Initialize each step collection
          m_steps( *this, "fib_step" ),
          // Initialize each tag collection
          m_fibs( *this, "fibs" ),
          // Initialize each item collection
          m_tags( *this, "tags" )
    {
        // prescribe compute steps with this (context) as argument
        m_tags.prescribes( m_steps, *this );
        // step consumes m_fibs
        m_steps.consumes( m_fibs );
        // step also produces m_fibs
        m_steps.produces( m_fibs );
    }
};

#endif // fib_H_ALREADY_INCLUDED

samples/fib/fib_tuner/fib_tuner.cpp

//********************************************************************************
// Copyright (c) 2010-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************

// compute fibonacci numbers
//

#define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB

// let's use a large type to store fib numbers
typedef unsigned long long fib_type;

#include "cnc/cnc.h"

struct fib_context;

// let's use a tuner to pre-declare dependencies
struct fib_tuner : public CnC::step_tuner<>
{
    template< class dependency_consumer >
    void depends( const int & tag, fib_context & c, dependency_consumer & dC ) const;
};

#include "fib.h"

template< class dependency_consumer >
void fib_tuner::depends( const int & tag, fib_context & c, dependency_consumer & dC ) const
{
    // we have item-dependencies only if tag > 1
    if( tag > 1 ) {
      dC.depends( c.m_fibs, tag - 1 );
      dC.depends( c.m_fibs, tag - 2 );
    }
}

// the actual step code computing the fib numbers goes here
int fib_step::execute( const int & tag, fib_context & ctxt ) const
{
    switch( tag ) {
        case 0 : ctxt.m_fibs.put( tag, 0 ); break;
        case 1 : ctxt.m_fibs.put( tag, 1 ); break;
        default : 
            // get previous 2 results
            fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 );
            fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 );
            // put our result
            ctxt.m_fibs.put( tag, f_1 + f_2 );
    }
    return CnC::CNC_Success;
}

int main( int argc, char* argv[] )
{
    int n = 42;
    // eval command line args
    if( argc < 2 ) {
        std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl;
    } else n = atol( argv[1] );

    // create context
    fib_context ctxt; 

    // show scheduler statistics when done
    CnC::debug::collect_scheduler_statistics( ctxt );

    // put tags to initiate evaluation
    for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i );

    // wait for completion
    ctxt.wait(); 

    // get result
    fib_type res2;
    ctxt.m_fibs.get( n, res2 );

    // print result
    std::cout << "fib (" << n << "): " << res2 << std::endl;

    return 0;
}

samples/fib/fib_preschedule/fib_preschedule.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************

// compute fibonacci numbers
//

#define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB

// let's use a large type to store fib numbers
typedef unsigned long long fib_type;

#include "cnc/cnc.h"

struct fib_context;

// let's use a tuner to pre-declare dependencies
struct fib_tuner : public CnC::step_tuner<>
{
    bool preschedule() const { return true; }
};

#include "fib.h"

// the actual step code computing the fib numbers goes here
int fib_step::execute( const int & tag, fib_context & ctxt ) const
{
    switch( tag ) {
        case 0 : ctxt.m_fibs.put( tag, 0 ); break;
        case 1 : ctxt.m_fibs.put( tag, 1 ); break;
        default : 
            // get previous 2 results
            fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 );
            fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 );
            // ctxt.flush_gets(); // uncomment this line to prohibit pre-scheduling from completing full step-execution
            // put our result
            ctxt.m_fibs.put( tag, f_1 + f_2 );
    }
    return CnC::CNC_Success;
}

int main( int argc, char* argv[] )
{
    int n = 42;
    // eval command line args
    if( argc < 2 ) {
        std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl;
    } else n = atol( argv[1] );

    // create context
    fib_context ctxt; 

    // show scheduler statistics when done
    CnC::debug::collect_scheduler_statistics( ctxt );

    // put tags to initiate evaluation
    for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i );

    // wait for completion
    ctxt.wait(); 

    // get result
    fib_type res2;
    ctxt.m_fibs.get( n, res2 );

    // print result
    std::cout << "fib (" << n << "): " << res2 << std::endl;

    return 0;
}

samples/fib/fib_getcount/fib_getcount.cpp

//********************************************************************************
// Copyright (c) 2010-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************

// compute fibonacci numbers
//

#define _CRT_SECURE_NO_DEPRECATE // to keep the VS compiler happy with TBB

#include <cnc/cnc.h>

// let's use a large type to store fib numbers
typedef unsigned long long fib_type;

struct fib_context;

// let's use a tuner to pre-declare dependencies
struct fib_tuner : public CnC::step_tuner<>
{
    // pre-declare data-dependencies
    template< class dependency_consumer >
    void depends( const int & tag, fib_context & c, dependency_consumer & dC ) const;
};

struct item_tuner : public CnC::hashmap_tuner
{
    // provide number gets to each item
    int get_count( const int & tag ) const;
};

#include "fib.h"

template< class dependency_consumer >
void fib_tuner::depends( const int & tag, fib_context & c, dependency_consumer & dC ) const
{
    // we have item-dependencies only if tag > 1
    if( tag > 1 ) {
      dC.depends( c.m_fibs, tag - 1 );
      dC.depends( c.m_fibs, tag - 2 );
    }
}

int item_tuner::get_count( const int & tag ) const
{
    return tag > 0 ? 2 : 1;
}

// the actual step code computing the fib numbers goes here
int fib_step::execute( const int & tag, fib_context & ctxt ) const
{
    switch( tag ) {
        case 0 : ctxt.m_fibs.put( tag, 0 ); break;
        case 1 : ctxt.m_fibs.put( tag, 1 ); break;
        default : 
            // get previous 2 results
            fib_type f_1; ctxt.m_fibs.get( tag - 1, f_1 );
            fib_type f_2; ctxt.m_fibs.get( tag - 2, f_2 );
            // put our result, and specify that this data item will be
            // get'ed twice (i.e. for the next two fib calculations)
            // before it is destroyed
            ctxt.m_fibs.put( tag, f_1 + f_2 );
    }
    return CnC::CNC_Success;
}

int main( int argc, char* argv[] )
{
    int n = 42;
    // eval command line args
    if( argc < 2 ) {
        std::cerr << "usage: " << argv[0] << " n\nUsing default value " << n << std::endl;
    } else n = atol( argv[1] );

    // create context
    fib_context ctxt; 

    // show scheduler statistics when done
    CnC::debug::collect_scheduler_statistics( ctxt );

    // put tags to initiate evaluation
    for( int i = 0; i <= n; ++i ) ctxt.m_tags.put( i );

    // wait for completion
    ctxt.wait(); 

    // get result
    fib_type res2;
    ctxt.m_fibs.get( n, res2 );

    // print result
    std::cout << "fib (" << n << "): " << res2 << std::endl;

    return 0;
}

samples/fib/fib_getcount/fib.h

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************
#ifndef fib_H_ALREADY_INCLUDED
#define fib_H_ALREADY_INCLUDED

#include <cnc/cnc.h>
#include <cnc/debug.h>

// Forward declaration of the context class (also known as graph)
struct fib_context;

// The step classes

struct fib_step
{
    int execute( const int & t, fib_context & c ) const;
};

// The context class
struct fib_context : public CnC::context< fib_context >
{
    // the step collection for the instances of the compute-kernel
    CnC::step_collection< fib_step, fib_tuner > m_steps;
    // Item collections
    CnC::item_collection< int, fib_type, item_tuner > m_fibs;
    // Tag collections
    CnC::tag_collection< int >            m_tags;

    // The context class constructor
    fib_context()
        : CnC::context< fib_context >(),
          // Initialize each step collection
          m_steps( *this, "fib_step" ),
          // Initialize each tag collection
          m_fibs( *this, "fibs" ),
          // Initialize each item collection
          m_tags( *this, "tags" )
    {
        // prescribe compute steps with this (context) as argument
        m_tags.prescribes( m_steps, *this );
        // step consumes m_fibs
        m_steps.consumes( m_fibs );
        // step also produces m_fibs
        m_steps.produces( m_fibs );
    }
};

#endif // fib_H_ALREADY_INCLUDED

samples/primes/primes_range/primes_range.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tbb/tick_count.h>
#ifdef _DIST_
#include <cnc/dist_cnc.h>
#include <cnc/internal/dist/distributor.h>
#else
#include <cnc/cnc.h>
#endif


struct my_context;

struct FindPrimes
{
    int execute( int n, my_context & c ) const;
};

struct primes_tuner : public CnC::step_tuner<>
{
    int compute_on( const int & p, my_context & ) const
    {
        return ( 1 + p / ( 1000000 / 20 ) ) % numProcs();//( p * 5 / 3 + 1 ) % 5;
    }
};

struct my_context : public CnC::context< my_context >
{
    CnC::step_collection< FindPrimes, primes_tuner > m_steps;
    CnC::tag_collection< int, CnC::tag_tuner< CnC::Internal::strided_range< int > > > m_tags;
    CnC::item_collection< int,int > m_primes;
    my_context() 
        : CnC::context< my_context >(),
          m_steps( *this ),
          m_tags( *this ),
          m_primes( *this )
    {
        m_tags.prescribes( m_steps, *this );
    }
};


int FindPrimes::execute( int n, my_context & c ) const
{
    int factor = 3;

    while ( n % factor ) factor += 2;
    if (factor == n) c.m_primes.put(n, n);

    return CnC::CNC_Success;
}


int main(int argc, char* argv[])
{
#ifdef _DIST_
    CnC::dist_cnc_init< my_context > dc_init;
#endif
    bool verbose = false;
    int n = 0;
    int number_of_primes = 0;

    if (argc == 2) 
    {
        n = atoi(argv[1]);
    }
    else if (argc == 3 && 0 == strcmp("-v", argv[1]))
    {
        n = atoi(argv[2]);
        verbose = true;
    }
    else
    {
        fprintf(stderr,"Usage: primes [-v] n\n");
        return -1;
    }

    my_context c;

    printf("Determining primes from 1-%d \n",n);

    tbb::tick_count t0 = tbb::tick_count::now();

    c.m_tags.put_range( CnC::Internal::strided_range< int >( 3, n, 2 ) );

    c.wait();

    tbb::tick_count t1 = tbb::tick_count::now();
    // FIXME we have to transfer the items to the host first (distCnC)
    number_of_primes = (int)c.m_primes.size() + 1;
    printf("Found %d primes in %g seconds\n", number_of_primes, (t1-t0).seconds());

    if (verbose)
    {
        printf("%d\n", 2);
        CnC::item_collection<int,int>::const_iterator cii;
        for (cii = c.m_primes.begin(); cii != c.m_primes.end(); cii++) 
        {
            printf("%d\n", cii->first); // kludge
        }
    }
}

samples/primes/primes_parfor/primes_parfor.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tbb/tick_count.h>
#include <cnc/cnc.h>
#include <cnc/debug.h>

struct my_context;

struct FindPrimes
{
    int operator()( int n ) const;
};

struct my_context : public CnC::context< my_context >
{
    CnC::item_collection< int,int > m_primes;
    my_context() 
        : CnC::context< my_context >(),
          m_primes( *this )
    {
        CnC::debug::collect_scheduler_statistics( *this );
    }
};


my_context g_c;

int FindPrimes::operator()( int n ) const
{
    int factor = 3;

    while ( n % factor ) factor += 2;
    if (factor == n) g_c.m_primes.put(n, n);

    return CnC::CNC_Success;
}


int main(int argc, char* argv[])
{
    bool verbose = false;
    int n = 0;
    int number_of_primes = 0;

    if (argc == 2) 
    {
        n = atoi(argv[1]);
    }
    else if (argc == 3 && 0 == strcmp("-v", argv[1]))
    {
        n = atoi(argv[2]);
        verbose = true;
    }
    else
    {
        fprintf(stderr,"Usage: primes [-v] n\n");
        return -1;
    }


    printf("Determining primes from 1-%d \n",n);

    tbb::tick_count t0 = tbb::tick_count::now();

    CnC::parallel_for( 3, n+1, 2, FindPrimes(), CnC::pfor_tuner< false >() );

    tbb::tick_count t1 = tbb::tick_count::now();

    number_of_primes = (int)g_c.m_primes.size() + 1;
    printf("Found %d primes in %g seconds\n", number_of_primes, (t1-t0).seconds());

    if (verbose)
    {
        printf("%d\n", 2);
        for (CnC::item_collection<int,int>::const_iterator cii = g_c.m_primes.begin(); cii != g_c.m_primes.end(); cii++) 
        {
            printf("%d\n", cii->first); // kludge
        }
    }
}

samples/blackscholes/blackscholes/blackscholes.h

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************
#ifndef blackscholes_H_ALREADY_INCLUDED
#define blackscholes_H_ALREADY_INCLUDED

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <vector>
#include <memory>

typedef std::shared_ptr< std::vector< OptionData > > option_vector_type;
typedef std::shared_ptr< std::vector< fptype > > price_vector_type;

CNC_BITWISE_SERIALIZABLE( OptionData );

// Forward declaration of the context class (also known as graph)
struct blackscholes_context;

// The step classes
struct Compute
{
    int execute( const int &, blackscholes_context & ) const;
};

struct bs_tuner : public CnC::step_tuner<>, public CnC::vector_tuner
{
    int compute_on( const int tag ) const {
        return tag % numProcs();
    }
    int compute_on( const int tag, blackscholes_context & ) const {
        return compute_on( tag );
    }
    int consumed_by( const int tag ) const {
        return tag;
    }
    int consumed_on( const int tag ) const {
        return compute_on( consumed_by( tag ) );
    }
    int getcount( const int tag ) {
        return 1;
    }
};

// The context class
struct blackscholes_context : public CnC::context< blackscholes_context >
{
    // Step collections
    CnC::step_collection< Compute, bs_tuner > compute;

    // Item collections
    CnC::item_collection< int, option_vector_type, bs_tuner > opt_data;
    CnC::item_collection< int, price_vector_type, bs_tuner >  prices;

    // Tag collections
    CnC::tag_collection< int > tags;

    int m_vs;
    // The context class constructor
    blackscholes_context( int vs = 0)
        : CnC::context< blackscholes_context >(),
      // Initialize each step collection
          compute( *this ),
          // Initialize each item collection
          opt_data( *this ),
          prices( *this ),
          // Initialize each tag collection
          tags( *this ),
          m_vs( vs )
    {
        // Prescriptive relations
        tags.prescribes( compute, *this );
        compute.produces( prices );
        opt_data.set_max( m_vs );
        prices.set_max( m_vs );
    }
#ifdef _DIST_
    void serialize( CnC::serializer & ser )
    {
        ser & m_vs;
        if( ser.is_unpacking() ) {
            opt_data.set_max( m_vs );
            prices.set_max( m_vs );
        }
            
    }
#endif
};

#endif // blackscholes_H_ALREADY_INCLUDED

samples/cholesky/cholesky/cholesky.h

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************
#ifndef cholesky_H_ALREADY_INCLUDED
#define cholesky_H_ALREADY_INCLUDED

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <memory>

// Forward declaration of the context class (also known as graph)
struct cholesky_context;

// The step classes

struct S1_compute
{
    int execute( const int & t, cholesky_context & c ) const;
};

struct S2_compute
{
    int execute( const pair & t, cholesky_context & c ) const;
};

struct S3_compute
{
    int execute( const triple & t, cholesky_context & c ) const;
};

///////////////

static void mark( int p, bool * d )
{
    if( d[p] == false ) {
        d[p] = true;
    }
}

struct cholesky_tuner : public CnC::step_tuner<>, public CnC::hashmap_tuner
{
    cholesky_tuner( cholesky_context & c, int p = 0, int n = 0, dist_type dt = BLOCKED_ROWS )
        : m_context( c ), m_p( p ), m_n( n ),
          m_div( (((p*p) / 2) + 1) / numProcs() ),
          //          m_div( p ? (((p*(p+1))/2)+((2*numProcs())/2))/(2*numProcs()) : 0 ),
          m_dt( dt )
    {
        if( myPid() == 0 ) {
            switch( dt ) {
            default:
            case BLOCKED_ROWS :
                std::cerr << "Distributing BLOCKED_ROWS\n";
                break;
            case ROW_CYCLIC :
                std::cerr << "Distributing ROW_CYCLIC\n";
                break;
            case COLUMN_CYCLIC :
                std::cerr << "Distributing COLUMN_CYCLIC\n";
                break;
            case BLOCKED_CYCLIC :
                std::cerr << "Distributing BLOCKED_CYCLICS\n";
                break;
            }
        }
    }

    inline static int compute_on( const dist_type dt, const int i, const int j, const int n, const int s )
    {
        switch( dt ) {
        default:
        case BLOCKED_ROWS :
            return ( ((j*j)/2 + 1 + i ) / s ) % numProcs();
            break;
        case ROW_CYCLIC :
            return j % numProcs();
            break;
        case COLUMN_CYCLIC :
            return i % numProcs();
            break;
        case BLOCKED_CYCLIC :
            return ( (i/2) * n + (j/2) ) % numProcs();
            break;
        }
    }

    // step-bits
    int compute_on( const int tag, cholesky_context & /*arg*/ ) const
    {
        return compute_on( m_dt, tag, tag, m_n, m_div );
    }
    
    int compute_on( const pair & tag, cholesky_context & /*arg*/ ) const
    {
        return compute_on( m_dt, tag.first, tag.second, m_p, m_div );
    }
    
    int compute_on( const triple & tag, cholesky_context & /*arg*/ ) const
    {
        return compute_on( m_dt, tag[2], tag[1], m_p, m_div );
    }
    
    // item-bits
    typedef triple tag_type;

    int get_count( const tag_type & tag ) const
    {
        int _k = tag[0], _i = tag[2];
        if( _k == _i+1 ) return CnC::NO_GETCOUNT; // that's our result
        return ( _k > 0 && _k > _i ) ? ( m_p - _k ) : 1;
    }

    // First we determine which *steps* are going to consume this item.
    // Then we use compute_on to determine the *processes* to which the item needs to go.
    // Mostly the two steps are formulated in one line.
    // We avoid duplicate entries by using a helper mechanism "mark"
    std::vector< int > consumed_on( const tag_type & tag  ) const
    {
        int _k = tag[0], _j = tag[1], _i = tag[2];

        if( _i == _j ) { // on diagonal
            if( _i == _k ) return std::vector< int >( 1, compute_on( _k, m_context ) ); // S1 only
            if( _k == m_p ) return std::vector< int >( 1, 0 ); // the end
        }
        if( _i == _k ) return std::vector< int >( 1, compute_on( pair( _k, _j ), m_context ) ); // S2 only

        bool * _d;
        _d = new bool[numProcs()];
        memset( _d, 0, numProcs() * sizeof( *_d ) );

        if( _i == _k-1 ) {
            if( _i == _j ) { // on diagonal on S2
                for( int j = _k; j < m_p; ++j ) {
                    mark( compute_on( pair( _k - 1, j ), m_context ), _d );
                }
            } else { // S3
                for( int j = _j; j < m_p; ++j ) {
                    for( int i = _k; i <= _j; ++i ) {
                        mark( compute_on( triple( _k-1, j, i ), m_context ), _d );
                    }
                }
            }
        }
        mark( compute_on( triple( _k, _j, _i ), m_context ), _d );
        std::vector< int > _v;
        _v.reserve( numProcs()/2 );
        if( _d[myPid()] ) _v.push_back( myPid() );
        for( int i = 0; i < numProcs(); ++i ) {
            if( _d[i] && i != myPid() ) _v.push_back( i );
        }
        delete [] _d;
        return _v;
    }

#ifdef _DIST_
    void serialize( CnC::serializer & ser )
    {
        ser & m_p & m_div & m_n & m_dt;
    }
#endif
private:
    cholesky_context & m_context;
    int m_p;
    int m_n;
    int m_div;
    dist_type m_dt;
};

// The context class
struct cholesky_context : public CnC::context< cholesky_context >
{
    // tuners
    cholesky_tuner tuner;

    // Step Collections
    CnC::step_collection< S1_compute, cholesky_tuner > sc_s1_compute;
    CnC::step_collection< S2_compute, cholesky_tuner > sc_s2_compute;
    CnC::step_collection< S3_compute, cholesky_tuner > sc_s3_compute;

    // Item collections
    CnC::item_collection< triple, tile_const_ptr_type, cholesky_tuner > Lkji;
    int p,b;

    // Tag collections
    CnC::tag_collection< int > control_S1;
    CnC::tag_collection< pair > control_S2;
    CnC::tag_collection< triple > control_S3;

    // The context class constructor
    cholesky_context( int _b = 0, int _p = 0, int _n = 0, dist_type dt = BLOCKED_ROWS )
        : CnC::context< cholesky_context >(),
          // init tuners
          tuner( *this, _p, _n, dt ),
          // init step colls
          sc_s1_compute( *this, tuner, "Cholesky" ),
          sc_s2_compute( *this, tuner, "Trisolve" ),
          sc_s3_compute( *this, tuner, "Update" ),
          // Initialize each item collection
          Lkji( *this, "Lkji", tuner ),
          p( _p ),
          b( _b ),
          // Initialize each tag collection
          control_S1( *this, "S1" ),
          control_S2( *this, "S2" ),
          control_S3( *this, "S3" )
    {
        // Prescriptive relations
        control_S1.prescribes( sc_s1_compute, *this );
        control_S2.prescribes( sc_s2_compute, *this );
        control_S3.prescribes( sc_s3_compute, *this );

        // producer/consumer relations
        sc_s1_compute.consumes( Lkji );
        sc_s1_compute.produces( Lkji );
        sc_s2_compute.consumes( Lkji );
        sc_s2_compute.produces( Lkji );
        sc_s3_compute.consumes( Lkji );
        sc_s3_compute.produces( Lkji );
#if 0
        CnC::debug::trace_all( *this );
#endif
    }

#ifdef _DIST_
    void serialize( CnC::serializer & ser )
    {
        ser & p & b & tuner;
    }
#endif
};

#endif // cholesky_H_ALREADY_INCLUDED

samples/count_words/count_words/count_words.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <cnc/reduce.h>
#include <cassert>
#include <string>
#include <sstream>
#include <fstream>

// Count the number of occurances of given strings in a file.  An
// example of using a pre-defined CnC::graph, e.g. a reduction.
//
// The file is read line by line (on the host).  A computation unit is
// tagged by the pair (line-id/block, search-string).  Such a
// computation gets the line/block to search in.  It looks for the
// string and puts the number of matches into a collection. The
// collection is part of a reduction.
//
// The input to the reduction is counts_per_block with the matched
// string as key/above pair as its tag. The selector (a lambda!) maps
// the input-pair to the string that is matched. The reduction also
// takes "counts" which provides the number of participating elements
// per reduction (for each string one reduction!).  The reduction's
// output is the overall count per string.
//
// This is not optimized in any way. To make this fast, we need to
// adjust the block size and read in parallel. Additionally,
// std::strings are used in a very memory-unfriendly and
// copy-intensive manner.
//
// On distributed memory an intelligent distribution plan
// would probably be helpful.

typedef std::pair< int, std::string > tag_type;

struct cw_context;

struct counter
{
    int execute( const tag_type & t, cw_context & ctxt ) const;
};

struct cw_context : public CnC::context< cw_context >
{
    // the prescribing tags
    CnC::tag_collection< tag_type > tags;
    // holds the lines/blocks of the file(s)
    CnC::item_collection< int, std::string > blocks;
    // for each pair(block,string) we put the number of occurances of string found in block
    CnC::item_collection< tag_type, size_type > counts_per_block;
    // the final number of occurances per string
    CnC::item_collection< std::string, size_type > counts;
    // the number of items (e.g. counts per string) participating in a reduction
    CnC::item_collection< std::string, size_type > red_counts;
    CnC::step_collection< counter > steps;
    // our reduction is provided as a graph, see constructor for its instantiation&wiring
    CnC::graph * reduce;
    
    cw_context()
        : tags( *this, "tags" ),
          blocks( *this, "blocks" ),
          counts_per_block( *this, "counts_per_block" ),
          counts( *this, "counts" ),
          red_counts( *this, "red_counts" ),
          steps( *this, "counter" ),
          reduce( NULL )
    {
        // here we wire the graph/reduction
        reduce = CnC::make_reduce_graph( *this,            // context
                                         "reduce",         // name
                                         counts_per_block, // input collection
                                         red_counts,       // number of items per reduction
                                         counts,           // the final result for each reduction
                                         std::plus<size_type>(), // the reduction operation
                                         size_type(0),     // identity element
                                         // we use a lambda as the selector
                                         // it maps the item to the reduction identified by t.second (the string)
                                         // e.g. it reduces over all blocks
                                         []( const tag_type & t, std::string & _s )->bool{_s=t.second;return true;} );
        tags.prescribes( steps, *this );
        steps.consumes( blocks );
        steps.produces( counts_per_block );
        //CnC::debug::trace( *reduce, 3 );
    }
    
    ~cw_context()
    {
        delete reduce;
    }
};

// get block/line (tag.first) and determine number of occurances of string (tag.second)
// put result into counts_per_block
int counter::execute( const tag_type & t, cw_context & ctxt ) const
{
    std::string _str;
    ctxt.blocks.get( t.first, _str );
    const size_type _sz = _str.size();
    size_type _pos = -1;
    size_type _cnt = 0;
    while( ( _pos = _str.find( t.second, _pos+1 ) ) < _sz ) ++_cnt;
    // always to results, even if 0
    // this allows us to determine the number of reduced items in main
    ctxt.counts_per_block.put( t, _cnt );
    return 0;
}

int main( int argc, char * argv[])
{
#ifdef _DIST_
    CnC::dist_cnc_init< cw_context > _dc;
#endif
    
    if( argc < 3 ) {
        std::cerr << "expected arguments: <file> <word1> [<word2>...]\n";
            exit(1);
    }
    // create the context
    cw_context ctxt;
    std::ifstream _file( argv[1] );
    std::string _block;
    int _id = 0;
    // read line line
    while( std::getline( _file, _block ) ) {
        ctxt.blocks.put( _id, _block );
        int i = 1;
        // for each line, put control for each searched string
        while( ++i < argc ) {
            ctxt.tags.put( std::make_pair( _id, argv[i] ) );
        }
        ++_id;
    }

    // reduction needs number of reduced items
    // as we always put a number in the step, we know its #blocks
    // _id holds #blocks
    int i = 1;
    while( ++i < argc ) ctxt.red_counts.put( argv[i], _id );

    ctxt.wait();

    std::cout << "done" << std::endl;

    std::cout << ctxt.counts.size() << " " << ctxt.blocks.size() << std::endl;
    // iterate and write out all keys/value pairs
    for( auto i = ctxt.counts.begin(); i != ctxt.counts.end(); ++i ) {
        std::cout << i->first << " \t " << *i->second << std::endl;
    }
    ctxt.wait();
    std::cout << ctxt.counts.size() << " " << ctxt.blocks.size() << std::endl;

    return 0;
}

samples/count_all_words/count_all_words/count_all_words.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <cnc/reduce.h>
#include <cassert>
#include <string>
#include <sstream>
#include <algorithm>
#include <fstream>

// Count the number of occurances of all words in a file.  An example
// of using a pre-defined CnC::graph, e.g. a reduction.
//
// The file is read line by line (on the host).  A computation unit is
// tagged by the line-id/block.  Such a computation gets the
// line/block to search in.  It reads word by word and puts an item
// into an item-collection "counts_per_block". To make the tag unique,
// its a triple (block/line, word#, word) [case-insensitive].  The
// data itself is just a dummy.  The collection is part of a reduction
// which will finally compute the number of occurances per word.
//
// The input to the reduction is counts_per_block. The selector (a
// lambda!) maps the input-triple to the word that is matched. The
// reduction also takes "counts". As we don't know which words we will
// find and count, there is no way to know the number of participating
// items per reduction.  Instead we have to indicate when all lines
// have been processed.  After a first call to wait, we know no more
// computation is done, we then call reduction::flush to indicate that
// no more input arrives.  A second wait() makes sure that the
// reduction completes before we access the result.  The reduction's
// output is the overall count for ever word in the file.
//
// As an opimization, we assign a get-count of 0 to all words we put.
// In this example the words are used for nothing else than the
// reduction.  With a get-couhjnt of 0 the items will be passed to the
// reduction but never really stored.
//
// This is not optimized in any way. To make this fast, we need to
// adjust the block size and read in parallel. Additionally,
// std::strings are used in a very memory-unfriendly and
// copy-intensive manner.
//
// On distributed memory an intelligent distribution plan would
// probably be helpful.

typedef long long int id_type;
typedef std::pair< id_type, const std::string > tag_type;

struct cw_context;

struct counter
{
    int execute( const int & t, cw_context & ctxt ) const;
};

struct gc0_tuner : public CnC::hashmap_tuner
{
    template< typename Tag >
    int get_count( const Tag & ) const
    {
        return 0;
    }
};

struct cw_context : public CnC::context< cw_context >
{
    // prescribes our computation
    CnC::tag_collection< int > tags;
    // one input line per item
    CnC::item_collection< int, std::string > blocks;
    // every word instance we find we put in here with a unique tag
    CnC::item_collection< tag_type, size_type, gc0_tuner > counts_per_block;
    // we don't really need this, we use flush
    CnC::item_collection< std::string, int > cnt;
    // the resulting count per word
    CnC::item_collection< std::string, size_type > counts;
    // extracing words
    CnC::step_collection< counter > steps;
    // the reduction sub-graph
    CnC::graph * reduce;
    
    cw_context()
        : tags( *this, "tags" ),
          blocks( *this, "blocks" ),
          counts_per_block( *this, "counts_per_block" ),
          cnt( *this, "cnt" ),
          counts( *this, "counts" ),
          steps( *this, "counter" ),
          reduce( NULL )
    {
        reduce = CnC::make_reduce_graph( *this, "reduce",
                                         counts_per_block, //input collection
                                         cnt,              // counts per reduce, not used
                                         counts,           // result
                                         std::plus<size_type>(), // reduce operation
                                         size_type(0),     // identity element
                                         // we use a lambda as the selector
                                         // it maps the item to the reduction identified by t.second (the string)
                                         // e.g. it reduces over all blocks
                                         []( const tag_type & t, std::string& _s )->bool{_s=t.second;return true;} );
        tags.prescribes( steps, *this );
        steps.consumes( blocks );
        steps.produces( counts_per_block );
        //        CnC::debug::trace_all( *this );
        //CnC::debug::trace( steps );
        //        CnC::debug::trace( *reduce, 2 );
    }
    
    ~cw_context()
    {
        delete reduce;
    }
};

// get the line, read word by word and put each instance
// with unique tag into counts_per_block, which is the
// input collection of the reduction
int counter::execute( const int & t, cw_context & ctxt ) const
{
    std::string _str;
    ctxt.blocks.get( t, _str );
    std::istringstream iss( _str );
    std::string _word;
    id_type _id = (t << 16);
    while( iss >> _word ) {
        // let's a do a case-insensitive matching
        std::transform(_word.begin(), _word.end(), _word.begin(), ::tolower);
        ctxt.counts_per_block.put( tag_type(_id, _word), 1 );
        ++_id;
    }
    return 0;
}

int main( int argc, char * argv[])
{
#ifdef _DIST_
    CnC::dist_cnc_init< cw_context > _dc;
#endif
    
    if( argc < 2 ) {
        std::cerr << "expected arguments: <file>\n";
            exit(1);
    }
    // create the context
    cw_context ctxt;
    std::ifstream _file( argv[1] );
    std::string _block;
    int _id = 0;

    tbb::tick_count startTime = tbb::tick_count::now();
    
    // read line line
    while( std::getline( _file, _block ) ) {
        // put each line
        ctxt.blocks.put( _id, _block );
        // and control to start searching/matching
        ctxt.tags.put( _id );
        ++_id;
    }
    // A reduction needs number of reduced items.
    // We do not know the count in advance.
    // We do not even know which words are going to show up.
    // We have to wait until processing is done
    // and then tell the reduction it can do the final pass
    ctxt.wait();
    ctxt.reduce->flush();
    ctxt.wait();

    tbb::tick_count endTime = tbb::tick_count::now();
    
    std::cout << ctxt.counts.size() << std::endl;
    // iterate and write out all keys/value pairs with counts > 999
    for( auto i = ctxt.counts.begin(); i != ctxt.counts.end(); ++i ) {
        if( *i->second > 999 )
            std::cout << i->first << " \t " << *i->second << std::endl;
    }

    std::cout << "Time: " << (endTime-startTime).seconds() << "s.\n";
    return 0;
}

samples/count_all_words/mapreduce/mapreduce.h

// *******************************************************************************
// Copyright (c) 2013-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
// *******************************************************************************

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <cnc/reduce.h>
#include <cassert>
#include <string>
#include <sstream>
#include <algorithm>
#include <fstream>
#include <cwctype>

/**
   Map-Reduce for CnC.
   
   The mapreduce_context provides a functionality similar to hadoop/mapreduce.
   At instantiation time it accepts the map- and reduce operations.
   Input data is accepted by putting files into its read_data collection.
   A call to wait() makes sure all mapreduce operations are done.
   Output data is stored in the "results" collection and can then be accessed
   through get-calls or iterators.

   Like in common mapreduce frameworks the map operation works on streams - not
   on CnC entities. Also like in mapreduce frameworks the result of the map
   is stored in a collection (of course here we use a CnC item collection).

   It supports an easy way to distribute computation on distributed memory.
   By appending':<processid>' to the input filenames it will execute the corresponding
   map operation on the given process '<processid>'. The reduction part is 
   automatically distributed.

   Internally we use a step for reading files, a step for mapping and
   a reduction graph for the reduce part.

   \todo write a driver for HPFS which provides the distribution information (appending':<processid>')
   \todo what about making better use of blocks (not entire files)?
   \todo what about making this a CnC::graph?
**/
template< typename MapOp, typename ReduceOp >
struct mapreduce_context : public CnC::context< mapreduce_context< MapOp, ReduceOp > >
{
    typedef std::pair< const std::string, long long int > map_tag_type;
    typedef typename MapOp::result_type result_type;
    typedef typename MapOp::key_type key_type;
    typedef std::string char_ptr_type;

    // the step which executes the map operation
    struct mapper
    {
        int execute( const map_tag_type & t, mapreduce_context< MapOp, ReduceOp > & ctxt ) const;
    };
    
    // the step which reads individual files
    struct reader 
    {
        int execute( const std::string & file, mapreduce_context< MapOp, ReduceOp > & ctxt ) const;
    };
    
    // a generic tuner providing constant get-count
    // and executes locally (e.g. whereever the step gets prescribed)
    template< int GC >
    struct gc_tuner : public CnC::hashmap_tuner
    {
        template< typename Tag >
        int get_count( const Tag & ) const
        {
            return GC;
        }
        template< typename Tag >
        int consumed_on( const Tag & ) const
        {
            return CnC::CONSUMER_LOCAL;
        }
        template< typename Tag >
        int produced_on( const Tag & ) const
        {
            return CnC::PRODUCER_LOCAL;
        }
    };

    // tuner for reading files, distributes file-reads
    struct read_tuner : public CnC::step_tuner< false >
    {
        // we analyze the input string and search for ':<pid>' prefix.
        // if found, the corresponding step will be executed on process <pid>.
        // Otherwise we distribute randomly.
        int compute_on( const std::string & file, mapreduce_context< MapOp, ReduceOp > & ) const
        {
            auto _col = file.rfind( ':' );
            if( _col == std::string::npos ) {
                cnc_hash< std::string > _h;
                int _x = _h( file ) % CnC::tuner_base::numProcs();
                return _x;
            }
            // manually convert to int (faster)
            const char * _p = file.c_str() + _col + 1;
            int _x = 0;
            while( *_p >= '0' && *_p <= '9' ) {
                _x = ( _x * 10 ) + ( *_p - '0' );
                ++_p;
            }
            return _x % CnC::tuner_base::numProcs();
        }
    };

    // the mapping stays local
    struct map_tuner : public CnC::step_tuner< false >
    {
        int compute_on( const map_tag_type &, mapreduce_context< MapOp, ReduceOp > & ) const
        {
            return CnC::COMPUTE_ON_LOCAL;
        }
    };

    // prescribes reading files
    CnC::tag_collection< std::string > read_tags = { *this, "read_tags" }; // need recent g++ or icpc
    // prescribes our computation
    CnC::tag_collection< map_tag_type > map_tags = { *this, "map_tags" }; // need recent g++ or icpc
    // one input line per item
    CnC::item_collection< map_tag_type, char_ptr_type, gc_tuner< 1 > > blocks = { *this, "blocks" }; // need recent g++ or icpc
    // every word instance we find we put in here with a unique tag
    CnC::item_collection< std::string, result_type, gc_tuner< 0 > > map_out = { *this, "map_out" }; // need recent g++ or icpc
    // we don't really need this, we use flush
    CnC::item_collection< key_type, int > cnt = { *this, "cnt" }; // need recent g++ or icpc
    // the results
    CnC::item_collection< key_type, result_type/*, OTuner*/ > results = { *this, "results" }; // need recent g++ or icpc
    // reading files
    CnC::step_collection< reader, read_tuner > read_steps = { *this, "reader" }; // need recent g++ or icpc
    // extracing words
    CnC::step_collection< mapper, map_tuner > map_steps = { *this, "mapper" }; // need recent g++ or icpc
    // the reduction sub-graph
    CnC::graph * reduce;
    // our private instance/copy of the map operation
    MapOp m_mapOp;
    // our private instance/copy of the reduce operation
    ReduceOp m_reduceOp;
    
    // we need a default constructor on distributed memory
    // the combination with serialize must match what the parametrized constructor does.
    mapreduce_context()
        : reduce( NULL ),
          m_mapOp(),
          m_reduceOp()
    {
        read_tags.prescribes( read_steps, *this );
        map_tags.prescribes( map_steps, *this );
        read_steps.produces( blocks );
        map_steps.consumes( blocks );
        map_steps.produces( map_out );
    }

    // this is the cosntructor the user actually sees/uses:
    // it provides the map- and reduce operations
    mapreduce_context( const MapOp & mo, const ReduceOp & ro )
        : mapreduce_context< MapOp, ReduceOp >()
    {
        m_mapOp = mo;
        m_reduceOp = ro;
        init_reduce();
    }
    
    // initing the reduce graph
    // called by serialize (unpack) and the programmer-used constructor
    void init_reduce() 
    {
        delete reduce;
        reduce = CnC::make_reduce_graph( *this, "reduce",
                                         map_out, //input collection
                                         cnt,           // counts per reduce, not used
                                         results,       // result
                                         m_reduceOp,    // reduce operation
                                         size_type(0),  // identity element
                                         // we use a lambda as the selector
                                         // it maps the item to the reduction identified by t.second (the string)
                                         // e.g. it reduces over all blocks
                                         []( const std::string & t, std::string& _s )->bool{_s=t;return true;} );
    }

    ~mapreduce_context()
    {
        delete reduce;
    }

    // we overwrite the normal wait to make it even easier
    // in some cases this might not be desired, but we don't care for now.
    void wait() 
    {
        // A reduction needs number of reduced items.
        // We do not know the count in advance.
        // We do not even know which words are going to show up.
        // We have to wait until processing is done
        // and then tell the reduction it can do the final pass.
        CnC::context< mapreduce_context< MapOp, ReduceOp > >::wait();
        reduce->flush();
        CnC::context< mapreduce_context< MapOp, ReduceOp > >::wait();
    }

#ifdef _DIST_
    void serialize( CnC::serializer & ser )
    {
        ser & m_mapOp & m_reduceOp;
        if( ser.is_unpacking() ) init_reduce();
    }
#endif
};

// function templates are easier to use the template classes
// let's define a function which returns the right context type by
// just providing the operations.
template< typename MapOp, typename ReduceOp >
mapreduce_context< MapOp, ReduceOp > * make_mapreduce( const MapOp & mo, const ReduceOp & ro )
{
    return new mapreduce_context< MapOp, ReduceOp >( mo, ro );
}

// get the line, read word by word and put each instance
// with unique tag into map_out, which is the
// input collection of the reduction
template< typename MapOp, typename ReduceOp >
int mapreduce_context< MapOp, ReduceOp >::mapper::execute( const map_tag_type & t, mapreduce_context< MapOp, ReduceOp > & ctxt ) const
{
    char_ptr_type _str;
    ctxt.blocks.get( t, _str );
    std::istringstream iss( _str );
    ctxt.m_mapOp( iss, ctxt.map_out );
    return 0;
}

// read the given file and trigger map operations
template< typename MapOp, typename ReduceOp >
int mapreduce_context< MapOp, ReduceOp >::reader::execute( const std::string & file, mapreduce_context< MapOp, ReduceOp > & ctxt ) const
{
    const int MIN_BLK_SZ = 1024;

    std::string _fn( file );
    auto _col = file.rfind( ':' );
    if( _col != std::string::npos ) _fn.resize( _col );
    std::ifstream _file( _fn );
    if( ! _file ) {
        std::cerr << "Could not open file " << _fn << std::endl;
        return CnC::CNC_Failure;
    }

    long long int _id = 0;
    int _first = 0;
    char_ptr_type _block; //( new char[MIN_BLK_SZ], std::default_delete<char[]>() );
    _block.resize( MIN_BLK_SZ );
    // we read blocks of data
    // as a block might be in the middle of a word, we need some extra stuff
    // e.g. we find the last whitespace to end the block
    //      and copy the rest to the next block
    do {
        char * _b = const_cast< char * >( _block.c_str() );
        _file.read( _b + _first, MIN_BLK_SZ );
        int _cnt = _first + _file.gcount();
        _block.resize( _cnt );
        if( _cnt > 0 ) {
            // we'll miss the last word if the file doesn't end with a whitespace.
            auto _last = _block.find_last_of( " \t\n" );
            _first = _cnt - _last - 1;
            std::string _tmp;
            if( _file ) {
                _tmp.resize( MIN_BLK_SZ + _first );
                if( _first > 0 ) _tmp.replace( 0, _first, _block, _last + 1, _first );
                //                _tmp.resize( MIN_BLK_SZ + _first );
            }
            _block.resize( _last );
            auto _btag = std::make_pair( file, _id );
            // put each line
            ctxt.blocks.put( _btag, _block );
            // and control to start searching/matching
            ctxt.map_tags.put( _btag );
            _block = _tmp;//.reset( _tmp, std::default_delete<char[]>() );
            ++_id;
        }
    } while( _file );

    return 0;
}

samples/count_all_words/mapreduce/mapreduce.cpp

// *******************************************************************************
// Copyright (c) 2013-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
// *******************************************************************************

#include "mapreduce.h"

/*
  Count the number of occurances of all words in a file.  An example
  of using mapreduce_context.
  It's bascially the same functionality as what's done in count_all_words
  but using the mapreduce abstraction.
  
  Please see mapreduce.h and count_all_words.cpp for details.
*/

// we use std::plus as the reduce operation
namespace std {
    template< typename T >
    inline CnC::bitwise_serializable serializer_category( const std::plus< T >  * ) {
        return CnC::bitwise_serializable();
    }
}

// our map function (functor)
// reads given stream word by word and counts all occuring words
// by simply putting 1 for each word
struct count
{
    typedef std::string key_type;
    typedef size_type result_type;

    template< typename IStream, typename OutCollection >
    inline void operator()( IStream & iss, OutCollection & out ) const
    {
        std::string _word;
        while( iss >> _word ) {
            // let's a do a case-insensitive matching
            std::transform(_word.begin(), _word.end(), _word.begin(), ::tolower);
            out.put( _word, 1 );
        }
    }
};
CNC_BITWISE_SERIALIZABLE( count );

int main( int argc, char * argv[])
{
    typedef mapreduce_context< count, std::plus<size_type> > caw_context;
#ifdef _DIST_
    CnC::dist_cnc_init< caw_context > _dc;
#endif
    
    if( argc < 2 ) {
        std::cerr << "expected arguments: <file1>[ <file2> ...]\n";
            exit(1);
    }
    // create the context
    caw_context * mapred = make_mapreduce( count(), std::plus<size_type>() );

    tbb::tick_count startTime = tbb::tick_count::now();

    for( int i = 1; i<argc; ++i ) {
        mapred->read_tags.put( argv[i] );
        //        reader_execute( argv[i], mapred );
    }
    mapred->wait();

    tbb::tick_count endTime = tbb::tick_count::now();

    std::cout << mapred->results.size() << std::endl;
    // iterate and write out all keys/value pairs with counts > 999
    for( auto i = mapred->results.begin(); i != mapred->results.end(); ++i ) {
        //        if( *i->second > 999 )
            std::cout << i->first << " \t " << *i->second << std::endl;
    }

    delete mapred;

    std::cout << "Time: " << (endTime-startTime).seconds() << "s.\n";

    return 0;
}

samples/reduce_2d/reduce_2d/reduce_2d.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <cnc/reduce.h>
#include <cassert>

/*
  Demo for connecting 2 dependent graphs (e.g. reductions).
  The input to the program are values ina  2d-space (a).
  The output is the sum of all values (sum2).
  The first reduction computes the sum for each row (sum1)
  The second takes this output and sums it to the final value (sum2).
  There is no barrier and only a single wait
  (ignore whats enclosed in #ifdef _CNC_TESTING_ ... #endif).
  We just use sum1 as the output for the first reduction (red1)
  and also as the input to the second (red2).
  Evaluation executes fully asynchronously.

  Additionally, the selector ignores certain values when summing the rows.

  Last but not least we also increase the stress on the runtime by providing
  the counts per reduction at different stages (early, mid, late).
 */

// size of "matrix"
const int N = 32;
// threashold for selecting candidates for row-sum
const int MX = 24;
// describes our 2d space
typedef std::pair< int, int > tag_type;


// selector performing different selections dependening on reduction/tag
struct selector
{
    // functor, assign second as reduction-id
    // return false if first >= MX (we only reduce items with tag->first < MX)
    template< typename T, typename S, typename R >
    bool operator()( const std::pair< T, S > & t, R & r ) const {
        if( t.first >= MX ) return false;
        r = t.second;
        return true;
    }

    // here we select all and return singleton 0
    template< typename T, typename R >
    bool operator()( const T & t, R & r ) const {
        r = 0;
        return true;
    }
};


// the step just puts the value to the "matrix"
// in real life this might first do some meaningful computation
struct putter
{
    // simply puts 1 with same tag to item-coll
    template< typename T, typename C >
    int execute( const T & t, C & ctxt ) const
    {
        ctxt.a.put( t, 1 );
        // in some cases we put the count here (just testing)
        if( t.first >= MX && t.first == t.second ) ctxt.cnt1.put( t.first, MX );
        return 0;
    }
};

struct reduce_context : public CnC::context< reduce_context >
{
    CnC::tag_collection< tag_type > tags;         // control tags for our step
    CnC::item_collection< tag_type, int > a;      // the matrix
    CnC::item_collection< int, int > cnt1, cnt2;  // provides number of participating items per reduction
    CnC::item_collection< int, int > sum1, sum2;  // outputs of reductions
    CnC::step_collection< putter > sc;            // the steps
    CnC::graph * red1, * red2;                    // out 2 sub-graphs: 2 reductions

    reduce_context()
        : tags( *this, "tags" ),
          a( *this, "a" ),
          cnt1( *this, "cnt1" ),
          cnt2( *this, "cnt2" ),
          sum1( *this, "sum1" ),
          sum2( *this, "sum2" ),
          sc( *this, "step" ),
          red1( NULL ),
          red2( NULL )
    {
        tags.prescribes( sc, *this );
        sc.produces( a );
        // first reduction inputs a and outputs sum1
        red1 = CnC::make_reduce_graph( *this, "red_row",
                                       a,            // input collection: our matrix
                                       cnt1,         // count of items per reduction
                                       sum1,         // output collection: sum per row
                                       std::plus<int>(),  // reduction operation '+'
                                       0,            //identity element
                                       selector() ); // selector
        // second reduction inputs sum1 and outputs sum2
        red2 = CnC::make_reduce_graph( *this, "red_red",
                                       sum1,         // the input collection is the output of red1
                                       cnt2,         // count of items per reduction
                                       sum2,         // the final output: sum of all values
                                       std::plus<int>(),  // reduction operation '+'
                                       0,            //identity element
                                       selector() ); // selector
#ifdef _CNC_TESTING_
        CnC::debug::trace( sum1, 1 );
        CnC::debug::trace( *red1, 2 );
        CnC::debug::trace( *red2, 2 );
#endif
    }

    ~reduce_context()
    {
        delete red1;
    }
};

int main()
{
#ifdef _DIST_
    CnC::dist_cnc_init< reduce_context > _dc;
#endif
    // create the context
    reduce_context ctxt;
    // put the count for some reduction before eveything else
    for( int i = 2; i < MX; ++i ) ctxt.cnt1.put( i, MX );
    // put control tags
    for( int i = 0; i < N; ++i ) {
        for( int j = N-1; j >= 0; --j ) ctxt.tags.put( tag_type( i, j ) );
    }

#ifdef _CNC_TESTING_
    // wait for all reductions except 3
    ctxt.wait();
#endif

    ctxt.cnt1.put( 0, MX );
    ctxt.cnt2.put(0,N);

#ifdef _CNC_TESTING_
    // wait for all reductions except one to complete
    ctxt.wait();
#endif
    // put the count for one reduction after everything else
    ctxt.cnt1.put( 1, MX );
    // wait for safe state
    ctxt.wait();

    // iterate, check and write out all keys/value pairs
    auto _sz = ctxt.sum1.size();
    if( _sz != N ) {
        std::cerr << "#reductions is " << _sz << ", expected " << N << std::endl;
        return 1239;
    }
    for( auto i = ctxt.sum1.begin(); i != ctxt.sum1.end(); ++i ) {
        if( *i->second != MX ) {
            std::cerr << "cnt1 of " << i->first << " is " << *i->second << ", expected " << MX << std::endl;
            return 1234;
        }
    }
    int n = 0;
    ctxt.sum2.get(0, n );
    if( n != N*MX ) {
        std::cerr << "reduce count is " << n << ", expected " << N*MX << std::endl;
        return 1236;
    }
    std::cout << "Success with " << n << std::endl;

    return 0;
}

samples/hidden_graph/hidden_graph/hidden_graph.cpp

//********************************************************************************
// Copyright (c) 2013-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************

/*
  Demonstration of a hidden graph that's active outside the on_put callbacks.
  Using leave_quiescence and enter_quiescence to communicate its internal state.
  Environment puts data that the graph operates on.
  The graph's computation is just sleeping random time, but when done it puts 
  control and data that a consumer step outside the hidden graph uses.
  For distribution we use tuner::consumed_on to assign the input data to remote
  processes.
 */

#ifdef _DIST_
# include <cnc/dist_cnc.h>
#else
# include <cnc/cnc.h>
#endif
#include <cnc/debug.h>
#include <cassert>
#include <ctime>
#include <tbb/concurrent_queue.h>
#include <thread>         // std::thread
#include <chrono>         // std::chrono::milliseconds

// This is our hidden graph.  It is parametrized by 3 collections: the
// input data colleciton, an output tag collection and a output data
// collection.
//
// It sets its state to active and registers a on_put callback which
// stores the input data in an internal queue.
//
// An internal thread pops entries from the queue, sleeps for a while
// and then puts a control tag ana corresponding data item.  When the
// thread has processed 7 entries it reports quiescence and exits.
//
// The environment only puts data, no control. Without quiescence
// reporting the wait call could return immediately as no steps are
// prescribed. AS the hidden graph sets its state to active right at
// the beginning, the wait() call will not succeed until the graph
// signaled its quiescence.
template< typename IC_IN, typename IC_OUT, typename TC_OUT >
class hidden_graph : public CnC::graph
{
private:
    typedef tbb::concurrent_bounded_queue< typename IC_IN::data_type > blocking_queue;

    // the callback for incoming data just pushes the items on our internal queue
    struct on_data : public IC_IN::callback_type
    {
        on_data( blocking_queue & q ) : m_q(q) {}
        void on_put( const typename IC_IN::tag_type & /*tag*/, const typename IC_IN::data_type & val )
        {
            m_q.push( val );
        };
        blocking_queue & m_q;
    };

    // we start a thread which takes data from our internal queue and works on it.
    // it enters quiescence and exits after processing 7 items
    // this is just a demo, so our work is simply sleepings
    struct observe_and_compute {
        void operator()( blocking_queue * queue, IC_OUT * dataout, TC_OUT * tagout, const hidden_graph< IC_IN, IC_OUT, TC_OUT > * graph )
        {
            typename IC_IN::data_type _item;
            typename TC_OUT::tag_type _tag(CnC::tuner_base::myPid()*1000);
            do {
                // get next input
                queue->pop( _item );
                // and do some work, we just sleep, but it could be anything
                // ideally of course this would use TBB tasks
                int _tm = rand() % 1111;
                std::this_thread::sleep_for( std::chrono::milliseconds(_tm) );
                // now produce our output, to make it a little more intersting let's make it conditional
                if( _tm % 4 ) {
                    dataout->put( _tag, _item + _tm );
                    tagout->put( _tag );
                    ++_tag;
                }
            } while( _tag < CnC::tuner_base::myPid()*1000+7 );
            // we are done, before exiting we must inform the runtime
            graph->enter_quiescence();
            CnC::Internal::Speaker oss;
            oss << "done observe_and_compute";
        }
    };
public:
    // the constructor tells the runtime that the graph is not quiescent
    // it also starts the thread and registers the on_put callback
    template< typename Ctxt >
    hidden_graph( CnC::context< Ctxt > & ctxt, const std::string & name, IC_IN & ic1, IC_OUT & ic2, TC_OUT & tc )
        : CnC::graph( ctxt, name ),
          m_input( ic1 ),
          m_dataout( ic2 ),
          m_tagout( tc ),
          m_queue(),
          m_thread( observe_and_compute(), &m_queue, &m_dataout, &m_tagout, this )
    {
        // we started a thread, we are not quiescent until it exits
        this->leave_quiescence();
        // register the callback
        m_input.on_put( new on_data( m_queue ) );
    }
    virtual ~hidden_graph()
    {
        m_thread.join();
    }
private:
    IC_IN  & m_input;   // here we expect the input to arrive
    IC_OUT & m_dataout; // here we put our output data
    TC_OUT & m_tagout;  // here we put the control tags
    blocking_queue m_queue; // the queue where we push the data and the threads pops data from
    std::thread m_thread;   // our internal thread
};

// a template function is much easier to use than a template class with many template arguments.
template< typename Ctxt, typename IC_IN, typename IC_OUT, typename TC_OUT >
hidden_graph< IC_IN, IC_OUT, TC_OUT > * make_hgraph( CnC::context< Ctxt > & ctxt, const std::string & name, IC_IN & ic1, IC_OUT & ic2, TC_OUT & tc )
{
    return new hidden_graph< IC_IN, IC_OUT, TC_OUT >( ctxt, name, ic1, ic2, tc );
}

struct hg_context;

// here we consume the values produced by the hidden graph
struct consume
{
    int execute( const int tag, hg_context & ctxt ) const;
};

// our tuner makes sure the data gets distributed
// as it gets produced in the env, we must define the consumed_on method
struct hg_tuner : public CnC::hashmap_tuner
{
    int consumed_on( const int & tag ) const
    {
        return tag % numProcs();
    }
};

// our actual "global" context
struct hg_context : public CnC::context< hg_context >
{
    CnC::step_collection< consume >   consumer;
    CnC::item_collection< int, int, hg_tuner >  input_data;
    CnC::item_collection< int, int >  processed_data;
    CnC::item_collection< int, int >  result_data;
    CnC::tag_collection< int >        consumer_tags;
    CnC::graph * hgraph;

    hg_context()
        : consumer( *this, "consumer" ),
          input_data( *this, "input_data" ),
          processed_data( *this, "processed_data" ),
          result_data( *this, "result_data" ),
          consumer_tags( *this, "consumer_tags" )
    {
        consumer_tags.prescribes( consumer, *this );
        consumer.consumes( processed_data );
        consumer.produces( result_data );
        hgraph = make_hgraph( *this, "hidden_graph", input_data, processed_data, consumer_tags );
        //        CnC::debug::trace_all( *this );
    }
};

// the consumer just gets the data and puts something into result
int consume::execute( const int tag, hg_context & ctxt ) const
{
    int _val;
    ctxt.processed_data.get( tag, _val );
    ctxt.result_data.put( tag+tag, _val*_val );
    return 0;
}

// the env only puts data and waits for completion
int main()
{
    srand( 11 );
#ifdef _DIST_
    CnC::dist_cnc_init< hg_context > _dc;
#endif
    hg_context _ctxt;
    for( int i = 0; i < 444; ++i ) {
        _ctxt.input_data.put( i, rand() );
    }
    _ctxt.wait();
    // we use srand, so the number of generated tuples should always be identical
    if( _ctxt.result_data.size() != 7 * CnC::tuner_base::numProcs() ) {
        std::cerr << "Error: expected " << (7 * CnC::tuner_base::numProcs()) << " items, found " << _ctxt.result_data.size() << std::endl;
        exit(11);
    }
    return 0;
}

samples/mpicnc/mpicnc/mpicnc.cpp

/* *******************************************************************************
 *  Copyright (c) 2013-2014, Intel Corporation
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 *  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ********************************************************************************/

/*
  Showcasing ths SPMD style interface to CnC.  We'll alternate between
  MPI and CnC phases.  Additionally we'll let different groups of
  processes concurrently operate on separate CnC phases (we'll use the
  same context, but different instances of it).

  This is an "ariticial" example, we don't really compute anything
  meaningful.  The MPI-phase is a simple All_to_all communication. The
  CnC phase prescribes a set of steps which simple take an input and
  produce one output.  We use the control tag as the key of the output
  item.  We use tag/F as the key of the input item, as tags are
  integers this makes F many steps depend on the same item instance.
 */

#include <mpi.h>
#ifdef _DIST_
#include <cnc/dist_cnc.h>
#else
#include <cnc/cnc.h>
#endif
#include <cnc/debug.h>

struct my_context;

// our simple step
struct MyStep
{
    int execute( int n, my_context & c ) const;
};

const int N = 480; // number of step-instances, will create control tags 0-N
const int F = 6;   // number of steps reading the same item

// include the tuner, needed for testing (only)
#include "mpicnc.h"

// The CnC context defines a trivial graph with 3 collections.
// Nothing specific to our MPI/CNC/SPMD combo
struct my_context : public CnC::context< my_context >
{
    CnC::step_collection< MyStep, my_tuner > m_steps;
    CnC::tag_collection<int>                 m_tags;
    CnC::item_collection<int, int, my_tuner> m_items;
    my_context() 
        : CnC::context< my_context >(),
          m_steps( *this ),
          m_tags( *this ),
          m_items( *this )
    {
        m_tags.prescribes( m_steps, *this );
        m_steps.consumes( m_items );
        m_steps.produces( m_items );
        CnC::debug::trace( m_items );
        //CnC::debug::trace_all( *this );
    }
};

// ok, let's get one item and put another one.
// we don't care what the data is.
int MyStep::execute( int t, my_context & c ) const
{
    int item = 11;
    // tags are integers; so t/F returns the same value for multiple values of t(ag)
    if( t ) c.m_items.get( t/F, item );
    c.m_items.put( t, t+item*item );

    return CnC::CNC_Success;
}

// This is our CnC phase.
// We instantiate the dist_cnc_init object, put a number of tags and wait.
// The function can be called with different parameters.  We need the
// communicator in any case, but of course it can be MPI_Comm_World.
// If dist_env is true we do it the SPMD-way: every process, not only
// root, executes everything. If dist_env is false only the root
// executes things outside the dist_cnc_init constructor.
void cnc_phase( MPI_Comm mc, bool dist_env )
{
#ifdef _DIST_
    // Init distribution infrastructure (RAII), most likely behaves like a barrier
    // if dist_env==false client processes get blocked in here forever.
    CnC::dist_cnc_init< my_context > dc_init( mc, dist_env );
#endif

    // as we provided a communicator to dc_init, all processes
    // continue here (by default only rank 0 gets here)

    std::cout << "Let's go\n";
    // now let's get MPI rank etc.
    int rank = 0, numranks = 0;
    MPI_Comm_rank(mc,&rank);
    MPI_Comm_size(mc,&numranks);

    // The environment code should be executed on rank 0 and
    // on all client processes of a distributed environment.
    // We might have more than one rank 0
    // if different communicators are in use.
    // Multiple environments are semantically tricky, combined with
    // CnC they make sense if the MPI phase creates distributed
    // data to be used by CnC. We mimic this by distributing
    // the control-puts across all processes of a distributed env.
    if( rank == 0 || dist_env ) {
        // create the context, the runtime will distribute it under the hood
        my_context c;
        // dist_env requires a sync, e.g. a barrier
        if( dist_env ) MPI_Barrier( mc );
        // start tag, 0 if root
        int s = rank;
        // in a dist env each process puts a subset of tags
        // we do this by setting the loop-increment to the number
        // of processes in the env
        int inc = dist_env ? numranks : 1;
        // Simply put all (owned) control tags
        for (int number = s; number < N; number += inc) {
            c.m_tags.put(number);
        }
        // everyone in the env waits for quiescence
        c.wait();
        std::cout << "done\n";
    }
    // here the CnC distribution infrastructure gets shut down 
    // when the dist_cnc_init object gets destructed (RAII)
}

int main( int argc, char *argv[] )
{
    int  numranks, rank; 
    int p;
    MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p );
    if( p != MPI_THREAD_MULTIPLE ) std::cerr << "Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl;

    MPI_Comm_size(MPI_COMM_WORLD,&numranks);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    if( rank == 0 ) {
        system("ldd /home/fschlimb/cnc/trunk/distro/tests_runtime/tests-linux-intel64/distmpicnc");
    }

    // Let's split COMM_WORLD into 2 MPI_Comm's
    MPI_Comm  newcomm;
    int color = rank%2;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm);
    
    // let's not only iterate through alternating CnC/MPI phases, but
    // also let 2 CnC "instances" work concurrently (each has its own
    // MPICommunicator)
    for( int i = 0; i < 2; ++i ) { // you can loop as often as you like
        // Now let's do some CnC, all processes execute this, but on different MPI_Comm's.
        // One communicator is executed as a dist_env (color==0)
        // the other as a "normal" single-env on its root (color==1)
        cnc_phase( newcomm, color == 1 );
        // And some MPI in between
        // All procs do this together on MPI_COMM_WORLD
        int res;
        MPI_Allreduce(&color, &res, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    }
    // Done: collectively shut down MPI
    MPI_Finalize();

    if( rank == 0 ) std::cerr << "\n\nFinalized and out.\nThis should be the last message.\n";
}

matrix_inverse/matrix_inverse_distenv/matrix_inverse.cpp

//********************************************************************************
// Copyright (c) 2007-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  **
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE    **
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE   **
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE     **
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR          **
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF         **
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS     **
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      **
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)      **
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF       **
// THE POSSIBILITY OF SUCH DAMAGE.                                              **
//********************************************************************************

/* this version differs from the plain version
   - it uses smart-pointersm, which avoids data-copies and allows distributed
   allocation of the tile-array;
   performance seems to suffer a bit from this, but memory footprint is cut to half
   - in dist-mode, the array/matrix is generated distributedly, e.g. each
   process generates what it owns; input to the CnC graph is also done distributedly
   Mimicking SPMD-style interaction with MPI codes.
   Search for "FOR_ME" and "_DIST_" to look at these changes.
*/

#ifdef _DIST_
#include <mpi.h>
#include <cnc/dist_cnc.h>
#include <cnc/internal/dist/distributor.h>
#else
#include <cnc/cnc.h>
#endif
#include "cnc/debug.h"

#include <utility>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <string.h>
#include <cassert>
#include <memory>

#ifdef WIN32
#include <windows.h>
#include <psapi.h>
#else
#include <sys/resource.h>
#endif

#include "tbb/tick_count.h"
#include "tbb/atomic.h"

#include "../tile.h"

using namespace CnC;


class tile_array;

struct tile_tag {
    int m_i0, m_i1, m_i2, m_dim;
    
    tile_tag( int dim, int i0, int i1, int i2 ) :
        m_i0(i0), m_i1(i1), m_i2(i2), m_dim(dim) {};

    bool operator==( const tile_tag & t ) const {
        return m_dim == t.m_dim &&
            m_i0 == t.m_i0 && m_i1 == t.m_i1 && m_i2 == t.m_i2;
    }
#ifdef _DIST_
    tile_tag()
        : m_i0( -1 ), m_i1( -1 ), m_i2( -1 ), m_dim( 0 )
    {}
    void serialize( CnC::serializer & ser )
    {
        ser & m_i0 & m_i1 & m_i2 & m_dim;
    }
#endif
    
};


template <>
struct cnc_hash< tile_tag > 
{
    size_t operator()(const tile_tag& tt) const
    {
        unsigned int h = (int)tt.m_dim;
        
        unsigned int high = h & 0xf8000000;
        h = h << 5;
        h = h ^ (high >> 27);
        h = h ^ tt.m_i0;
        
        unsigned int high1 = h & 0xf8000000;
        h = h << 5;
        h = h ^ (high1 >> 27);
        h = h ^ tt.m_i1;

        unsigned int high2 = h & 0xf8000000;
        h = h << 5;
        h = h ^ (high2 >> 27);
        h = h ^ tt.m_i2;

        return size_t(h);
    }
};


namespace std {
    std::ostream & cnc_format( std::ostream& os, const tile_tag &tt )
    {
        return os << "(" << tt.m_dim << ":"
                  << tt.m_i0 << "," << tt.m_i1 << "," << tt.m_i2 << ")";
    }
}

struct my_context;



struct compute_inverse
{
    int execute( const tile_tag & t, my_context & c ) const;
};

//#define NO_CONSUMED_ON 1

struct my_tuner : public CnC::step_tuner<>, public CnC::hashmap_tuner
{
    typedef tile_tag tag_type;

    int PROC_FOR_TILE( int _x, int _y ) const { return ( ( ( ( (_y) * m_tnx + 31 ) + (_x) ) ) % m_nP ); }
    int COMPUTE_ON( int _i, int _j )    const { return ( PROC_FOR_TILE( (_i) / m_tsx, (_j) / m_tsy ) ); }

    template< class dependency_consumer >
    void depends( const tile_tag & tag, my_context & c, dependency_consumer & dC ) const;
    my_tuner( int ntiles = 0 );
    int compute_on( const tile_tag & tag, my_context & ) const;

    int get_count( const tag_type & tag ) const;
#ifdef NO_CONSUMED_ON
    int consumed_on( const tag_type & tag ) const { return CnC::CONSUMER_UNKNOWN; }
#else
    const std::vector< int > & consumed_on( const tile_tag & tag ) const;
#endif
    int produced_on( const tile_tag & tag ) const;
    int m_nP;
    int m_tnx, m_tny, m_tsx, m_tsy;
    std::vector< std::vector< int > > m_rows, m_cols, m_rowcols, m_procs;
    std::vector< int > m_all;
    void serialize( CnC::serializer & ser ) 
    {
        ser & m_nP & m_tnx & m_tny & m_tsx & m_tsy
            & m_rows & m_cols & m_rowcols & m_procs & m_all;
        assert( m_nP > 0 && m_nP == (int)m_procs.size() );
    }
};


typedef std::shared_ptr< const tile > const_tile_type;
typedef std::shared_ptr< tile > tile_type;

              
struct my_context : public CnC::context< my_context >
{
    my_tuner m_tuner;
    CnC::step_collection< compute_inverse, my_tuner > m_steps;
    CnC::item_collection< tile_tag, const_tile_type, my_tuner >  m_tiles;
    CnC::tag_collection< tile_tag > m_tags;

    my_context( int nt = 0 )
        : CnC::context< my_context >(),
          m_tuner( nt ),
          m_steps( *this, "mi", compute_inverse(), m_tuner ),
          m_tiles( *this, m_tuner ),
          m_tags( *this )
    {
        m_tags.prescribes( m_steps, *this );
#if 0
        CnC::debug::trace( m_tiles, 3 );
        //        CnC::debug::trace( m_tags, 3 );
        //        CnC::debug::trace( m_steps, 3 );
#endif        
        CnC::debug::collect_scheduler_statistics(*this);
    }

    virtual void serialize( CnC::serializer & ser ) 
    {
        ser & m_tuner;
    }
};


#ifdef _DIST_
// returns if the given tile (x,y) is owned by my (r)
#  define FOR_ME( _c, _r, _x, _y ) ((_r) == _c.m_tuner.COMPUTE_ON( _x, _y ))
// returns if the given tile (x,y) is owned by my (r)m always true if r==0
#  define FOR_ME_OR_0( _c, _r, _x, _y ) (((_r)==0) || FOR_ME( _c, _r, _x, _y ))
#else
#  define FOR_ME( _c, _r, _x, _y ) true
#  define FOR_ME_OR_0( _c, _r, _x, _y ) true
#endif


class tile_array
{
    int m_dim;
    int m_size;
    const_tile_type * m_tiles;
    
public:

    int dim() const { return m_dim; }
    int size() const { return m_size; }

    tile_array( int size = 0 ) :
        m_dim((size + TILESIZE - 1)/TILESIZE), // Size/TILESIZE rounded up
        m_size(size), 
        m_tiles( NULL )
    {
        if( m_dim ) m_tiles = new const_tile_type[m_dim*m_dim];
    }
    
    ~tile_array()
    {
        delete[] m_tiles;
    }

    tile_array(const tile_array& t)
    {
        m_size = t.m_size;
        m_dim = t.m_dim;
        int sz = m_dim*m_dim;
        m_tiles = new const_tile_type[sz];
        for( int i = 0; i < sz; ++i ) {
            m_tiles[i] = t.m_tiles[i];
        }
    }
    
    tile_array& operator=(const tile_array& t)
    {
        if (this != &t) {
            int sz = t.m_dim*t.m_dim;
            if( m_dim != t.m_dim ) {
                delete[] m_tiles;
                m_tiles = new const_tile_type[sz];
                m_size = t.m_size;
                m_dim = t.m_dim;
            }
            for( int i = 0; i < sz; ++i ) {
                m_tiles[i] = t.m_tiles[i];
            }
        }
        return *this;
    }
    
    void dump( double epsilon = 1e-12 ) const {
        for (int i = 0; i < m_dim; i++ ) {
            for (int j = 0; j < m_dim; j++ ) {
                std::cout << "(" << i << "," << j << ")" << std::endl;
                m_tiles[m_dim*i+j]->dump(epsilon);
            }
            std::cout << std::endl;
        }
    }

    int generate_matrix( int dimension, my_context & c  )
    {
#ifdef _DIST_
        // need rank determine owned fraction of the matrix (FOR_ME)
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#endif
        
        printf("Floating point elements per matrix: %i x %i\n", dimension, dimension);
        printf("Floating point elements per tile: %i x %i\n", TILESIZE, TILESIZE);

        if( m_size != dimension ) {
            delete[] m_tiles;
            m_size = dimension;
            m_dim = (m_size + TILESIZE - 1)/TILESIZE; // Size/TILESIZE rounded up
            m_tiles = new const_tile_type[m_dim*m_dim];
        }

        printf("tiles per matrix: %i x %i\n", m_dim, m_dim);
        int dim = m_dim;
        int size = m_size;

        std::cout << "dim(" << dim << ") size(" << size << ")" << std::endl;
        double e = 0.0;
        for (int I = 0; I < dim; I++) {
            for (int J = 0; J < dim; J++) {
                if( FOR_ME_OR_0( c, rank, I, J ) ) { // for the identity check we need the entire matrix on 0
                    srand( I*m_dim+J );
                    int ii = I * TILESIZE;;
                    tile_type _tile = std::make_shared< tile >();
                    for (int i = 0; i < TILESIZE; i++) {
                        int jj = J * TILESIZE;
                        for (int j = 0; j < TILESIZE; j++) {
                            if ((ii < size)&(jj < size)) e = double(rand())/RAND_MAX;
                            else if (ii == jj) e = 1; // On-diagonal padding
                            else e = 0; // Off-diagonal padding
                            //                        std::cout << "m[" << ii << "," << jj << "(" << I << "," << J << "," << i << "," << j << ")]=" << e << std::endl;
                            _tile->set(i,j,e);
                            jj++;
                        }
                        ii++;
                    }
                    //                    std::cerr << rank << " generated tile " << I << "," << J << std::endl;
                    m_tiles[dim*I + J] = _tile;
                }
            }
        }
        return m_dim;
    }
    
    
    int identity_check( double epsilon = MINPIVOT ) const
    {
        int ecount = 0;
        
        for (int i = 0; i < m_dim; i++ ) {
            for (int j = 0; j < m_dim; j++ ) {
                int tcount = 0;
                const_tile_type t = m_tiles[m_dim*i+j];

                tcount = (i == j) ?  t->identity_check(epsilon) : t->zero_check(epsilon);                
                if (tcount == 0 ) continue;
                
                std::cout << "problem in tile(" << i << "," << j << ")" << std::endl;
                ecount += tcount;
            }
        }
        return ecount;
    }
    
    bool equal( const tile_array &b ) const
    {
        if (b.m_dim != m_dim) return false;
        
        for (int i = 0; i < m_dim; i++ ) {
            for (int j = 0; j < m_dim; j++ ) {
                const_tile_type t = m_tiles[m_dim*i+j];
                if (!t->equal( *b.m_tiles[m_dim*i+j] )) return false;
            }
        }
        return true;
    }
    
    // c = this * b
    tile_array multiply(const tile_array &b) const
    {
        tile_array c(m_size);
        for (int i = 0; i < m_dim; i++) {
            for (int j = 0; j < m_dim; j++) {
                tile_type t = std::make_shared< tile >();
                t->zero();
                for (int k = 0; k < m_dim; k++) {
                    t->multiply_add_in_place(*m_tiles[m_dim*i+k], *b.m_tiles[m_dim*k+j]);
                }
                c.m_tiles[m_dim*i+j] = t;
            }
        }
        return c;
    }


    tile_array inverse()
    {
        tile_array b = *this;
        int dim = m_dim;

        for (int n = 0; n < dim; n++) {
            const_tile_type pivot_inverse = std::make_shared< const tile >( C_INVERSE, *b.m_tiles[dim*n+n] );
            b.m_tiles[dim*n+n] = pivot_inverse;
    
            for (int j = 0; j < dim; j++) {
                if (j == n) continue;
            
                const tile& tnj = *b.m_tiles[dim*n+j];
                b.m_tiles[dim*n+j] = std::make_shared< const tile >( C_MULTIPLY, *pivot_inverse, tnj );
            }
        
            for (int i = 0; i < dim; i++) {
                if (i == n) continue;

                const_tile_type tin = b.m_tiles[dim*i+n];
                b.m_tiles[dim*i+n] = std::make_shared< const tile >( C_MULTIPLY_NEGATE, *tin, *pivot_inverse );
                
                for (int j = 0; j < dim; j++) {
                    if (j == n) continue;
                    const_tile_type tnj = b.m_tiles[dim*n+j];
                    tile_type tmp = std::make_shared< tile >( *b.m_tiles[dim*i+j] );
                    tmp->multiply_subtract_in_place(*tin, *tnj);
                    b.m_tiles[dim*i+j] = tmp;
                }
            }
        }
        return b;
    }


    tile_array inverse_cnc( my_context & c )
    {
        // all participating processes execute this
        int rank = 0;
#ifdef _DIST_
        // need rank determine owned fraction of the matrix (FOR_ME)
        MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#endif
        // copy what process owns from local array to item-collection
        for (int i = 0; i < m_dim; i++) {
            for (int j = 0; j < m_dim; j++) if( FOR_ME( c, rank, i, j ) ) {
                    tile_tag t( m_dim, 0, i, j);
                    c.m_tiles.put( t, m_tiles[m_dim*i+j] );
                    assert( m_tiles[m_dim*i+j].get() );
                }
        }
        // put control tags (only what this process is reponsible for)
        for (int i = 0; i < m_dim; i++) {
            for (int j = 0; j < m_dim; j++) if( FOR_ME( c, rank, i, j ) ) {
                    c.m_tags.put( tile_tag( m_dim, 0, i, j) );
                }
        }

        // "wait" is an implicit barrier,
        // no process returns until entire dsitributed graph evaluation reached quiesence
        c.wait();

        // now write back data from item-collection to local arry
        tile_array b(m_size);
        //        c.m_tiles.size(); // not dist-env-ready
        for (int i = 0; i < m_dim; i++) {
            for (int j = 0; j < m_dim; j++) if( FOR_ME_OR_0( c, rank, i, j ) ) {// we need all output on rank 0 for verification
                    const_tile_type _tmp;
                    c.m_tiles.get( tile_tag( m_dim, m_dim, i, j), _tmp );
                    b.m_tiles[m_dim*i+j] = _tmp;
                }
        }
        return b;
    }
};
    

int compute_inverse::execute( const tile_tag & tag, my_context & c ) const
{
    int n = tag.m_i0;
    int i = tag.m_i1;
    int j = tag.m_i2;

    tile_tag out_tag( tag.m_dim, n+1, i, j );

    if (i == n && j == n ) 
        {
            const_tile_type tnn;
            c.m_tiles.get( tag, tnn );
            //        tile out_nij = tnn.inverse();
            c.m_tiles.put( out_tag, std::make_shared< const tile >( C_INVERSE, *tnn ) );//out_nij );
        }
    else if ( i == n ) 
        {
            const_tile_type tnj;
            c.m_tiles.get( tag, tnj );
            assert( tnj.get() );
            const_tile_type tn1nn;
            c.m_tiles.get( tile_tag( tag.m_dim, n+1, n, n ), tn1nn );
            //        tile out_nij = tn1nn.multiply(tnj);
            c.m_tiles.put( out_tag, std::make_shared< const tile >( C_MULTIPLY, *tn1nn, *tnj ) );
        }
    else if ( j == n ) 
        {
            const_tile_type tin;
            c.m_tiles.get( tag, tin );
            const_tile_type tn1nn;
            c.m_tiles.get( tile_tag( tag.m_dim, n+1, n, n ), tn1nn );
            //        tile out_nij = tin.multiply_negate(tn1nn);
            c.m_tiles.put( out_tag, std::make_shared< const tile >( C_MULTIPLY_NEGATE, *tin, *tn1nn ) );//out_nij );
        }
    else 
        {
            const_tile_type tij;
            c.m_tiles.get( tag, tij );
            const_tile_type tnin;
            c.m_tiles.get( tile_tag( tag.m_dim, n, i, n ), tnin );
            const_tile_type tn1nj;
            c.m_tiles.get( tile_tag( tag.m_dim, n+1, n, j ), tn1nj );
            tile_type tmp = std::make_shared< tile >( *tij );
            tmp->multiply_subtract_in_place( *tnin, *tn1nj );
            assert( tmp.get() );
            c.m_tiles.put( out_tag, tmp );
            //        tile out_nij = tij.multiply_subtract( tnin, tn1nj );
            //        b.m_tiles[dim*i+j] = tmp;
            //        c.m_tiles.put( out_tag, std::make_shared< const tile >( tij->multiply_subtract( *tnin, *tn1nj) ) );//out_nij );
        }
    
    if ( (n+1) < tag.m_dim ) 
        {
            c.m_tags.put( out_tag );
        }
    return CnC::CNC_Success;
}

int my_tuner::get_count( const tag_type & tt ) const
{
    int dim = tt.m_dim;
    int n = tt.m_i0;
    if( dim == n ) return CnC::NO_GETCOUNT;
    int i = tt.m_i1;
    int j = tt.m_i2;
    
    int count = 1;

    if (i == (n-1) && j == (n-1)) count += (dim-1) + (dim-1);
    if (i == (n-1) && !(j == n-1)) count += dim-1;
    if (j == n  && !(i == n)) count += dim-1;

    return count;
}


my_tuner::my_tuner( int ntiles )
    : m_tnx( 0 ),
      m_tny( 0 ),
      m_tsx( 1 ),
      m_tsy( 1 ),
      m_rows(),
      m_cols(),
      m_rowcols(),
      m_procs(),
      m_all( 1, CnC::CONSUMER_ALL )
{
    m_nP = numProcs();
#ifdef _DIST_
    int _np = m_nP * 1;
    assert( _np == 1 || ( ntiles * ntiles ) % _np == 0 );
    if( _np > 1 && ntiles ) { 
        // compute grid dimension for partitioning
        m_tnx = 2, m_tny = _np / 2;
        while( m_tnx < m_tny && m_tny % 2 == 0 ) {
            m_tnx *= 2;
            m_tny = _np / m_tnx;
        }
        assert( m_tnx * m_tny == _np );
        m_tsx = ntiles / m_tnx;
        m_tsy = ntiles / m_tny;
        std::cerr << m_tnx << "x" << m_tny << " tiles of " << m_tsx << "x" << m_tsy << std::endl;
        assert( m_tnx * m_tsx == ntiles && m_tny * m_tsy == ntiles );
#ifndef NO_CONSUMED_ON
        // setup vectors to store info for consumed_on
        m_rows.resize( m_tny );
        m_cols.resize( m_tnx );
        m_rowcols.resize( m_tnx * m_tny );
        for( int i = 0; i < m_tnx; ++i ) {
            m_cols[i].resize( m_tny );
            for( int j = 0; j < m_tny; ++j ) {
                m_rowcols[j*m_tnx+i].resize( m_tnx+m_tny );
            }
        }
        for( int j = 0; j < m_tny; ++j ) {
            m_rows[j].resize( m_tnx );
            for( int i = 0; i < m_tnx; ++i ) {
                m_rows[j][i] = m_cols[i][j] = PROC_FOR_TILE( i, j );
                for( int x = 0; x < m_tnx; ++x ) m_rowcols[j*m_tnx+i][x] = PROC_FOR_TILE( x, j );
                for( int y = 0; y < m_tny; ++y ) m_rowcols[j*m_tnx+i][m_tnx+y] = PROC_FOR_TILE( i, y );
                m_rowcols[j*m_tnx+i][i] = m_rowcols[j*m_tnx+i].back();
                m_rowcols[j*m_tnx+i].pop_back();
            }
        }
        m_procs.resize( m_nP );
        for( int i = 0; i < m_nP; ++i ) m_procs[i].resize( 1, i );
        assert( m_nP > 0 && (int)m_procs.size() == m_nP );
#endif
    }
#endif
}

int my_tuner::compute_on( const tile_tag & tag, my_context & ) const
{
    return COMPUTE_ON( tag.m_i1, tag.m_i2 );
}

int my_tuner::produced_on( const tile_tag & tag ) const
{
    return ( tag.m_i0 > 0 ? COMPUTE_ON( tag.m_i1, tag.m_i2 ) : 0 );
}

template< class dependency_consumer >
void my_tuner::depends( const tile_tag & tag, my_context & c, dependency_consumer & dC ) const
{
    int n = tag.m_i0;
    int i = tag.m_i1;
    int j = tag.m_i2;

    dC.depends( c.m_tiles, tag ); //, PROD( tag.m_dim, n-1, i, j ) );
    
    if (i == n && j == n ) {
    } else if ( i == n ) {
        dC.depends( c.m_tiles, tile_tag( tag.m_dim, n+1, n, n ) );
    } else if ( j == n ) {
        dC.depends( c.m_tiles, tile_tag( tag.m_dim, n+1, n, n ) );
    } else {
        dC.depends( c.m_tiles, tile_tag( tag.m_dim, n, i, n ) );
        dC.depends( c.m_tiles, tile_tag( tag.m_dim, n+1, n, j ) );
    }
}

#ifndef NO_CONSUMED_ON
const std::vector< int > & my_tuner::consumed_on( const tile_tag & tag ) const
{
    assert( 0 < m_procs.size() );
    int n = tag.m_i0;
    int i = tag.m_i1;
    int j = tag.m_i2;
    
    if( n - 1 == i && ( i == j || n == j ) ) {
        return m_rowcols[(j/m_tsy)*m_tnx+(i/m_tsx)];
    } else if( n - 1 == i ) {
        return m_rows[j/m_tsy];
    } else if( n == j ) {
        return m_cols[i/m_tsx];
    }
    return m_procs[COMPUTE_ON( i, j )]; 
}
#endif

void report_memory()
{
    std:: cout << "tiles created " << tiles_created << " tiles deleted " << tiles_deleted << " tiles remaining " << tiles_created - tiles_deleted << std::endl;
    tiles_created = 0;
    tiles_deleted = 0;

    static int lastr = 0;

#ifdef WIN32
    HANDLE self;
    PROCESS_MEMORY_COUNTERS pmc;
    SIZE_T resident = 0;
    self = GetCurrentProcess();
    if (GetProcessMemoryInfo(self, &pmc, sizeof(pmc))) {
        resident = pmc.WorkingSetSize;
    }
    CloseHandle(self);
#else
    FILE *f = fopen("/proc/self/statm", "r");
    int total, resident, share, trs, drs, lrs, dt;
    if( fscanf(f,"%d %d %d %d %d %d %d", &total, &resident, &share, &trs, &drs, &lrs, &dt) != 7 ) std::cerr << "error reading /proc/self/statm\n";
#endif

    std:: cout << "resident memory MB " << double(resident*4096)/1000000 << "   increment MB " << double((resident-lastr)*4096)/1000000 << std::endl;
    lastr = resident;
}


void report_time( const char * mode, int msz, double time ) 
{
    std::cout <<  mode << " Total Time: " << time << " sec" << std::endl;
    float Gflops = ((float)2*msz*msz*msz)/((float)1000000000);
    if (Gflops >= .000001) printf("Floating-point operations executed: %f billion\n", Gflops);
    if (time >= .001) printf("Floating-point operations executed per unit time: %6.2f billions/sec\n", Gflops/time);
}
    


int main(int argc, char *argv[])
{
    if (!(argc == 2 && 0 != atoi(argv[1]))) {
        std::cout << "Usage: matrix_inverse dim" << std::endl;
        return -1;
    }

    int sz = atoi(argv[1]);
    int tdim = (sz + TILESIZE - 1)/TILESIZE;

#ifdef _DIST_
    // init MPI to mimick SPMD-style interactino with CnC through MPI
    int p;
    //!! FIXME passing NULL ptr breaks mvapich1 mpi implementation
    MPI_Init_thread( 0, NULL, MPI_THREAD_MULTIPLE, &p );
    if( p != MPI_THREAD_MULTIPLE ) std::cerr << "Warning: not MPI_THREAD_MULTIPLE (" << MPI_THREAD_MULTIPLE << "), but " << p << std::endl;
    int rank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    { // need a scope for dist_init

        // init CnC here so that we have the context/tuner 
        // the tuner defines the distribution strategy, which we use when generating the matrix
        CnC::dist_cnc_init< my_context > dc_init( MPI_COMM_WORLD, true );

        { // need a scope for the context
#endif
            tile_array in_array;
            struct my_context c( tdim );
            std::cout << "Generating matrix of size " << argv[1] << std::endl;
            // all processes do this, we are SPMD!
            tdim = in_array.generate_matrix(sz, c);

            report_memory();

            // invert serially
            {
                std::cout << "Invert serially" << std::endl;
                tbb::tick_count t0 = tbb::tick_count::now();
#ifndef _DIST_
                tile_array out_array = in_array.inverse();
#endif
                tbb::tick_count t1 = tbb::tick_count::now();
                report_time( "Serial", sz, (t1-t0).seconds() );
                report_memory();
        
#ifndef _DIST_
                tile_array test = in_array.multiply(out_array);
                test.identity_check(1e-5);
#endif
            } // end of scope releases out_array and test
#ifdef _DIST_
            // we need a barrier between CnC context creation and putting stuff
            MPI_Barrier( MPI_COMM_WORLD );
#endif
            report_memory();

            {
                std::cout << "Invert CnC steps" << std::endl;
                //debug::set_num_threads(1);
                tbb::tick_count t2 = tbb::tick_count::now();
                tile_array out_array2 = in_array.inverse_cnc(c);
                tbb::tick_count t3 = tbb::tick_count::now();
                report_time( "CnC", out_array2.size(), (t3-t2).seconds() );
                report_memory();
        
#ifdef _DIST_
                // only rank 0 has the full input matrix for the verification
                if( rank == 0 )
#endif
                    {
                        tile_array test2 = in_array.multiply(out_array2);
                        test2.identity_check(1e-5);
                    } // end of scope releases test2
            } // end of scope releases out_array2
            report_memory();
#ifdef _DIST_
            MPI_Barrier( MPI_COMM_WORLD );
        }
    }

    MPI_Finalize();
#endif
    return 0;
}

samples/db/mysql_simple/mysql_simple.cpp

//********************************************************************************
// Copyright (c) 2014-2014 Intel Corporation. All rights reserved.              **
//                                                                              **
// Redistribution and use in source and binary forms, with or without           **
// modification, are permitted provided that the following conditions are met:  **
//   * Redistributions of source code must retain the above copyright notice,   **
//     this list of conditions and the following disclaimer.                    **
//   * Redistributions in binary form must reproduce the above copyright        **
//     notice, this list of conditions and the following disclaimer in the      **
//     documentation and/or other materials provided with the distribution.     **
//   * Neither the name of Intel Corporation nor the names of its contributors  **
//     may be used to endorse or promote products derived from this software    **
//     without specific prior written permission.                               **
//                                                                              **
// This software is provided by the copyright holders and contributors "as is"  **
// and any express or implied warranties, including, but not limited to, the    **
// implied warranties of merchantability and fitness for a particular purpose   **
// are disclaimed. In no event shall the copyright owner or contributors be     **
// liable for any direct, indirect, incidental, special, exemplary, or          **
// consequential damages (including, but not limited to, procurement of         **
// substitute goods or services; loss of use, data, or profits; or business     **
// interruption) however caused and on any theory of liability, whether in      **
// contract, strict liability, or tort (including negligence or otherwise)      **
// arising in any way out of the use of this software, even if advised of       **
// the possibility of such damage.                                              **
//********************************************************************************


/* 
 Attach a database to an item-table.

 We create a very simple graph with one step and one item-collection.
 It doesn't do anything sensible, just a demo.  Of course you can
 write your own application which uses as many DB-attached
 item_collections as you want. Each item-coll keeps its connection and
 config separately.

 We use our sql_tuner to attach to the DB. It uses our wrapper
 sql_item_table.  The sql_config allows configuring the DB (port,
 table, layout etc) as well as how we actually store (or not) the data
 in there (by providing SQL statements).  E.g. we might pre-fill the
 collection at start-up with what's already in the table; or we might
 want to delete everything when attaching to it.

 The program uses mysql and assumes a user "cnc" with pw "cnc" and a
 DB "test".  You can easily adjust with the SQL config below.  Setup
 your mysql accordingly.
*/

#include <cnc/cnc.h>
#include "mysql_tuner.h"

struct my_context;
struct my_sqlstep
{
    int execute( int, my_context & ctxt ) const;
};

struct my_context : public CnC::context< my_context >
{
    // we use the sql_tuner to attach the item-col to a DB
    sql_tuner m_tuner;
    CnC::tag_collection< int > m_tags;
    CnC::item_collection< int, int, sql_tuner > m_items;
    CnC::step_collection< my_sqlstep > m_steps;
    my_context()
        // the argument to the tuner is a config for the DB
        : m_tuner( {"tcp://127.0.0.1:3306",                                  // port
                    "cnc", "cnc",                                            // user, password
                    "test",                                                  // DB/schema,
                    "DROP TABLE IF EXISTS items;",                           // drop statement, set to "" to keep table
                    "CREATE TABLE If NOT EXISTS items (id INT, value INT);", // table creation statement, set to "" to not create table
                    "SELECT id, value FROM items;",                          // read full table statement, set to "" to not read existing entries
                    "DELETE * FROM items;",                                  // clear statements
                    "SELECT value FROM items WHERE id=?;",                   // get statement
                    "INSERT INTO items( id, value ) VALUES ( ?, ? );",       // put statement, set to "" for not putting things into DB
                    "DELETE FROM items WHERE id=?;",                         // erase statement, set to "" to keep things in table upon collection.reset()
                    }),
          m_tags( *this, "tags" ),
          m_items( *this, "items", m_tuner ),
          m_steps( *this, "steps" )
    {
        m_tags.prescribes( m_steps, *this );
        m_steps.consumes( m_items );
        m_steps.produces( m_items );
    }
};

// our step simple gets a piece of data and puts another one
int my_sqlstep::execute( int t, my_context & ctxt ) const
{
    int r;
    ctxt.m_items.get( t-1, r );
    ctxt.m_items.put( t, t+r );
    return 0;
}

// we simply create the graph, put the control tags and the initial data.
// "mysql -u cnc -p test -pcnc -e "select * from items;" shows what has been stored to the table
int main(void)
{
    my_context _ctxt;
    _ctxt.m_items.put( -1, -1 );
    for( int i = 0; i<100; ++i ) {
        _ctxt.m_tags.put( i );
    }
    _ctxt.wait();
    for( int i = 0; i<10; ++i ) {
        int _ir = -1;
        _ctxt.m_items.get( i, _ir );
        std::cout << _ir << " for " << i << std::endl;
    }

    return 0;
}