
#include "stdafx.h"
#include "qlist.h"
#include "gate.h"
#include "gategroup.h"
#include "matrix.h"
#include "CalculationThreads.h"

// CCalculationThreads::CCalculationThreads( int iThreads, int iPriority )
//
// Builds the pool: one calculation slot and one worker thread per requested
// thread.
//
//   iThreads  - number of worker threads (and calculation slots) to create
//   iPriority - priority value handed to AfxBeginThread for every worker
//
// m_WorkToDo is constructed with (0, iThreads) and m_FreeThreads with
// (iThreads, iThreads); the workers block on m_WorkToDo until
// CalcGatesAsync signals it.
//
// NOTE(review): the result of AfxBeginThread is not checked. The destructor
// waits for one m_FreeThreads signal per thread on exit, so a worker that
// failed to start would presumably leave it waiting forever - confirm.
CCalculationThreads::CCalculationThreads( int iThreads, int iPriority )
    : m_iThreads( iThreads ),
      m_WorkToDo( 0, iThreads ),
      m_FreeThreads( iThreads, iThreads ),
      m_bTerminate( FALSE )
{
    m_pCalculation = new Calculation[ m_iThreads ];

    for( int iSlot = 0; iSlot < m_iThreads; ++iSlot ){
        Calculation &slot = m_pCalculation[ iSlot ];

        // Every slot starts idle, with both work spaces sized up front so
        // that CalcGate's later SetLength calls stay within
        // INITIAL_WORKSPACE_LENGTH (see the assertion in CalcGate).
        slot.m_State = Waiting;
        slot.m_cvWorkSpaceOut.SetLength( INITIAL_WORKSPACE_LENGTH, FALSE );
        slot.m_cvWorkSpaceIn.SetLength( INITIAL_WORKSPACE_LENGTH, FALSE );

        // Start the worker; it receives this object as its argument.
        AfxBeginThread( ThreadStart, static_cast<LPVOID>( this ), iPriority );
    }
}

// CCalculationThreads::~CCalculationThreads( )
//
// Shuts the pool down:
//   1. acquires m_FreeThreads once per thread, i.e. waits until no
//      calculation is outstanding;
//   2. sets m_bTerminate and signals m_WorkToDo once per thread so every
//      worker blocked in ThreadMainLoop wakes up and sees the flag;
//   3. acquires m_FreeThreads once per thread again - ThreadStart signals
//      it after each worker's main loop has returned;
//   4. deletes the calculation slots.
CCalculationThreads::~CCalculationThreads( )
{
    // The index is shared by both loops below, so it is declared at function
    // scope. (Under ISO C++ a variable declared in a for-init statement goes
    // out of scope at the end of that loop.)
    int i;

    // wait until we've grabbed all threads
    for( i = 0; i < m_iThreads; i++ ){
        m_FreeThreads.Lock();
    }

    // Set the termination flag and free the threads waiting for
    // a calculation
    m_bTerminate = TRUE;
    m_WorkToDo.Unlock( m_iThreads );

    // again, wait until we've grabbed all threads. They will signal
    // a free process when they terminate.
    for( i = 0; i < m_iThreads; i++ ){
        m_FreeThreads.Lock();
    }

    // No more threads, safe to delete this
    ASSERT( m_pCalculation );
    delete [] m_pCalculation;
}

// UINT CCalculationThreads::ThreadStart( LPVOID lpVoid )
//
// Entry point handed to AfxBeginThread by the constructor.
//
//   lpVoid  - the CCalculationThreads object that created the thread
//   returns - whatever ThreadMainLoop returned
//
// After the main loop has returned, m_FreeThreads is signalled once; the
// destructor waits on that signal before deleting the calculation slots.
UINT CCalculationThreads::ThreadStart( LPVOID lpVoid )
{
    CCalculationThreads *pOwner = static_cast<CCalculationThreads *>( lpVoid );
    ASSERT( pOwner );

    // Service calculations until the owner asks for termination.
    const UINT uExitCode = pOwner->ThreadMainLoop( );

    // Tell the owner that this worker has left its main loop.
    pOwner->m_FreeThreads.Unlock( 1 );

    // Returning ends the thread.
    return uExitCode;
}

// UINT CCalculationThreads::ThreadMainLoop( )
//
// Body of every worker thread. Each pass:
//   - blocks on m_WorkToDo until it is signalled;
//   - under m_ObjectMutex, returns 0 if m_bTerminate is set, otherwise
//     claims the first slot whose state is Ready and marks it Running;
//   - runs CalcGate for that slot with the mutex released;
//   - under m_ObjectMutex, puts the slot back to Waiting;
//   - signals m_FreeThreads once.
UINT CCalculationThreads::ThreadMainLoop( )
{
    // A single lock object serves the whole loop. It starts out unlocked and
    // is acquired and released explicitly around each critical section; its
    // destructor releases the mutex if we return while still holding it.
    CSingleLock stateLock( &m_ObjectMutex );

    for(;;){
        // Sleep until work is queued (or the destructor wakes us).
        m_WorkToDo.Lock();

        stateLock.Lock();

        // Asked to shut down? Leave with the exit code 0.
        if( m_bTerminate )
            return 0;

        // Look for the first slot that has been queued.
        int iSlot = 0;
        while( iSlot < m_iThreads && m_pCalculation[iSlot].m_State != Ready )
            iSlot++;

        // m_WorkToDo was signalled, so a Ready slot is expected to exist.
        ASSERT( iSlot < m_iThreads );

        // Claim the slot before letting go of the mutex.
        m_pCalculation[iSlot].m_State = Running;
        stateLock.Unlock();

        // The calculation itself runs without the mutex held.
        CalcGate( m_pCalculation[iSlot].m_iBaseAmp, iSlot );

        // Hand the slot back so it can be queued again.
        stateLock.Lock();
        m_pCalculation[iSlot].m_State = Waiting;
        stateLock.Unlock();

        // One more free thread available to CalcGatesAsync.
        m_FreeThreads.Unlock( 1 );
    }
    return 0;
}

// void CCalculationThreads::CalcGatesAsync( int iBaseAmp )
//
// Called to queue a calculation. SetCalcData must have been called
// previously with the rest of the required data.
//
//   iBaseAmp - stored in the chosen slot and later passed to CalcGate by the
//              worker that picks the slot up
//
// Blocks on m_FreeThreads until a thread is free. Then, under
// m_ObjectMutex, marks the first Waiting slot as Ready and signals
// m_WorkToDo once.
void CCalculationThreads::CalcGatesAsync( int iBaseAmp )
{
    // Wait for a free thread
    m_FreeThreads.Lock();

    // gain mutex
    {   CSingleLock l1( &m_ObjectMutex, TRUE );

        // The slot index is needed after the search loop, so it is declared
        // outside it. (Under ISO C++ a variable declared in a for-init
        // statement goes out of scope at the end of that loop.)
        int i;

        // find free slot
        for( i = 0; i < m_iThreads; i++ )
            if( m_pCalculation[i].m_State == Waiting )
                break;

        // there should be at least one (as we have a free process)
        ASSERT( i < m_iThreads );

        // Set the data.
        m_pCalculation[i].m_State = Ready;
        m_pCalculation[i].m_iBaseAmp = iBaseAmp;

        // Signal work to do
        m_WorkToDo.Unlock( 1 );
    } // release mutex

}

// void CCalculationThreads::SetCalcData( listGates, cvAmps, piGatePinMask,
//                                        iBaseAmpBits, iPinMask )
//
// Records the parameters shared by every subsequent CalcGatesAsync call.
// The object keeps the ADDRESSES of listGates and cvAmps and the
// piGatePinMask pointer itself (nothing is copied), and CalcGate reads
// through them later.
//
// All calculating threads use this data, so the caller must make sure no
// calculation is in progress when it is changed, i.e. call
// WaitForCompletion first. Debug builds assert that every slot is Waiting.
void CCalculationThreads::SetCalcData( CQList<CGateGroup>    &listGates
                                     , CBaseComplexVector    &cvAmps
                                     , int                   *piGatePinMask
                                     , int                   iBaseAmpBits
                                     , int                   iPinMask
                                     )
{
#ifdef _DEBUG
    {
        // No slot may be queued or running while the shared data changes.
        for( int iSlot = 0; iSlot < m_iThreads; ++iSlot )
            ASSERT( Waiting == m_pCalculation[ iSlot ].m_State );
    }
#endif

    // Pointers to caller-owned data
    m_pListGates    = &listGates;
    m_pcvAmps       = &cvAmps;
    m_piGatePinMask = piGatePinMask;

    // Plain values
    m_iBaseAmpBits  = iBaseAmpBits;
    m_iPinMask      = iPinMask;

    ASSERT( m_pListGates );
    ASSERT( m_pcvAmps );
    ASSERT( m_piGatePinMask );
}

// void CCalculationThreads::WaitForCompletion( )
//
// Wait until all threads have finished calculating.
//
// Acquires m_FreeThreads once per thread, which can only succeed once
// every worker has signalled that it is free, then releases all of them
// again in one call so the pool is usable afterwards. Debug builds also
// assert that every slot is back in the Waiting state.
void CCalculationThreads::WaitForCompletion( )
{
    // The index is shared with the debug-only loop below, so it is declared
    // at function scope. (Under ISO C++ a variable declared in a for-init
    // statement goes out of scope at the end of that loop.)
    int i;

    // Grab all threads
    for( i = 0; i < m_iThreads; i++ ){
        m_FreeThreads.Lock();
    }

#ifdef _DEBUG
    for( i = 0; i < m_iThreads; i++ )
        ASSERT( m_pCalculation[i].m_State == Waiting );
#endif

    // release them all
    m_FreeThreads.Unlock( m_iThreads );
}

// void CCalculationThreads::CalcGate( int iBaseAmp, int iCalc )
//
// Called to calculate a gate. This is an almost exact copy of the
// inner loop of CMetaverse::SimulateSubGroup, with the difference
// that we specify where the gate should place its work space (as we
// have multiple threads calculating simulataneously they cannot all
// use the same work space)
void CCalculationThreads::CalcGate( int iBaseAmp, int iCalc )
{
    // For each gate in the list
    for( int i = 0; i < m_pListGates->Length(); i++ ){

        // Next gate to process
        CGate *pGate = const_cast<CGate *>((*m_pListGates)[i].Gate( 0 ));

        // Mask for the bits not to iterate over
        int iWholeMask = m_piGatePinMask[i] | m_iBaseAmpBits;

        // MFC C++ requires that an object allocated on one thread must be 
        // destroyed by the same thread. Thus we need to ensure that the SetLength
        // calls below do not allocate any memory. This is done by allocating a large
        // amount of memory in the constructor to the class.
        // (if this assertion fails the code will still work but will leak memory)
        ASSERT( pow2( pGate->InputPins() ) <= INITIAL_WORKSPACE_LENGTH );

        // Set the length of the vectors
        m_pCalculation[ iCalc ].m_cvWorkSpaceOut.SetLength( pow2( pGate->InputPins() ), FALSE );
        m_pCalculation[ iCalc ].m_cvWorkSpaceIn.SetLength( pow2( pGate->InputPins() ), FALSE );

        // Calculate the gate
        for( int z = 0; z <= m_iPinMask; z++ ){
            if( z & iWholeMask ){
                z += (z & iWholeMask) - 1;
                continue;
            }
            pGate->CalcOutput( *m_pcvAmps
                             , iBaseAmp | z
                             , m_pCalculation[ iCalc ].m_cvWorkSpaceIn
                             , m_pCalculation[ iCalc ].m_cvWorkSpaceOut
                             );
        }
    }

}

