
#include "stdafx.h"
#include "time.h"

#include "qlist.h"
#include "qexception.h"
#include "matrix.h"
#include "metaverse.h"
#include "pin.h"
#include "circuit.h"
#include "GateGroup.h"
#include "gate.h"
#include "CalculationThreads.h"

#define new DEBUG_NEW

// Define DB to output loads of diagnostic stuff
// #define DB

// Helper routines --------------------------------------------------------------------

// ToBinary
//
// Writes `length` characters ('0' or '1') followed by a terminating NUL to
// out, one character per bit of iValue, highest of the requested bits first.
// The caller must supply room for length + 1 characters.
// (Assumes pow2( n ) yields 2 to the power n - confirm against its definition.)
void ToBinary( char *out, int iValue, int length )
{
    ASSERT( length>0 && length<32 );

    // Walk a single-bit mask down from the top requested bit until it
    // has been shifted out completely
    int iBit = pow2( length - 1 );
    while( iBit != 0 ){
        if( iValue & iBit )
            *out = '1';
        else
            *out = '0';

        ++out;
        iBit >>= 1;
    }

    *out = 0;
}

// OutputBinary
//
// Formats iValue as an iLength digit binary string (using ToBinary) and
// writes it to os. Returns os so that further output can be chained.
ostream& OutputBinary( ostream &os, int iValue, int iLength )
{
    // One character for every bit of an int, plus spare room for the terminator
    char szDigits[ sizeof( int ) * 8 + 2 ];

    ToBinary( szDigits, iValue, iLength );
    os << szDigits;

    return os;
}

// gcd
//
// Greatest common divisor by the subtractive form of Euclid's algorithm:
// the larger value is repeatedly reduced by the smaller one.
//
// Both arguments must be greater than zero (asserted). The loop stops when
// the two values compare equal under FLOAT_EQ, or when either of them is no
// longer greater than 1; the smaller of the two values is returned.
//
// Each step performs a single subtraction, so the number of iterations grows
// with the ratio between the two values.
double gcd( double a, double b )
{
    ASSERT( a > 0 );
    ASSERT( b > 0 );

    for(;;){
        // Finished once the values agree or one of them has been reduced to 1 (or below)
        if( FLOAT_EQ( a, b) || !(a > 1) || !(b > 1) )
            break;

        if( a > b )
            a -= b;
        else if( b > a )
            b -= a;
    }

    return min( a, b );
}

// powerMod
//
// returns (iX to the power iA) mod iN
//
// We can't use standard floating point algorithms (pow, then fmod) on the
// full power as we need the _exact_ result. (iX to the power iA may well be
// greater than 20 digits and doubles are only accurate to 15 digits).
//
// We use the property that a*b (mod n) = (a mod n)*b (mod n) and binary
// (square-and-multiply) exponentiation: a running value holds
// iX to the power 2 to the power k (mod n) and is multiplied in to the result
// for every bit k that is set in iA.
//
// iX is reduced mod iN before the first multiplication, so every product
// formed is smaller than iN * iN. The result is therefore exact as long as
// iN * iN can be represented exactly in a double (iN below 2 to the power 26).
//
// iA is treated as a raw bit pattern (as the bit tests always did), so the
// function is only meaningful for iA >= 0. For iA == 0 the result is 1
// regardless of iN.
double powerMod( int iX, int iA, int iN )
{
    const double dN = (double) iN;

    // Working on an unsigned copy lets us shift the exponent down bit by bit
    // without ever having to build a 2**31 mask in a signed int
    unsigned int uExponent = (unsigned int) iA;

    // dSquare is iX to the power 2 to the power (current bit) (mod n)
    double dSquare = fmod( (double) iX, dN );
    double dXAModN = 1;

    while( uExponent != 0 ){
        // For each bit set in iA multiply dXAModN by the current square
        if( uExponent & 1u )
            dXAModN = fmod( dXAModN * dSquare, dN );

        uExponent >>= 1;

        // Only square again if there are still bits left to process
        if( uExponent != 0 )
            dSquare = fmod( dSquare * dSquare, dN );
    }

    return dXAModN;
}

// Metaverse -------------------------------------------------------------------------
// Constructor
//
// Binds the metaverse to the stream os, which receives all of its textual
// output. The callback, its ID, the calculation threads and both amplitude
// vectors start out as NULL and the stop flag starts out clear.
//
// NOTE(review): m_bSimulating is not initialised here; within this file it is
// first assigned in StartSimulation. Confirm that nothing reads it earlier.
CMetaverse::CMetaverse( ostream &os )
    : m_os( os ),
      m_pID( NULL ),
      m_pSimCallback( NULL ),
      m_bStopSim( FALSE ),
      m_pCalcThreads( NULL ),
      m_pcomplexOutputAmplitudes( NULL ),
      m_pcomplexInitialAmplitudes( NULL )
{
    // Nothing else to set up
}

// Destructor
//
// Releases the calculation threads (if any) and any stored amplitude vectors.
CMetaverse::~CMetaverse()
{
    // Deleting a NULL pointer does nothing, so no guard is needed
    delete m_pCalcThreads;
    m_pCalcThreads = NULL;

    FreeAmplitudes();
}

// CMetaverse::DumpAmplitdues
//
// Writes the amplitudes in cvAmps to the stream m_os, followed by the total
// probability (the sum of the squared magnitudes that were visited).
//
// Only amplitudes whose squared magnitude exceeds FLOAT_EQUALITY are printed,
// each labelled with its index as an iBits digit binary number, and only
// those contribute to the total. If more than 256 amplitudes are not below
// FLOAT_EQUALITY then none are printed; a placeholder line is written instead
// and the total covers every entry of cvAmps.
void CMetaverse::DumpAmplitdues( const CBaseComplexVector &cvAmps, int iBits ) const
{
    const int iCount = cvAmps.Length();

    // Start from the full count and discount every (near) zero amplitude
    int iNonZero = iCount;
    for( int iAmp = 0; iAmp < iCount; iAmp++ ){
        if( cvAmps[iAmp].MagSquared() < FLOAT_EQUALITY )
            --iNonZero;
    }

    const bool bTooMany = ( iNonZero > 256 );
    if( bTooMany )
        m_os << "<Too many amplitudes to dump>" << endl;

    CComplex total;

    for( int n = 0; n < iCount; n++ ){
        if( bTooMany ){
            // Nothing is printed, but every amplitude still counts towards the total
            total += cvAmps[ n ].MagSquared();
            continue;
        }

        if( cvAmps[n].MagSquared() > FLOAT_EQUALITY ){
            m_os << "|";
            ::OutputBinary( m_os, n, iBits ) << "> = " << cvAmps[ n ] << endl;

            total += cvAmps[ n ].MagSquared();
        }
    }

    // Output the total probability
    m_os << "Total probability: " << total << endl << endl;
}

// CMetaverse::VerifyCircuitSize
//
// Verify that the circuit is of a size that we can happily simulate.
//
// Throws a string literal (const char *) describing the first problem found:
//   - not all gate pins are connected
//   - the circuit has no gates
//   - the circuit has no input bits
//   - the number of input and output bits differ
//   - the circuit has 32 or more input bits
//   - storing one complex amplitude per basis state would need 1Gb or more
// Returns normally if none of these apply.
void CMetaverse::VerifyCircuitSize( CCircuit *pCircuit )
{
    ASSERT( pCircuit );

    const int iInputBits  = pCircuit->NumberOfInputBits();
    const int iOutputBits = pCircuit->NumberOfOutputBits();

    if( !pCircuit->AllPinsConnected() )
        throw "Circuit is not a valid quantum circuit. Not all gate pins are connected";
 
    if( pCircuit->NumberOfGates() == 0 )
        throw "The circuit is empty!";

    if( iInputBits == 0 )
        throw "A circuit needs input bits in order to be simulated!";

    if( iInputBits != iOutputBits )
        throw "Circuit has unequal number of input and output bits (can't be reversible)";

    // Simple simulation can't cope with circuits with more than 32 quantum bits
    // and we can't use more than 1Gb memory (NT user space is 2Gb max)
    if( iInputBits >= 32 )
        throw "This circuit is too copmlex to simulate using the simple method.";

    // Workspace required to simulate. n bits gives 2**n possible combinations,
    // for each of these combinations we must store a complex amplitude.
    //
    // The workspace is only computed now that the bit count is known to be
    // below 32, and it is compared against the largest permitted number of
    // amplitudes rather than being multiplied by the amplitude size: the
    // product can wrap round when size_t is 32 bits wide, which would let an
    // oversized circuit slip through.
    const unsigned long ulMaxBytes  = 1073741824UL;
    const unsigned long ulAmpSize   = (unsigned long) sizeof( CComplex );
    const unsigned long ulMaxAmps   = (ulMaxBytes + ulAmpSize - 1) / ulAmpSize;
    const unsigned long ulWorkspace = 1UL << iInputBits;

    if( ulWorkspace >= ulMaxAmps )
        throw "This circuit would require greater than 1Gb of memory to simulate.";
}

/////////////////////////////////////////////////////////////////////////////////////////
// Initialisation routines
/////////////////////////////////////////////////////////////////////////////////////////

// CMetaverse::SetInitialAmplitudes
//
// Fills cvAmps with the starting amplitudes for pCircuit, using whichever
// initialisation method m_iCircuitInputs selects. Input from file is not
// implemented (it only asserts); any other value leaves cvAmps untouched.
void CMetaverse::SetInitialAmplitudes( CBaseComplexVector& cvAmps, const CCircuit *pCircuit )
{
    ASSERT( pCircuit );

    if( m_iCircuitInputs == InputFromDefault ){
        InitialiseFromDefault( cvAmps, pCircuit );
    }
    else if( m_iCircuitInputs == InputForQFactorisation ){
        InitialiseForFactorisation( cvAmps, pCircuit );
    }
    else if( m_iCircuitInputs == InputFromFile ){
        ASSERT( !"Not Implemented yet" );
    }
}

// CMetaverse::InitialiseFromDefault
//
// Initialise cvAmps from the source gates of pCircuit (gates with no input
// pins). Every source gate whose calculated output has element [1] equal to
// 1 sets the bit given by its output pin's bit number; the amplitude at the
// resulting index is then set to 1.
//
// pre condition: Assumes that cvAmps[i] is 0, for each i
void CMetaverse::InitialiseFromDefault( CBaseComplexVector &cvAmps, const CCircuit *pCircuit )
{
    // Accumulates the binary value presented by the source gates
    int iSourceBits = 0;

    for( int iGate = 0; iGate < pCircuit->NumberOfGates(); ++iGate ){
        CGate *pSource = pCircuit->Gate(iGate);

        // Anything with input pins is not a source gate - skip it
        if( pSource->InputPins() != 0 )
            continue;

        // A source gate is expected to drive exactly one bit
        ASSERT( pSource->OutputPins() == 1 );

        // Is this source set to output 1?
        if( (pSource->CalcOutput( NULL ))[1] == CComplex( 1.0, 0 ) ){
            const int iBitNumber = pSource->OutputPin(0)->BitNumber();
            iSourceBits |= pow2( iBitNumber );
        }
    }

    // That single combination has amplitude 1, every other amplitude stays 0
    cvAmps.SetElement( iSourceBits, 1.0 );
}

// CMetaverse::InitialiseForFactorisation
//
// Initialise cvAmps with values for quantum factorisation: equal amplitudes
// are set for |x to power a (mod n)>|a>, where x is a random number, n is the
// number to factorise (m_iNumberToFactorise) and a ranges over
// 0..(pow2( RegisterASize ) - 1).
//
// The circuit's bits are split in to two registers: register A (holding a)
// takes 2/3 of the input bits (rounded down) and occupies the low bits of the
// amplitude index; register B (holding x to power a (mod n)) takes the
// remainder and occupies the bits above it.
//
// The random number chosen is stored in m_iRandomNumber and reported on m_os.
//
// Throws a string literal (const char *) if register A is empty or if a value
// of x to power a (mod n) does not fit in to register B. Previously these
// were only caught by debug assertions (or not at all) and would divide by
// zero or index outside the amplitude vector in a release build.
//
// pre condition: Assumes that cvAmps[i] is 0, for each i
void CMetaverse::InitialiseForFactorisation( CBaseComplexVector &cvAmps, const CCircuit *pCircuit )
{
    int iRegisterASize = 2 * pCircuit->NumberOfInputBits() / 3;
    int iRegisterBSize = pCircuit->NumberOfInputBits()- iRegisterASize;
    int iPow2ASize = pow2( iRegisterASize );
    int iPow2BSize = pow2( iRegisterBSize );

    ASSERT( iRegisterASize > 2 );
    ASSERT( iRegisterBSize > 1 );
    ASSERT( m_iNumberToFactorise & 1 );

    // With an empty register A the random number range below would be empty
    // (rand() % 0)
    if( iPow2ASize < 2 )
        throw "The circuit is too small to be initialised for factorisation.";

    // get a random number in the range 1..2**RegisterASize - 1
    m_iRandomNumber = 1 + (rand() % (iPow2ASize-1));
    //m_iRandomNumber = 17;
    m_os << "Random number is: " << m_iRandomNumber << endl;

    // Find the amplitude of each output
    double dSuperAmp = 1 / sqrt( (double) iPow2ASize );

    // For each a
    for( int iA = 0; iA < iPow2ASize; iA ++ ){
        // find x to the power a (mod n)
        double dXAModN = powerMod( m_iRandomNumber, iA, m_iNumberToFactorise );
        
        // Check that it fits in register B before using it to build an index
        if( !( dXAModN >= 0 && dXAModN < iPow2BSize ) )
            throw "The number to factorise is too large for this circuit.";

        int iXAModN = (int) dXAModN;

        // Set the amplitude
        cvAmps.SetElement( iA | (iXAModN << iRegisterASize), dSuperAmp);
    }
}

/////////////////////////////////////////////////////////////////////////////////////////
// Simulation routines
/////////////////////////////////////////////////////////////////////////////////////////

// CMetaverse::StartSimulation
//
// Runs a complete simulation of pCircuit: stores the callback, its ID and the
// simulation parameters, allocates fresh amplitude vectors (sparse or dense
// according to m_bSparseVectors), calls Simulate, outputs the results and
// finally reports the elapsed time and memory usage on m_os.
//
// m_bSimulating is set on entry. If anything throws it is cleared again and
// the exception is passed on to the caller.
//
// NOTE(review): m_bSimulating is not cleared here when the simulation
// completes normally - presumably the caller does that; confirm.
void CMetaverse::StartSimulation( CCircuit *pCircuit,
                                  SimCallbackFunc pSimCallback,
                                  void *pID,
                                  SimParams simParams )
{
    ASSERT( pCircuit );

    // From here on a simulation is in progress
    m_bSimulating = TRUE;

    try{
        // Remember who to report progress to
        m_pSimCallback = pSimCallback;
        m_pID          = pID;

        // Overwrite our SimParams portion with the parameters supplied
        *((SimParams*)this) = simParams;

        // Throw away whatever the previous simulation left behind
        FreeAmplitudes();

        ASSERT( !m_pcomplexOutputAmplitudes );
        ASSERT( !m_pcomplexInitialAmplitudes );

        if( !m_bSparseVectors ){
            m_pcomplexOutputAmplitudes = new CComplexVector;
            m_pcomplexInitialAmplitudes = new CComplexVector;
        } else {
            m_pcomplexInitialAmplitudes = new CSparseComplexVector;
            m_pcomplexOutputAmplitudes = new CSparseComplexVector;
        }

        // Time the simulation itself (but not the output of the results)
        const clock_t clockStart = clock();
        Simulate( pCircuit, *m_pcomplexOutputAmplitudes );
        const clock_t clockEnd = clock();

        // Output the circuit results
        OutputResults( *m_pcomplexOutputAmplitudes, pCircuit );

        const double dSeconds = (double)(clockEnd - clockStart) / CLOCKS_PER_SEC;

        m_os << endl << "Completed in " << dSeconds << " seconds. " << endl;
        m_os << "Maximum memory usage: "
             << m_pcomplexOutputAmplitudes->MemUsage()
             << " bytes "
             << endl;
    }
    catch (...){
        // No longer simulating; let the caller deal with the error
        m_bSimulating = FALSE;
        throw;
    }
}

// CMetaverse::Simulate
//
// Prepares pCircuit for simulation, sizes and initialises complexOutputAmps
// with the input amplitudes and then runs either the simple or the complex
// simulation algorithm over them (selected by m_bSimulateSimply). On return
// complexOutputAmps holds the resulting amplitudes.
//
// If m_bStoreInitialAmps is set, a copy of the initial amplitudes is kept in
// *m_pcomplexInitialAmplitudes. If the simulation was stopped, a message
// saying so is written to m_os.
//
// Errors reported by VerifyCircuitSize propagate to the caller.
void CMetaverse::Simulate( CCircuit *pCircuit, CBaseComplexVector &complexOutputAmps )
{
    ASSERT( pCircuit );

    // Make sure that the random number generator for this thread is seeded.
    srand( (unsigned)time( NULL ) );

    // Get the gates in to simulation order and ready for processing
    pCircuit->OrderGates();
    pCircuit->PrepareForProcessing();

    // Refuse circuits that we cannot cope with
    VerifyCircuitSize( pCircuit );

    // n bits gives 2**n possible combinations, each of which needs a complex
    // amplitude. The array will be initialised with zeros
    const int iAmplitudes = pow2( pCircuit->NumberOfInputBits() );
    complexOutputAmps.SetLength( iAmplitudes );

    // Set the initial amplitudes of the inputs to the circuit
    SetInitialAmplitudes( complexOutputAmps, pCircuit );

    // Keep a copy of the starting point if we were asked to
    if( m_bStoreInitialAmps )
        *m_pcomplexInitialAmplitudes = complexOutputAmps;

    // Do the actual circuit simulation
    if( !m_bSimulateSimply )
        SimulateCircuitComplexly( complexOutputAmps, pCircuit );
    else
        SimulateCircuitSimply( complexOutputAmps, pCircuit );

    if( m_bStopSim )
        m_os << "Simulation terminated" << endl;
}

// CMetaverse::SimulateCircuitSimply
//
// Simulates pCircuit. cvAmps gives the initial state of the circuit and will
// return the final state. Uses a simple algorithm: the gates are applied one
// at a time, in circuit order, each across the whole amplitude vector.
//
// Before each processing gate the progress callback is called with the work
// done so far (scaled to 0..1000), and the simulation returns early if
// m_bStopSim has been set.
//
// pre-condition: The gates in pCircuit are ordered in to simulation order
// ( if a depends on the output of b, then b occurs before a in pCircuit )
void CMetaverse::SimulateCircuitSimply( CBaseComplexVector& cvAmps, CCircuit *pCircuit )
{
    ASSERT( pCircuit );
    ASSERT( pCircuit->Ordered() );

    // Find the number of gates which actually require processing
    // work (sources and sinks do not require work)
    int iProcessingGates = pCircuit->NumberOfGates() 
                            - pCircuit->NumberOfInputBits() 
                            - pCircuit->NumberOfOutputBits();

    // iWorkDone is the number of processing Gates completed
    int iWorkDone = 0;

    // For each gate in the circuit
    for( int iThisGate = 0; iThisGate < pCircuit->NumberOfGates(); iThisGate++ ){       
        // define pGate, the current gate that we are working on
        const CGate *pGate = pCircuit->Gate( iThisGate );

        // Ignore input / output gates
        if( pGate->InputPins() == 0 || pGate->OutputPins() == 0 )
            continue;

        // Check that it is valid
        ASSERT( pGate->InputPins() == pGate->OutputPins() );

        // Somebody want us to stop?
        if( m_bStopSim ) return;

        // Inform the user of the progress
        (*m_pSimCallback)( iWorkDone * 1000 / iProcessingGates, m_pID, &cvAmps );

        // Build a mask of the input pin connections to this gate
        int iPinMask = 0;

        for( int j = 0; j < pGate->InputPins(); j++ ){
            iPinMask |= pow2( pGate->InputPin( j ) ->BitNumber() );
        }

#ifdef DB
        m_os << "iPinMask = " << iPinMask << endl << endl;
#endif

        // For each amplitude in in the current state
        // - but ignoring amps which refer to our input / output connections
        for( int iBaseAmp = 0; iBaseAmp < cvAmps.Length(); iBaseAmp++ ){

            if(iBaseAmp & iPinMask){
                // This index has one of the gate's own bits set. Advance by
                // the value of the overlapping bits (the -1 compensates for
                // the loop's own increment) and test again.
                iBaseAmp += (iBaseAmp & iPinMask) -1;
                continue;
            }

#ifdef DB
            m_os << "Processing base combination " << iBaseAmp << endl << endl;
#endif

            // Get the gate to calculate its output and store in cvAmps
            const_cast<CGate *>(pGate)->CalcOutput(cvAmps, iBaseAmp );
        }

#ifdef DB
        m_os << "Amplitudes after gate :" << iThisGate << " (" << pGate->Name() << ")" << endl;
        DumpAmplitdues( cvAmps, pCircuit->NumberOfInputBits() );
#endif
        //m_os << "Memory usage " << cvAmps.MemUsage() << endl;

        // Increment the amount of work done
        iWorkDone++;
    }
}

// CMetaverse::SimulateCircuitComplexly
//
// Simulates pCircuit. cvAmps gives the initial state of the circuit and will
// return the final state. The processing gates (those with both input and
// output pins) are partitioned in to groups of at most 14 bits and each group
// is handed to SimulateGroup in turn. When m_iMaxThreads is greater than 1 a
// set of calculation threads is created for the duration of the simulation.
//
// Returns early, without the final progress callback and without deleting
// the calculation threads, if m_bStopSim is set between groups; the threads
// are then deleted by the next call or by the destructor.
void CMetaverse::SimulateCircuitComplexly( CBaseComplexVector& cvAmps, CCircuit *pCircuit )
{
    const int iMaxBits = 14;

    ASSERT( pCircuit );

    CGateGroup ggAllGates;
    CQList<CGateGroup> listGroups;

    // Collect every processing gate; sources and sinks are left out
    for( int iGate = 0; iGate < pCircuit->NumberOfGates(); iGate++ ){
        CGate *pGate = pCircuit->Gate( iGate );

        if( pGate->InputPins() <= 0 || pGate->OutputPins() <= 0 )
            continue;

        ggAllGates.AddGate( pGate );
    }

    // Partition the group
    PartitionGroup( ggAllGates, iMaxBits, listGroups );

    // Threads may be left over from a simulation that was stopped or that
    // threw - get rid of them (deleting NULL is harmless)
    delete m_pCalcThreads;
    m_pCalcThreads = NULL;

    // Only go multi-threaded if more than one thread is allowed
    if( m_iMaxThreads > 1 )
        m_pCalcThreads = new CCalculationThreads( m_iMaxThreads, m_iThreadPriority );

    // Simulate each group in turn, checking for a stop request before each one
    int iWorkDone = 0;
    for( int iGroup = 0; iGroup < listGroups.Length(); iGroup++ ){
        if( m_bStopSim )
            return;

        SimulateGroup( cvAmps, pCircuit, listGroups[iGroup], iWorkDone);
    }

    // Report completion
    (*m_pSimCallback)( 1000, m_pID, &cvAmps );

    // Tidy up
    delete m_pCalcThreads;
    m_pCalcThreads = NULL;
}

// CMetaverse::PartitionGroup
//
// Given a gate group ggIn, partition it according to iMaxBits and
// return the list of partitions in listGroupsOut (which is emptied first).
//
// The gates of ggIn are sorted in to dependency order and then collected
// greedily: gates are added to the current group until adding the next one
// would take the group's list of bits past iMaxBits, at which point a new
// group is started. iMaxBits is raised to 2 if it is smaller than that. A
// group always receives at least one gate, even if that gate alone exceeds
// iMaxBits.
void CMetaverse::PartitionGroup( const CGateGroup& ggIn
                               , int iMaxBits
                               , CQList<CGateGroup> &listGroupsOut
                               )
{
    // Create a seperate group for each gate in ggIn. This will be
    // ordered in to dependency order
    CQList<CGateGroup> listGates;
    ggIn.ExpandGatesToSeparateGroups( listGates );

    // Sort in to dependency order
    listGates.PartialOrderSort( );
    listGroupsOut.Empty();

    iMaxBits = max( 2, iMaxBits );

    // While there are still gates to process
    for( int iGate = 0; iGate < listGates.Length(); ){
        CGateGroup gg;
        CQList<int> listBits;

        // Pin index shared by both loops below. It is declared once, outside
        // of any for statement, so that the code compiles identically whether
        // or not the compiler lets for-loop variables outlive their loop.
        int iPin;

        // Add the next gate to the group
        CGate *pGate = const_cast<CGate *>(listGates[iGate++].Gate(0));
        gg.AddGate( pGate );

        // Add its bits to the list of bits
        for( iPin = 0; iPin < pGate->InputPins(); iPin++ )
            listBits += pGate->InputPin( iPin )->BitNumber();

        // While there are still gates left and the list of
        // bits hasn't exceeded iMaxBits, add the new gates to the current
        // group
        while( iGate < listGates.Length() ){
            pGate = const_cast<CGate *>(listGates[iGate].Gate(0));

            // The candidate's bits are added before the size test. If the
            // test fails they stay in listBits, which does no harm as
            // listBits is discarded along with the group that is now closed.
            for( iPin = 0; iPin < pGate->InputPins(); iPin++ )
                listBits += pGate->InputPin( iPin )->BitNumber();

            if( listBits.Length() > iMaxBits )
                break;

            iGate++;
            gg.AddGate( pGate );
        }

        // add the current group to the list of groups
        listGroupsOut += gg;
    }

}

// CMetaverse::SimulateGroup
//
// Simulate one gate group: the group (listGroups) is partitioned in to sub
// groups of at most 8 bits, which are passed to SimulateSubGroup one after
// another. Stops as soon as m_bStopSim is found to be set. iWorkDone is
// passed through to SimulateSubGroup.
void CMetaverse::SimulateGroup( CBaseComplexVector& cvAmps,
                                CCircuit *pCircuit,
                                const CGateGroup& listGroups,
                                int &iWorkDone )
{
    const int iMaxBits = 8;

    CQList<CGateGroup> listSubGroups;
    PartitionGroup( listGroups, iMaxBits, listSubGroups );

    // Keep going while there are sub groups left and nobody has asked us to stop
    int iSubGroup = 0;
    while( iSubGroup < listSubGroups.Length() && !m_bStopSim ){
        SimulateSubGroup( cvAmps, pCircuit, listSubGroups[iSubGroup], iWorkDone );
        ++iSubGroup;
    }
}

// CMetaverse::SimulateSubGroup
//
// Simulate a sub group: every gate in ggIn is applied to cvAmps.
//
// The progress callback is called first with the work done so far (scaled to
// 0..1000). A bit mask is then built for each gate's input pins, together
// with a combined mask (iPinMask) for the whole sub group. For every base
// amplitude index that has none of the sub group's bits set, the gates are
// either calculated here or, when calculation threads exist, queued on them
// with CalcGatesAsync.
//
// iWorkDone is increased by the number of gates in the sub group, unless the
// simulation was stopped (m_bStopSim) part way through.
//
// If stopped, we still wait for the calculation threads before freeing the
// gate mask array, because that array was handed to them by SetCalcData.
// NOTE(review): this assumes that WaitForCompletion is as safe to call after
// a stop request as it is at the normal end of the loop - confirm against
// CCalculationThreads.
void CMetaverse::SimulateSubGroup( CBaseComplexVector& cvAmps
                                 , CCircuit *pCircuit
                                 , const CGateGroup& ggIn 
                                 , int &iWorkDone
                                 )
{
    ASSERT( pCircuit );

    // Inform the user of progress to date
    int iProcessingGates = pCircuit->NumberOfGates() 
                            - pCircuit->NumberOfInputBits() 
                            - pCircuit->NumberOfOutputBits();

    (*m_pSimCallback)( iWorkDone * 1000 / iProcessingGates, m_pID, &cvAmps );

    // Create a seperate group for each gate in ggIn
    CQList<CGateGroup> listGates;
    ggIn.ExpandGatesToSeparateGroups( listGates );

    // Build a mask of the input pin connections
    int *piGatePinMask = new int[ listGates.Length() ];
    int iPinMask = 0;

    for( int iGate = 0; iGate < listGates.Length(); iGate++ ){
        CGate *pGate = const_cast<CGate *>(listGates[iGate].Gate(0));
        piGatePinMask[iGate] = 0;

        for( int j = 0; j < pGate->InputPins(); j++ ){
            int iBitNumber = pGate->InputPin( j ) ->BitNumber();

            iPinMask |= pow2( iBitNumber );
            piGatePinMask[ iGate ] |= pow2( iBitNumber );
        }
    }

    // All of the circuit's bits which are not touched by this sub group
    int iBaseAmpBits = (pow2( pCircuit->NumberOfInputBits() ) - 1) ^ iPinMask;

    if( m_pCalcThreads )
        m_pCalcThreads->SetCalcData( listGates, cvAmps, piGatePinMask, iBaseAmpBits, iPinMask );

    // Set if we abandon the loop because of a stop request
    bool bStopped = false;

    // For each amplitude in in the current state
    // - but ignoring amps which refer to our input / output connections
    for( int iBaseAmp = 0; iBaseAmp < cvAmps.Length(); iBaseAmp++ ){

        if(iBaseAmp & iPinMask){
            // Advance by the value of the overlapping bits (the -1
            // compensates for the loop's own increment) and test again
            iBaseAmp += (iBaseAmp & iPinMask) - 1;
            continue;
        }

        if( m_bStopSim ){
            bStopped = true;
            break;
        }

        if( m_pCalcThreads )
        {
            //m_os << "async: " << iBaseAmp << endl;
            m_pCalcThreads->CalcGatesAsync( iBaseAmp );
        }
        else {
            // For each gate in the list
            // NOTE: This code is almost exactly copied in the multi-threaded
            // simulation method CCalculationThreads::CalcGate. Any change here
            // should be mirrored in CalculationThreads.cpp
            for( int i = 0; i < listGates.Length(); i++ ){

                CGate *pGate = const_cast<CGate *>(listGates[i].Gate( 0 ));

                // Bits which must stay clear in z: this gate's own bits and
                // the bits outside of the sub group
                int iWholeMask = piGatePinMask[i] | iBaseAmpBits;

                for( int z = 0; z <= iPinMask; z++ ){
                    if( z & iWholeMask ){
                        z += (z & iWholeMask) - 1;
                        continue;
                    }
                    pGate->CalcOutput( cvAmps, iBaseAmp | z );
                }
            }

        }
    }

    // Let any queued work finish before the mask array it uses is freed.
    // This is done on the stop path as well as on normal completion.
    if( m_pCalcThreads )
    {
        m_pCalcThreads->WaitForCompletion();
    }

    delete [] piGatePinMask;

    // A stopped sub group does not count as work done
    if( !bStopped )
        iWorkDone += listGates.Length();
}


// CMetaverse::FreeAmplitudes
void CMetaverse::FreeAmplitudes()
{
    // Deletes the stored output and initial amplitude vectors and leaves both
    // pointers NULL. Deleting a NULL pointer does nothing, so this is safe to
    // call when nothing is stored.
    delete m_pcomplexOutputAmplitudes;
    m_pcomplexOutputAmplitudes = NULL;

    delete m_pcomplexInitialAmplitudes;
    m_pcomplexInitialAmplitudes = NULL;
}


// CMetaverse::OutputResults
//
// Writes the results held in cvAmps in the form selected by m_iCircuitOutputs:
//   - OutputNothing: returns immediately
//   - OutputProbabilities / OutputAmplitudes: one line per amplitude index
//     from iStart to iTo, giving the index (in binary or decimal according to
//     m_iOutputFormat), a tab and then the squared magnitude or the amplitude
//   - anything else: a histogram of the total probability of each value of
//     the bits iBitsFrom..iBitsTo
//
// The bits are either all of the circuit's bits or m_iBitsFrom..m_iBitsTo
// (the latter limited to the circuit's top bit). Output goes to the file
// m_szOutputFile if m_bOutputToFile is set and the file can be opened,
// otherwise to m_os.
//
// When the inputs were set up for quantum factorisation the histogram is
// followed (always on m_os) by a count of its local maxima and, if possible,
// an estimate of a factor of m_iNumberToFactorise.
void CMetaverse::OutputResults( CBaseComplexVector &cvAmps, const CCircuit *pCircuit )
{
    if( m_iCircuitOutputs == OutputNothing )
        return;

    ASSERT( pCircuit->NumberOfInputBits() > 0 );

    int iStart, iTo;
    int iBitsFrom, iBitsTo;

    // Loop index shared by every loop below. It is declared once, outside of
    // any for statement, so that the code compiles identically whether or not
    // the compiler lets for-loop variables outlive their loop.
    int i;

    if( m_iBitsToOutput == OutputAllBits ){
        iBitsFrom = 0;
        iBitsTo = pCircuit->NumberOfInputBits() - 1;
    } else {
        ASSERT( m_iBitsFrom >= 0 && m_iBitsFrom < 31 );
        ASSERT( m_iBitsTo   >=  0 && m_iBitsTo   < 31 );

        iBitsFrom = m_iBitsFrom;
        iBitsTo = min( m_iBitsTo, pCircuit->NumberOfInputBits() - 1 );
    }

    // iStart is the first index listed; iTo is both the last index listed and
    // the mask of all bits up to and including iBitsTo
    iStart = (iBitsFrom == 0) ? 0 : pow2( iBitsFrom );
    iTo    = (iBitsTo   == 0) ? 1 : pow2( iBitsTo + 1 ) - 1;

    ofstream ofs;

    if( m_bOutputToFile ){
        ofs.open( m_szOutputFile );
        if( !ofs )
            m_os << "Cannot open output file \"" << m_szOutputFile << '\"' << endl ;
    }

    // Fall back to m_os if no file was asked for or it could not be opened
    ostream *pOutputStream;

    if( m_bOutputToFile && ofs )
        pOutputStream = &ofs;
    else
        pOutputStream = &m_os;
    
    if( m_iCircuitOutputs == OutputProbabilities || m_iCircuitOutputs == OutputAmplitudes ){
        for( i = iStart; i <= iTo; i++ ){
            if( m_iOutputFormat == OutputBinary ){
                ::OutputBinary( *pOutputStream, i, pCircuit->NumberOfInputBits() );
            } else
                (*pOutputStream) << i;

            (*pOutputStream) << '\t';

            if( m_iCircuitOutputs == OutputProbabilities )
                (*pOutputStream) << cvAmps[i].MagSquared();
            else
                (*pOutputStream) << cvAmps[i];

            (*pOutputStream) << endl;
        }
    } else { // Histogram
        CComplexVector cvProbs( pow2( iBitsTo - iBitsFrom + 1), TRUE );
        
        // Add each amplitude's probability to the bucket selected by its
        // bits iBitsFrom..iBitsTo
        for( i = 0; i < cvAmps.Length(); i++ ){
            int iBucket = (i & iTo) >> iBitsFrom;
            cvProbs[ iBucket ] += cvAmps[ i ].MagSquared();
        }

        for( i = 0; i < cvProbs.Length(); i++ ){
            if( m_iOutputFormat == OutputBinary ){
                ::OutputBinary( *pOutputStream, i, iBitsTo - iBitsFrom + 1 );
            } else
                (*pOutputStream) << i;
            
            (*pOutputStream) << '\t' << cvProbs[i] << endl;
        }

        if( m_iCircuitInputs == InputForQFactorisation ){
            int iMaxima = 0;

            // Count the buckets which are above 0.001 and strictly greater
            // than both neighbours (the histogram is treated as circular)
            for( i = 0; i < cvProbs.Length(); i++ ){
                int iBefore = ( i + cvProbs.Length() - 1 ) % cvProbs.Length();
                int iAfter  = ( i + 1 )  % cvProbs.Length();

                if( cvProbs[ i ].Real > 0.001
                 && cvProbs[ iBefore ].Real < cvProbs[ i ].Real
                 && cvProbs[i].Real > cvProbs[ iAfter ].Real
                  ){
                    ++iMaxima;
                }
            }

            m_os << "\nNumber of maxima = " << iMaxima;
            if( iMaxima & 1 )
                m_os << " - maxima odd - can't estimate factor" << endl;
            else {
                // x**(r/2) - 1 (mod n), where r is the number of maxima
                double dCandidate = powerMod( m_iRandomNumber
                                            , iMaxima / 2
                                            , m_iNumberToFactorise
                                            ) - 1;

                if( !( dCandidate > 0 ) ){
                    // gcd requires both of its arguments to be greater than
                    // zero. The candidate is zero when no maxima were found
                    // (x**0 = 1), so there is nothing to take the gcd of.
                    m_os << " - x**(r/2) - 1 (mod n) is not positive - can't estimate factor" << endl;
                } else {
                    double dEstimate = gcd( dCandidate, m_iNumberToFactorise );

                    if( FLOAT_EQ( dEstimate, 1 ) )
                        m_os << ". Esimation failed - x**r = -1 (mod n) (perhaps " 
                             << m_iNumberToFactorise << " is a prime power?)" << endl;
                    else
                        m_os << ". Estimate of a factor of " << m_iNumberToFactorise << " is " 
                             << dEstimate
                             << endl;
                }
            }
        }
    }

}



//----- Test stuff
// #include "MetaverseTests.cpp"

