/*
 *  CPUID Benchmark Version 2.0 Copyright (c) Roy Longbottom 2004 
 *
 * The program uses special CPU instructions to identify the type of CPU 
 * and to measure speed in MHz. Then it executes add instructions via 
 * 1, 2, 3 and 4 registers to identify maximum speeds of integer, 
 * floating point and MMX areas. The sum is checked for a simple aid 
 * on reliability. Results can be saved in a log file. Version 2.0 
 * includes tests for SSE, SSE2 and 3DNow instructions, when 
 * available. Version 2.0 also includes double precision floating 
 * point calculations besides single precision. These generally 
 * produce the same speeds but are provided to compare with results of 
 * the new instructions.
 *
 * Performance is measured in terms of Integer Millions of 
 * Instructions Per Second (Integer MIPS) and Millions of Floating 
 * Point Operations Per Second (MFLOPS). For MMX instructions, that 
 * use 64 bit registers, Version 1 showed MMX MIPS. As two 32 bit 
 * integers are added via one instruction, new results are twice those 
 * in Version 1, with a different definition. Tests now run are for 
 * the following: 
 *
 *              32 bit Integer MIPS
 *              32 bit Float MFLOPS 
 *              64 bit Float MFLOPS 
 *              32 bit MMX Integer MIPS
 *              32 bit SSE MFLOPS 
 *              64 bit SSE2 MFLOPS 
 *              32 bit 3DNow MFLOPS
 *
 *  Results can be saved in a text log file, default CPUInfo.txt. The
 *  tests also check numeric results for correct addition.
 *
 *  Modules
 *
 *  Config.cpp - via WhatConfig() identifies CPU type/speed and OS version
 *     plus whether support for MMX, SSE, SSE2 and 3DNow is available
 *
 *  CPUID.rc - menus and dialog boxes
 *     MENU_ABOUT         - AboutDlgProc()
 *     MENU_RUNALL        - runAllTests();
 *     MENU_SAVE          - GetFileName() and saveData()
 *     MENU_EXIT          - CleanUpAndPostQuit()
 *     Dialog TEST_PANEL  - TestPanel()
 *     Dialog AboutBox    - AboutDlgProc()
 *
 *  CPUID.h and CPUID.ico
 *
 *  CPUID.cpp
 *
 *  Functions
 *
 *  WinMain() - usual Windows functions
 *  InitApp() - usual Windows functions, allocates variable values,
 *     uses WhatConfig() to determine support for MMX, SSE, SSE2, 3DNow
 *     and displayWhatever(), processes command line.
 *  WindowProc() - usual Windows functions for menus   
 *  AboutDlgProc() - displays About text
 *  CleanUpAndPostQuit() - set up exit
 *  TestPanel() - dialog box displayed at start with information and
 *     test results, the tests being run on start up. Three buttons
 *     are provided:
 *           Re-Run - RUN_ALL     - as MENU_RUNALL
 *           Save   - SAVE        - as MENU_SAVE
 *           Exit   - EXIT_BUTTON - as MENU_EXIT
 *  displayWhatever() clears and redisplays TestPanel()
 *  runAllTests() - Allocates some memory, runs local_time(),         
 *  then runs the following assembly functions when the options are
 *  available, using start_time() and getRunSeconds():
 *
 *         mips1Reg()    - Integer MIPS
 *         mips2Reg()
 *         mips3Reg()
 *         mips4Reg()
 *        
 *         mflopD1Reg()  - Double Precision Floating Point MFLOPS
 *         mflopD2Reg()
 *         mflopD3Reg()
 *         mflopD4Reg()
 *        
 *         mflopS1Reg()  - Single Precision Floating Point MFLOPS
 *         mflopS2Reg()
 *         mflopS3Reg()
 *         mflopS4Reg()
 *        
 *         mmx1Reg()     - MMX 32 bit Integer MIPS
 *         mmx2Reg()
 *         mmx3Reg()
 *         mmx4Reg()
 *        
 *         SSE1Reg()     - SSE1 Single Precision Floating Point MFLOPS
 *         SSE2Reg()
 *         SSE3Reg()
 *         SSE4Reg()
 *        
 *         SSE21Reg()    - SSE2 Double Precision Floating Point MFLOPS
 *         SSE22Reg()
 *         SSE23Reg()
 *         SSE24Reg()
 *        
 *         Now3D1Reg()   - 3DNow Single Precision Floating Point MFLOPS
 *         Now3D2Reg()
 *         Now3D3Reg()
 *         Now3D4Reg()
 *
 *  Each is executed 10 times and the maximum speed noted.
 *
 *  start_time() - start time from high resolution timer
 *  getRunSeconds() - running time from high resolution timer
 *  local_time() - date/time of day
 *  saveData() - save results in log file
 *  GetFileName() - uses standard Windows Dialog to select
 *     device/path/file name for log file
 */
 
#define NAME "Test"
#define TITLE " Roy Longbottom's CPU ID Program "
#define INITGUID

#include <windows.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <xmmintrin.h>
#include <time.h>

#include "CpuID.h"

//  ************** PROTOTYPES *****************

// Window and dialog procedures plus application set-up / shut-down helpers.
BOOL FAR PASCAL AboutDlgProc( HWND, UINT, WPARAM, LPARAM);
static BOOL InitApp(HINSTANCE, LPSTR, int);
long FAR PASCAL WindowProc(HWND, UINT, WPARAM, LPARAM);                  
static void CleanUpAndPostQuit(void);
static BOOL GetFileName();
void displayWhatever(HWND);

// Results dialog procedure and the routine that runs every benchmark.
BOOL  FAR PASCAL TestPanel( HWND, UINT, WPARAM, LPARAM);
BOOL  runAllTests();

// Integer add tests using 1 to 4 registers (Integer MIPS).
int  mips1Reg(void);
int  mips2Reg(void);
int  mips3Reg(void);
int  mips4Reg(void);

// Double precision floating point add tests (MFLOPS).
double mflopD1Reg(void);
double mflopD2Reg(void);
double mflopD3Reg(void);
double mflopD4Reg(void);

// Single precision floating point add tests (MFLOPS).
float mflopS1Reg(void);
float mflopS2Reg(void);
float mflopS3Reg(void);
float mflopS4Reg(void);

// MMX 32 bit integer add tests (MIPS).
void mmx1Reg(void);
void mmx2Reg(void);
void mmx3Reg(void);
void mmx4Reg(void);

// SSE single precision add tests (MFLOPS).
void SSE1Reg(void);
void SSE2Reg(void);
void SSE3Reg(void);
void SSE4Reg(void);

// SSE2 double precision add tests (MFLOPS).
void SSE21Reg(void);
void SSE22Reg(void);
void SSE23Reg(void);
void SSE24Reg(void);

// 3DNow single precision add tests (MFLOPS).
void Now3D1Reg(void);
void Now3D2Reg(void);
void Now3D3Reg(void);
void Now3D4Reg(void);

// Date/time stamp and high resolution interval timing.
void local_time();
void start_time();
double getRunSeconds(void);

// Writes the results to the log file.
BOOL saveData();

// ************* GLOBAL VARIABLES **********
 
// Two-element MMX sum: zeroed at the start of runAllTests() and averaged
// into MMXcount after the MMX tests.
int     mmxSum[2];
// 64 byte, 16 byte aligned work areas allocated by runAllTests().
double * dsum;
float  * ssum1;
float  * ssumA;


char   version[30] = "Version 2.0";

// Set by CleanUpAndPostQuit(); ends the message loop in WinMain().
BOOL bQuit = FALSE;

// Main window handle (set in InitApp) and the current TEST_PANEL dialog
// (created, and destroyed when replaced, by displayWhatever()).
HWND   mainHWND;
HWND   paramHwnd = NULL;
// Instance handle passed to CreateDialog() / DialogBox().
HINSTANCE  MyInstance;

// Date and time text filled in by local_time().
char    timeday[30];

// Timer state: counter value captured by start_time() and the interval
// last computed by getRunSeconds().
LARGE_INTEGER starttime;
double        runSeconds;

// Scratch buffers: formatted list box / message text, main window title,
// and the re-assembled command line ("None" when no options were given).
char    writeMsg[200];
char    windowText[256];
char    commandLine[1024] = "";

// MMX constants; not referenced in the visible C code -- presumably used
// by the MMX assembly routines (TODO confirm).
__m64   mmxa = { 0x0000000100000001 };
__m64   mmx0 = { 0x0000000000000000 };

// Progress marker read by TestPanel(): -2 before any run, -1 while the
// integer test runs, then advanced by runAllTests() as each stage ends.
int  runStage = -2;

// Best speed found for the 1, 2, 3 and 4 register variant of each test.
int  mipsReg[4];
int  mflopsSP[4];
int  mflopsDP[4];
int  mmxMipsReg[4];
int  SSEmflops[4];
int  SSE2mflops[4];
int  Now3Dmflops[4];

// TRUE until WinMain() has run the tests once from the message loop.
BOOL  firstRun = TRUE;

// Per-test flags: reset to TRUE by runAllTests(), cleared when the final
// sum differs from the expected total.
BOOL  correctInt;
BOOL  correctDP;
BOOL  correctSP;
BOOL  correctMMX;
BOOL  correctSSE;
BOOL  correctSSE2;
BOOL  correct3DNow;

// Set by an option starting with 'A': run the tests, save and exit.
BOOL  autoRun = FALSE;

// Running totals of additions, used for rates and the correctness check.
unsigned int intCount;
float         FPSCount;
double        FPDCount;
double        MMXcount;
double        SSEcount;
double        SSE2count;
double        Now3Dcount;

// Log file name; replaced by the token following an 'L' option.
char fileName[256] = "CPUInfo.txt";


/*
 * displayWhatever - destroys the current TEST_PANEL dialog (if any),
 * resets the main window title and creates a fresh TEST_PANEL dialog
 * owned by hwnd. The new dialog handle is kept in paramHwnd.
 */
void displayWhatever(HWND hwnd)
{
    if (paramHwnd != NULL)
    {
        DestroyWindow(paramHwnd);
    }

    strcpy(windowText, " Roy Longbottom's CPU ID and Speed Test");
    SetWindowText(mainHWND, windowText);

    paramHwnd = CreateDialog(MyInstance, "TEST_PANEL", hwnd,
                             (DLGPROC)TestPanel);
} // displayWhatever

/*
 * WinMain - initialization, optional unattended run, message loop
 *
 * When autoRun is set (command line option), the tests are run once,
 * the results are saved with saveData() and the program exits without
 * entering the message loop. Otherwise the tests are run the first time
 * a message is seen and the results panel is displayed.
 *
 * Returns FALSE if InitApp() fails, otherwise the wParam of the last
 * message examined (0 when no message was ever fetched).
 */
int PASCAL WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
                     LPSTR cmdline, int cmdshow)
{
    MSG         msg;

    (void)hPrevInstance;    /* unused */

    /* The autoRun path leaves with bQuit already set, so the loop below
       never fills msg; zero it so the final return value is defined. */
    memset(&msg, 0, sizeof(msg));

    if (!InitApp(hInstance, cmdline, cmdshow))
         return FALSE;

    if (autoRun)
    {
       if (!runAllTests())
       {
          CleanUpAndPostQuit();
       }
       if (!bQuit)
       {
          if (paramHwnd)
          {
              DestroyWindow(paramHwnd);
              paramHwnd = NULL;     /* do not keep a stale handle */
          }
          saveData();
          CleanUpAndPostQuit();
       }
    }

    while(!bQuit)
    {
        if( PeekMessage( &msg, NULL, 0, 0, PM_NOREMOVE ) )
        {
           /* First message seen: run the tests once, then show results */
           if (firstRun)
           {
               if (!runAllTests())
               {
                  CleanUpAndPostQuit();
                  break;
               }
               firstRun = FALSE;
               displayWhatever(mainHWND);
           }
           if (msg.message == WM_QUIT)
           {
                CleanUpAndPostQuit();
                break;
           }

           if( !GetMessage( &msg, NULL, 0, 0 ) ) return (int)msg.wParam;
           TranslateMessage(&msg);
           DispatchMessage(&msg);
        }
        else
        {
           // make sure we go to sleep if we have nothing else to do
           WaitMessage();
        }
    }
    return (int)msg.wParam;

} /* WinMain */

/****************************************************************************/
/*                   Initialization and object creation                     */
/****************************************************************************/
/*
 * InitApp
 * Registers the window class, creates the main window, determines the
 * configuration via WhatConfig() and processes the command line.
 *
 * Command line tokens are separated by spaces or commas and are
 * upper-cased first (cmdline itself is modified by strupr). At most 9
 * tokens are examined:
 *    A...      - sets autoRun
 *    L...      - the following token, if any, becomes the log file name
 *    NOMMX, NOSSE1, NOSSE2, NO3DNO... - clear the matching has* flag
 * The token after an L option is still examined as an option itself.
 * commandLine is left holding the re-assembled tokens, or "None".
 *
 * All copies are bounded to the destination size, so an over-long
 * command line or token is truncated instead of overrunning a buffer.
 *
 * Returns TRUE on success, FALSE if the class or window cannot be created.
 */
static BOOL
InitApp(HINSTANCE hInstance, LPSTR cmdline, int cmdshow)
{
    HWND hwnd;
    WNDCLASS wc;
    int   i, j;
    char *delims = {" ,"};
    char commands[20][256];
    char *p;
    char buf[256] = "";
  
    /*
     * set up and registers the window class
     */
    wc.style = CS_HREDRAW | CS_VREDRAW;
    wc.lpfnWndProc = WindowProc;
    wc.cbClsExtra = 0;
    wc.cbWndExtra = sizeof(DWORD);
    wc.hInstance = hInstance;
    wc.hIcon = LoadIcon(hInstance, "GenericIcon");
    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
    wc.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH);
    wc.lpszMenuName = "GenericMenu";
    wc.lpszClassName = NAME;
    if (!RegisterClass(&wc))
        return FALSE;

    /*
     * Create the window
     */

    hwnd =
        CreateWindow(
            NAME,
            TITLE,
            WS_OVERLAPPEDWINDOW | WS_HSCROLL | WS_VSCROLL,
            0,
            0,
            640,
            480,
            NULL, 
            NULL, 
            hInstance, 
            NULL 
        );

    if (!hwnd) return FALSE;

    mainHWND = hwnd;

    /* Keep the instance handle for the later CreateDialog()/DialogBox()
       calls; it is not assigned anywhere else in the visible code. */
    MyInstance = hInstance;

    /* Upper-case in place, then take a bounded copy (strncat always
       terminates the destination). */
    strupr(cmdline);
    commandLine[0] = '\0';
    strncat(commandLine, cmdline, sizeof(commandLine) - 1);
   
    WhatConfig();
    
    i = 0;
    p = strtok(commandLine, delims);
    while (p != NULL)
    {
        /* Each token is truncated to fit its 256 byte slot */
        commands[i][0] = '\0';
        strncat(commands[i], p, sizeof(commands[i]) - 1);

        /* Append "token " to buf without exceeding its capacity */
        strncat(buf, commands[i], sizeof(buf) - strlen(buf) - 1);
        strncat(buf, " ", sizeof(buf) - strlen(buf) - 1);

        i = i + 1;
        if (i > 8) break;
        p = strtok(NULL, delims);
    }
    if (i > 0)
    {
        strcpy(commandLine, buf);   /* buf (256) always fits in 1024 */
    }
    else
    {
        strcpy(commandLine, "None");
    }
    autoRun = FALSE;
    for (j=0; j<i; j++)
    {
        switch(commands[j][0])
        {            
            case 'A': // Auto Run
             autoRun = TRUE;
             break;            


            case 'L': // log file
             if (j < i -1)
             {
                 /* width keeps the copy inside fileName[256] */
                 sscanf(commands[j+1], "%255s", fileName);
             }
             break;
        }
        if (strnicmp(commands[j],  "NOMMX",  5) == 0) hasMMX = FALSE; 
        if (strnicmp(commands[j],  "NOSSE1", 6) == 0) hasSSE = FALSE; 
        if (strnicmp(commands[j],  "NOSSE2", 6) == 0) hasSSE2 = FALSE; 
        if (strnicmp(commands[j],  "NO3DNO", 6) == 0) has3DNow = FALSE; 
    }

    // Display the window

    ShowWindow(hwnd, cmdshow);    
    UpdateWindow(hwnd);

    displayWhatever(mainHWND);
    
    return TRUE;
    
}  // InitApp


/*
 * WindowProc - handle messages for the main application window
 *
 * WM_DESTROY requests program exit. WM_COMMAND dispatches the menu
 * commands (also posted by the TEST_PANEL buttons):
 *    MENU_ABOUT  - modal About box
 *    MENU_RUNALL - re-run all tests and redisplay the results panel
 *    MENU_SAVE   - ask for a file name, save results, redisplay panel
 *    MENU_EXIT   - request program exit
 * Every message, handled or not, is finally passed to DefWindowProc().
 */

long FAR PASCAL WindowProc(HWND hwnd, UINT msg,
                            WPARAM wparam, LPARAM lparam)                  
{
    /* Messages without a case here get only the default processing */
    switch( msg )
    {
    case WM_DESTROY:
        CleanUpAndPostQuit();
        break;

    case WM_COMMAND:
        switch( LOWORD( wparam ) )
        {
        case MENU_ABOUT:
            DialogBox( MyInstance,"AboutBox", hwnd, (DLGPROC)AboutDlgProc );
            break;

        case MENU_RUNALL:
            if (!runAllTests()) CleanUpAndPostQuit();
            displayWhatever(mainHWND);                                     
            break;

        case MENU_SAVE:
            if (paramHwnd)
            {
                DestroyWindow(paramHwnd);
                /* Clear the handle so displayWhatever() below does not
                   call DestroyWindow() again on a stale value. */
                paramHwnd = NULL;
            }
            if( GetFileName())
            {
               saveData();
            }
            displayWhatever(mainHWND);
            break;

        case MENU_EXIT:
            CleanUpAndPostQuit();
            break;
        }
        break;
    } 
    return DefWindowProc( hwnd, msg, wparam, lparam );

} /* WindowProc */



/*
 * AboutDlgProc - dialog procedure for the About box.
 *
 * Accepts WM_INITDIALOG, and closes the dialog when a WM_COMMAND with
 * id IDOK arrives. Returns TRUE for those two cases, FALSE otherwise.
 */
BOOL FAR PASCAL AboutDlgProc( HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam )
{
    (void)lparam;   /* unused */

    if (msg == WM_INITDIALOG)
    {
        return TRUE;
    }

    if (msg == WM_COMMAND && LOWORD(wparam) == IDOK)
    {
        EndDialog(hwnd, TRUE);
        return TRUE;
    }

    return FALSE;

} /* AboutDlgProc */



  
/*
 * local_time - stores the current date and time as text in timeday.
 *
 * The text is the ctime() form, which ends with a newline. If ctime()
 * cannot convert the time (it returns NULL, for example when time()
 * itself failed), timeday is set to an empty string instead of copying
 * from a null pointer.
 */
void local_time()
{
    time_t time_of_day = time( NULL );
    const char *stamp = ctime(&time_of_day);

    if (stamp == NULL)
    {
        timeday[0] = '\0';
        return;
    }
    strcpy (timeday, stamp); // timeday = date and time
}



/*
 * CleanUpAndPostQuit
 * Requests program exit by setting the bQuit flag, which the message
 * loop in WinMain tests. Nothing else is released or posted here.
 */
void CleanUpAndPostQuit(void)
{
    bQuit = TRUE;
}


BOOL FAR PASCAL TestPanel( HWND hdlg, UINT msg,
                                WPARAM wparam, LPARAM lparam )
{
    wparam = wparam;
    lparam = lparam;
    char     tab = 9;
    char   lineSpace[2] = " ";
    int FAR  tabs[4] = {80, 125, 170, 215};
    hdlg = hdlg;

     switch( msg )
     {
       case WM_INITDIALOG:

        SendDlgItemMessage(hdlg, DATA_LIST, LB_SETTABSTOPS, (WPARAM) 4,
                                                 (LPARAM) (int FAR*) tabs);
        SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) configData1);
        if (hasSSEOS || hasSSE2OS)
        {
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) configData5);
        }
        SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) configData2);
        if (hasMMX || hasSSE || hasSSE2)
        {
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) configData4);
        }

        if (runStage == -1)
        {
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                            (LPARAM) lineSpace);
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                (LPARAM) " Integer Test Running");
        }
        if (runStage > -1)
        {
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) lineSpace);
            sprintf(writeMsg, " Speeds adding to %c 1 Register %c2 Registers"
                              " %c3 Registers %c4 Registers", tab, tab, tab, tab); 
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);

            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) lineSpace);

            sprintf(writeMsg, " 32 bit Integer MIPS %c%8d %c%8d %c%8d %c%8d",
                tab, mipsReg[0], tab, mipsReg[1], tab, mipsReg[2], tab, mipsReg[3]);    
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);
            if (runStage == 0)
            {
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                                (LPARAM) lineSpace);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                    (LPARAM) " SP Floating Point Test Running");
            }
        }
        if (runStage > 0)
        {
            sprintf(writeMsg, " 32 bit Float MFLOPS %c%8d %c%8d %c%8d %c%8d",
                tab, mflopsSP[0], tab, mflopsSP[1], tab, mflopsSP[2], tab, mflopsSP[3]);    
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);

            if (runStage == 1)
            {
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                                (LPARAM) lineSpace);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                    (LPARAM) " DP Floating Point Test Running");
            }
        }
        if (runStage > 1)
        {
            sprintf(writeMsg, " 64 bit Float MFLOPS %c%8d %c%8d %c%8d %c%8d",
                tab, mflopsDP[0], tab, mflopsDP[1], tab, mflopsDP[2], tab, mflopsDP[3]);    
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);
            if (runStage == 2)
            {
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                                (LPARAM) lineSpace);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                    (LPARAM) " MMX Test Running");
            }
        }
        if (runStage > 2)
        {
            if (hasMMX)
            {
                sprintf(writeMsg, " 32 bit MMX Int MIPS %c%8d %c%8d %c%8d %c%8d",    
                 tab, mmxMipsReg[0], tab, mmxMipsReg[1], tab, mmxMipsReg[2], tab, mmxMipsReg[3]);    
                 SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);
            }
            if (runStage == 3)
            {
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                                (LPARAM) lineSpace);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                    (LPARAM) " SSE Floating Point Test Running");
            }
        }
        if (runStage > 3)
        {
            if (hasSSE && hasSSEOS)
            {
                sprintf(writeMsg, " 32 bit SSE MFLOPS %c%8d %c%8d %c%8d %c%8d",    
                 tab, SSEmflops[0], tab, SSEmflops[1], tab, SSEmflops[2], tab, SSEmflops[3]);    
                 SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);
            }
            if (runStage == 4)
            {
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                                (LPARAM) lineSpace);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                    (LPARAM) " SSE2 Floating Point Test Running");
            }
        }
        if (runStage > 4)
        {
            if (hasSSE2 && hasSSE2OS)
            {
                sprintf(writeMsg, " 64 bit SSE2 MFLOPS %c%8d %c%8d %c%8d %c%8d",    
                 tab, SSE2mflops[0], tab, SSE2mflops[1], tab, SSE2mflops[2], tab, SSE2mflops[3]);    
                 SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);
            }
            if (runStage == 5)
            {
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                                (LPARAM) lineSpace);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,
                    (LPARAM) " 3DNow Floating Point Test Running");
            }
        }
        if (runStage > 5)
        {
            if (has3DNow)
            {
                sprintf(writeMsg, " 32 bit 3DNow MFLOPS %c%8d %c%8d %c%8d %c%8d",    
                tab, Now3Dmflops[0], tab, Now3Dmflops[1], tab, Now3Dmflops[2], tab, Now3Dmflops[3]);    
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) writeMsg);
            }
        }
        if (runStage > 9)
        {
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) lineSpace);
            if (correctInt)
            {
                sprintf(writeMsg, " 32 bit Integer MIPS %c 810M instructions of r=r+1 correct result", tab);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
            }
            else
            {
                sprintf(writeMsg, " 32 bit Integer MIPS %c ERRORS INCORRECT ADDITIONS", tab);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
            }
            if (correctSP)
            {
                sprintf(writeMsg, " 32 bit Float MFLOPS %c 810M instructions of r=r+1 correct result", tab);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
            }
            else
            {
                sprintf(writeMsg, " 32 bit Float MFLOPS %c ERRORS INCORRECT ADDITIONS", tab);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
            }
            if (correctDP)
            {
                sprintf(writeMsg, " 64 bit Float MFLOPS %c 810M instructions of r=r+1 correct result", tab);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
            }
            else
            {
                sprintf(writeMsg, " 64 bit Float MFLOPS %c ERRORS INCORRECT ADDITIONS", tab);
                SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
            }
            if (hasMMX)
            {
                if (correctMMX)
                {
                    sprintf(writeMsg, " 32 bit MMX Int MIPS %c 810M instructions of r=r+1 correct result, 2 adds/instruction", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }
                else
                {
                    sprintf(writeMsg, " 32 bit MMX Int MIPS %c ERRORS INCORRECT ADDITIONS", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }
            }
            if (hasSSE && hasSSEOS)
            {
                if (correctSSE)
                {
                    sprintf(writeMsg, " SSE 32 bit MFLOPS %c 810M instructions of r=r+1 correct result, 4 adds/instruction", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }
                else
                {
                    sprintf(writeMsg, " SSE 32 bit MFLOPS %c ERRORS INCORRECT ADDITIONS", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }                
            }
            if (hasSSE2 && hasSSE2OS)
            {
                if (correctSSE2)
                {
                    sprintf(writeMsg, " SSE2 64 bit MFLOPS %c 810M instructions of r=r+1 correct result, 2 adds/instruction", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }
                else
                {
                    sprintf(writeMsg, " SSE2 64 bit MFLOPS %c ERRORS INCORRECT ADDITIONS", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }                
            }
            if (has3DNow)
            {
                if (correct3DNow)
                {
                    sprintf(writeMsg, " 32 bit 3DNow MFLOPS%c 810M instructions of r=r+1 correct result, 2 adds/instruction", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }
                else
                {
                    sprintf(writeMsg, " 32 bit 3DNow MFLOPS%c ERRORS INCORRECT ADDITIONS", tab);
                    SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0,(LPARAM) writeMsg);
                }                
            }

            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) lineSpace);
            SendDlgItemMessage(hdlg, DATA_LIST, LB_ADDSTRING, 0, (LPARAM) " Test Finished");
            SendDlgItemMessage(hdlg, DATA_LIST, LB_SETCURSEL, 17, 0L);
        }

        return( TRUE );
                    
       case WM_COMMAND:
 
        if( HIWORD( wparam ) == LBN_SELCHANGE)
        {
             displayWhatever(mainHWND);
        }
        else if( HIWORD( wparam ) == BN_CLICKED)
        {
            switch (LOWORD (wparam))
            {
                case RUN_ALL:
                 PostMessage(mainHWND, WM_COMMAND, MENU_RUNALL, 0);
                 break;

                case SAVE:
                 PostMessage(mainHWND, WM_COMMAND, MENU_SAVE, 0);
                 break;

                case EXIT_BUTTON:
                 PostMessage(mainHWND, WM_COMMAND, MENU_EXIT, 0);
                 break;
            }
            displayWhatever(mainHWND);
        }

        return( TRUE );         
     }
     return( FALSE );
    
} // TestPanel


void start_time(void)
{
    QueryPerformanceCounter(&starttime);
}
 
double getRunSeconds(void)
{
    LARGE_INTEGER liDiff;
    LARGE_INTEGER liFreq;

    QueryPerformanceCounter(&liDiff);

    liDiff.QuadPart -= starttime.QuadPart;

    (void)QueryPerformanceFrequency(&liFreq);
    
    runSeconds = (double)liDiff.QuadPart / (double) liFreq.QuadPart;
    return runSeconds;
}

BOOL  runAllTests()
{
   int i, mips, max, count;
   int mflops;
   double countD;
   float  countS;
   unsigned int  totalI1  = 22000000;
   unsigned int  totalI2  = 23000000;
   double         totalSP1 = 20000000;
   double         totalSP2 = 21000000;
   double         totalD1  = 20000000;
   double         totalD2  = 21000000;
   double         totalM1  = 20000000;
   double         totalM2  = 21000000;
   double         totalA1  = 40000000;
   double         totalA2  = 42000000;
   double         totalS21 = 40000000;
   double         totalS22 = 42000000;
   double         totalS1  = 80000000;
   double         totalS2  = 84000000;

   dsum = (double *)_aligned_malloc(64, 16);
   if (dsum == NULL)
   {
        sprintf(writeMsg, "Cannot allocate memory");
        if (!autoRun) MessageBox(NULL, writeMsg, "ERROR WILL EXIT", MB_ICONWARNING | MB_OK);
        CleanUpAndPostQuit();
        return FALSE;
   }        
   ssum1 = (float *)_aligned_malloc(64, 16);
   if (ssum1 == NULL)
   {
        sprintf(writeMsg, "Cannot allocate memory");
        if (!autoRun) MessageBox(NULL, writeMsg, "ERROR WILL EXIT", MB_ICONWARNING | MB_OK);
        _aligned_free(dsum);
        CleanUpAndPostQuit();
        return FALSE;
   }        
   ssumA = (float *)_aligned_malloc(64, 16);
   if (ssumA == NULL)
   {
        sprintf(writeMsg, "Cannot allocate memory");
        if (!autoRun) MessageBox(NULL, writeMsg, "ERROR WILL EXIT", MB_ICONWARNING | MB_OK);
        _aligned_free(ssum1);
        _aligned_free(dsum);
        CleanUpAndPostQuit();
        return FALSE;
   }        

   local_time();
   
   runStage = -1;
   correctInt = TRUE;
   correctSP  = TRUE;
   correctDP  = TRUE;
   correctMMX = TRUE;
   correctSSE = TRUE;
   correctSSE2 = TRUE;
   correct3DNow = TRUE;

   mmxSum[0] = 0;
   mmxSum[1] = 0;

   intCount = 0;
   FPSCount  = 0;
   FPDCount  = 0;
   MMXcount = 0;
   SSEcount = 0;
   SSE2count = 0;
   Now3Dcount = 0;

   max = 0;
   for (i=0; i<10; i++)
   {
       count = intCount;
       start_time();
       mips1Reg();
       getRunSeconds();
       count = intCount - count;       
       mips = (int)((double)count / 1000000.0 / runSeconds + 0.5);
       if(mips > max) max = mips;
   }       
   mipsReg[0] = max;

   max = 0;
   for (i=0; i<10; i++)
   {
       count = intCount;
       start_time();
       mips2Reg();
       getRunSeconds();
       count = intCount - count;       
       mips = (int)((double)count / 1000000.0 / runSeconds + 0.5);
       if(mips > max) max = mips;
   }
   mipsReg[1] = max;

   max = 0;
   for (i=0; i<10; i++)
   {
       count = intCount;
       start_time();
       mips3Reg();
       getRunSeconds();
       count = intCount - count;       
       mips = (int)((double)count / 1000000.0 / runSeconds + 0.5);
       if(mips > max) max = mips;
   }
   mipsReg[2] = max;
   
   max = 0;
   for (i=0; i<10; i++)
   {
       count = intCount;
       start_time();
       mips4Reg();
       getRunSeconds();
       count = intCount - count;       
       mips = (int)((double)count / 1000000.0 / runSeconds + 0.5);
       if(mips > max) max = mips;
   }
   if(intCount != (totalI1 * 3 + totalI2) * 10) correctInt = FALSE;

   mipsReg[3] = max;

   runStage = 0;
   displayWhatever(mainHWND);

   mflopsSP[0] = 0;
   mflopsSP[1] = 0;
   mflopsSP[2] = 0;
   mflopsSP[3] = 0;

   max = 0;
   for (i=0; i<10; i++)
   {
       countS = FPSCount;
       start_time();
       mflopS1Reg();       
       getRunSeconds();
       countS = FPSCount - countS;
       mflops = (int)(countS / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsSP[0] = max;

   max = 0;
   for (i=0; i<10; i++)
   {
       countS = FPSCount;
       start_time();
       FPSCount = mflopS2Reg();       
       getRunSeconds();
       countS = FPSCount - countS;
       mflops = (int)(countS / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsSP[1] = max;

   max = 0;
   for (i=0; i<10; i++)
   {
       countS = FPSCount;
       start_time();
       FPSCount = mflopS3Reg();       
       getRunSeconds();
       countS = FPSCount - countS;
       mflops = (int)(countS / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsSP[2] = max;

   max = 0;
   for (i=0; i<10; i++)
   {
       countS = FPSCount;
       start_time();
       FPSCount = mflopS4Reg();       
       getRunSeconds();
       countS = FPSCount - countS;
       mflops = (int)(countS / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsSP[3] = max;
   if(FPSCount != (totalSP1 * 3 + totalSP2) * 10) correctSP = FALSE;

   runStage = 1;
   displayWhatever(mainHWND);

   mflopsDP[0] = 0;
   mflopsDP[1] = 0;
   mflopsDP[2] = 0;
   mflopsDP[3] = 0;

   max = 0;
   for (i=0; i<10; i++)
   {
       countD = FPDCount;
       start_time();
       mflopD1Reg();       
       getRunSeconds();
       countD = FPDCount - countD;
       mflops = (int)(countD / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsDP[0] = max;


   max = 0;
   for (i=0; i<10; i++)
   {
       countD = FPDCount;
       start_time();
       FPDCount = mflopD2Reg();       
       getRunSeconds();
       countD = FPDCount - countD;
       mflops = (int)(countD / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsDP[1] = max;


   max = 0;
   for (i=0; i<10; i++)
   {
       countD = FPDCount;
       start_time();
       FPDCount = mflopD3Reg();       
       getRunSeconds();
       countD = FPDCount - countD;
       mflops = (int)(countD / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsDP[2] = max;

   max = 0;
   for (i=0; i<10; i++)
   {
       countD = FPDCount;
       start_time();
       FPDCount = mflopD4Reg();       
       getRunSeconds();
       countD = FPDCount - countD;
       mflops = (int)(countD / 1000000.0 / runSeconds + 0.5);
       if(mflops > max) max = mflops;
   }
   mflopsDP[3] = max;
   if(FPDCount != (totalD1 * 3 + totalD2) * 10) correctDP = FALSE;

   runStage = 2;
   displayWhatever(mainHWND);

   if (hasMMX)
   {
       mmxMipsReg[0] = 0;
       mmxMipsReg[1] = 0;
       mmxMipsReg[2] = 0;
       mmxMipsReg[3] = 0;
       max = 0;
       for (i=0; i<10; i++)
       {    
           start_time();
           mmx1Reg();
           getRunSeconds();
           mips = (int)(totalM1 / 1000000.0 / runSeconds + 0.5);
           if(mips > max) max = mips;
       }
       mmxMipsReg[0] = max * 2;
       
       max = 0;
       for (i=0; i<10; i++)
       {
           start_time();
           mmx2Reg();
           getRunSeconds();
           mips = (int)(totalM1 / 1000000.0 / runSeconds + 0.5);
           if(mips > max) max = mips;
       }
       mmxMipsReg[1] = max * 2;

       max = 0;
       for (i=0; i<10; i++)
       {
           start_time();
           mmx3Reg();
           getRunSeconds();
           mips = (int)(totalM2 / 1000000.0 / runSeconds + 0.5);
           if(mips > max) max = mips;
       }
       mmxMipsReg[2] = max * 2;

       max = 0;
       for (i=0; i<10; i++)
       {
           start_time();
           mmx4Reg();
           getRunSeconds();
           mips = (int)(totalM1 / 1000000.0 / runSeconds + 0.5);
           if(mips > max) max = mips;
       }
       mmxMipsReg[3] = max * 2;
       MMXcount = ((double) mmxSum[0] + (double) mmxSum[1]) / 2;
       if(MMXcount != (totalM1 * 3 + totalM2) * 10) correctMMX = FALSE;
   }
   runStage = 3;
   displayWhatever(mainHWND);

   if (hasSSE && hasSSEOS)
   {
       ssum1[4] = 1;
       ssum1[5] = 1;
       ssum1[6] = 1;
       ssum1[7] = 1;
       max = 0;
       for (i=0; i<10; i++)
       {    
           ssum1[0] = 0;
           ssum1[1] = 0;
           ssum1[2] = 0;
           ssum1[3] = 0;
           start_time();
           SSE1Reg();
           getRunSeconds();
           mflops = (int)(totalS1 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSEcount = SSEcount + (double) ssum1[0] + (double) ssum1[1]
                               + (double) ssum1[2] + (double) ssum1[3];
       }
       SSEmflops[0] = max;

      max = 0;
       for (i=0; i<10; i++)
       {
           ssum1[0] = 0;
           ssum1[1] = 0;
           ssum1[2] = 0;
           ssum1[3] = 0;
           start_time();
           SSE2Reg();
           getRunSeconds();
           mflops = (int)(totalS1 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSEcount = SSEcount + (double) ssum1[0] + (double) ssum1[1]
                               + (double) ssum1[2] + (double) ssum1[3];
       }
       SSEmflops[1] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           ssum1[0] = 0;
           ssum1[1] = 0;
           ssum1[2] = 0;
           ssum1[3] = 0;
           start_time();
           SSE3Reg();
           getRunSeconds();
           mflops = (int)(totalS2 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSEcount = SSEcount + (double) ssum1[0] + (double) ssum1[1]
                               + (double) ssum1[2] + (double) ssum1[3];
       }
       SSEmflops[2] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           ssum1[0] = 0;
           ssum1[1] = 0;
           ssum1[2] = 0;
           ssum1[3] = 0;
           start_time();
           SSE4Reg();
           getRunSeconds();
           mflops = (int)(totalS1 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSEcount = SSEcount + (double) ssum1[0] + (double) ssum1[1]
                               + (double) ssum1[2] + (double) ssum1[3];
       }
       SSEmflops[3] = max;
       if(SSEcount != (totalS1 * 3 + totalS2) * 10) correctSSE = FALSE;
   }

   runStage = 4;
   displayWhatever(mainHWND);

   if (hasSSE2 && hasSSE2OS)
   {
       dsum[2] = 1;
       dsum[3] = 1;
       max = 0;
       for (i=0; i<10; i++)
       {    
           dsum[0] = 0;
           dsum[1] = 0;
           start_time();
           SSE21Reg();
           getRunSeconds();
           mflops = (int)(totalS21 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSE2count = SSE2count + dsum[0] + dsum[1];
       }
       SSE2mflops[0] = max;

      max = 0;
       for (i=0; i<10; i++)
       {
           dsum[0] = 0;
           dsum[1] = 0;
           start_time();
           SSE22Reg();
           getRunSeconds();
           mflops = (int)(totalS21 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSE2count = SSE2count + dsum[0] + dsum[1];
       }
       SSE2mflops[1] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           dsum[0] = 0;
           dsum[1] = 0;
           start_time();
           SSE23Reg();
           getRunSeconds();
           mflops = (int)(totalS22 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSE2count = SSE2count + dsum[0] + dsum[1];
       }
       SSE2mflops[2] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           dsum[0] = 0;
           dsum[1] = 0;
           start_time();
           SSE24Reg();
           getRunSeconds();
           mflops = (int)(totalS21 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           SSE2count = SSE2count + dsum[0] + dsum[1];
       }
       SSE2mflops[3] = max;
       if(SSE2count != (totalS21 * 3 + totalS22) * 10) correctSSE2 = FALSE;
   }
   runStage = 5;
   displayWhatever(mainHWND);

   if (has3DNow)
   {
       ssumA[2] = 1.0;
       ssumA[3] = 1.0;
       max = 0;
       for (i=0; i<10; i++)
       {    
           ssumA[0] = 0;
           ssumA[1] = 0;
           start_time();
           Now3D1Reg();
           getRunSeconds();
           mflops = (int)(totalA1 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           Now3Dcount = Now3Dcount + ((double) ssumA[0] + (double) ssumA[1]);
       }
       Now3Dmflops[0] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           ssumA[0] = 0;
           ssumA[1] = 0;
           start_time();
           Now3D2Reg();
           getRunSeconds();
           mflops = (int)(totalA1 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           Now3Dcount = Now3Dcount + ((double) ssumA[0] + (double) ssumA[1]);
       }
       Now3Dmflops[1] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           ssumA[0] = 0;
           ssumA[1] = 0;
           start_time();
           Now3D3Reg();
           getRunSeconds();
           mflops = (int)(totalA2 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           Now3Dcount = Now3Dcount + ((double) ssumA[0] + (double) ssumA[1]);
       }
       Now3Dmflops[2] = max;

       max = 0;
       for (i=0; i<10; i++)
       {
           ssumA[0] = 0;
           ssumA[1] = 0;
           start_time();
           Now3D4Reg();
           getRunSeconds();
           mflops = (int)(totalA1 / 1000000.0 / runSeconds + 0.5);
           if(mflops > max) max = mflops;
           Now3Dcount = Now3Dcount + ((double) ssumA[0] + (double) ssumA[1]);
       }
       Now3Dmflops[3] = max;
       if(Now3Dcount != (totalA1 * 3 + totalA2) * 10) correct3DNow = FALSE;
   }
   runStage = 6;
   displayWhatever(mainHWND);

   runStage = 10;
   _aligned_free(ssumA);
   _aligned_free(ssum1);
   _aligned_free(dsum);   
   return TRUE;

} // runAllTests

/*
 * mips4Reg - integer add kernel using four registers (eax, ebx, ecx, edx).
 *
 * Each register is seeded with intCount / 4 (integer division, so any
 * remainder of intCount modulo 4 is dropped). The loop runs 1,000,000
 * times (edi is the counter) and issues 20 add instructions per pass,
 * interleaved over the four registers. The immediates sum to 22 per pass
 * (19 x 1 plus a final 3), which equals the 20 adds plus dec and jnz -
 * presumably so that intCount tallies every instruction executed; confirm
 * against the total the caller checks for.
 *
 * On exit the four registers are summed and written to the global
 * intCount. The return value is the per-register seed computed before the
 * loop, not the updated total.
 */
int mips4Reg(void)
{
    unsigned int count;

    count = intCount / 4;
        __asm
        {
              mov edi, 1000000
              mov eax, count
              mov ebx, count
              mov ecx, count
              mov edx, count
           lp:add eax, 1
              add ebx, 1
              add ecx, 1
              add edx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add edx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add edx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add edx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add edx, 3          // +3 instead of +1: see header comment
              dec edi
              jnz lp
              add eax, ebx        // fold the four partial sums into eax
              add eax, ecx
              add eax, edx
              mov intCount, eax
        }
    return count;
}


/*
 * mips3Reg - integer add kernel using three registers (eax, ebx, ecx).
 *
 * eax is seeded with the global intCount; ebx and ecx start at zero. The
 * loop runs 1,000,000 times (edi is the counter) and issues 21 add
 * instructions per pass, seven per register. The immediates sum to 23 per
 * pass (20 x 1 plus a final 3), which equals the 21 adds plus dec and jnz.
 *
 * On exit the three registers are summed and written back to intCount.
 * The local count is never changed, so the function always returns 0.
 *
 * NOTE(review): unlike mips1Reg/mips2Reg/mips4Reg this function calls
 * start_time() itself. If the caller also calls start_time() before
 * invoking it, this inner call restarts the timer - confirm whether that
 * is intended.
 */
int mips3Reg(void)
{
    unsigned int count;

    count = 0;
    
        start_time();
        __asm
        {
              mov edi, 1000000
              mov eax, intCount
              mov ebx, 0
              mov ecx, 0
           lp:add eax, 1
              add ebx, 1
              add ecx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add eax, 1
              add ebx, 1
              add ecx, 1
              add eax, 1
              add ebx, 1
              add ecx, 3          // +3 instead of +1: see header comment
              dec edi
              jnz lp
              add eax, ebx        // fold the three partial sums into eax
              add eax, ecx
              mov intCount, eax
    }
    return count;
}

/*
 * mips2Reg - integer add kernel using two registers (eax, ebx).
 *
 * Both registers are seeded with intCount / 2 (integer division, so an odd
 * intCount loses 1). The loop runs 1,000,000 times (edi is the counter)
 * and issues 20 add instructions per pass, alternating between the two
 * registers. The immediates sum to 22 per pass (19 x 1 plus a final 3),
 * which equals the 20 adds plus dec and jnz.
 *
 * On exit eax + ebx is written to the global intCount. The return value is
 * the per-register seed computed before the loop, not the updated total.
 */
int mips2Reg(void)
{
    unsigned int count;

    count = intCount / 2;
        __asm
        {
              mov edi, 1000000
              mov eax, count
              mov ebx, count
           lp:add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 1
              add eax, 1
              add ebx, 3          // +3 instead of +1: see header comment
              dec edi
              jnz lp
              add eax, ebx        // combine the two partial sums
              mov intCount, eax
        }
    return count;
}


/*
 * mips1Reg - integer add kernel using a single register (eax).
 *
 * eax is seeded with the global intCount. The loop runs 1,000,000 times
 * (edi is the counter) and issues 20 add instructions per pass, all on
 * eax, so every add depends on the previous one. The immediates sum to 22
 * per pass (19 x 1 plus a final 3), which equals the 20 adds plus dec and
 * jnz.
 *
 * On exit eax is written back to intCount. The local count is never
 * changed, so the function always returns 0.
 */
int mips1Reg(void)
{
    unsigned int count;

    count = 0;
    
        __asm
        {
              mov edi, 1000000
              mov eax, intCount
           lp:add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 1
              add eax, 3          // +3 instead of +1: see header comment
              dec edi
              jnz lp
              mov intCount, eax
    }
    return count;
}

/*
 * mflopS1Reg - x87 add kernel with one accumulator, single-precision
 * variant.
 *
 * Pushes the global FPSCount and then the constant 1.0 onto the x87 stack,
 * so st(0) = 1.0 and st(1) = accumulator. The loop runs 1,000,000 times
 * (edi is the counter) with 20 dependent fadd instructions per pass, each
 * adding 1.0 to the accumulator.
 *
 * On exit the 1.0 is popped into the scratch local st0 (presumably the
 * operand resolves to that local, as in the sibling functions that spell
 * it "dword ptr st0") and the accumulator is popped back into FPSCount.
 * The result is delivered through FPSCount; countR is never changed, so
 * the function always returns 0.
 */
float mflopS1Reg(void)
{
    float countR;
    float st0;

    countR = 0;
    st0 = 1.0;

    __asm
    {
          mov edi, 1000000
          fld FPSCount
          fld1 
       lp:fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          dec edi
          jnz lp
          fstp  st0               // discard the 1.0 constant
          fstp FPSCount           // store the accumulator, emptying the stack
    }
    return countR;
}

/*
 * mflopS2Reg - x87 add kernel with two accumulators, single-precision
 * variant.
 *
 * Both accumulators are seeded with FPSCount / 2. After the loads the x87
 * stack is st(0) = 1.0, st(1) = countD2, st(2) = countD1. The loop runs
 * 1,000,000 times (edi is the counter) with 20 fadd instructions per pass,
 * alternating between st(1) and st(2), i.e. 10 additions of 1.0 to each.
 *
 * The pops store st(1) into countD1 and st(2) into countD2, so the two
 * locals end up swapped relative to the load order; both had the same seed
 * and only their sum is used. This function does not write FPSCount
 * itself.
 *
 * Returns countD1 + countD2 as a float.
 */
float mflopS2Reg(void)
{
    float countD1;
    float countD2;
    float st0;

    countD1 = FPSCount / 2;
    countD2 = FPSCount / 2;
    st0 = 1.0;
    
    __asm
    {
          mov edi, 1000000
          fld countD1
          fld countD2
          fld1
       lp:fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          dec edi
          jnz lp
          fstp  dword ptr st0     // discard the 1.0 constant
          fstp  dword ptr countD1
          fstp  dword ptr countD2          
    }
    return countD1+countD2;
}

/*
 * mflopS3Reg - x87 add kernel with three accumulators, single-precision
 * variant.
 *
 * One accumulator is seeded with FPSCount and the other two with zero
 * (loaded via fldz rather than from countD2/countD3). After the loads the
 * x87 stack is st(0) = 1.0, st(1) = 0, st(2) = 0, st(3) = FPSCount. The
 * loop runs 1,000,000 times (edi is the counter) with 21 fadd instructions
 * per pass, seven additions of 1.0 to each accumulator.
 *
 * The pops store the accumulators in stack order, so countD1 and countD2
 * receive the zero-seeded ones and countD3 receives the FPSCount-seeded
 * one; only their sum is used. This function does not write FPSCount
 * itself.
 *
 * Returns countD1 + countD2 + countD3 as a float.
 */
float mflopS3Reg(void)
{
    float countD1;
    float countD2;
    float countD3;
    float st0;

    countD1 = FPSCount;
    countD2 = 0.0;
    countD3 = 0.0;
    st0 = 1.0;

    __asm
    {
          mov edi, 1000000
          fld  countD1
          fldz
          fldz 
          fld1 
       lp:fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          dec edi
          jnz lp
          fstp  dword ptr st0     // discard the 1.0 constant
          fstp  dword ptr countD1
          fstp  dword ptr countD2          
          fstp  dword ptr countD3          
    }
    return countD1+countD2+countD3;
}

/*
 * mflopS4Reg - x87 add kernel with four accumulators, single-precision
 * variant.
 *
 * All four accumulators are seeded with FPSCount / 4. After the loads the
 * x87 stack is st(0) = 1.0 and st(1)..st(4) = countD4..countD1. The loop
 * runs 1,000,000 times (edi is the counter) with 20 fadd instructions per
 * pass, five additions of 1.0 to each accumulator.
 *
 * The pops store the accumulators in stack order, which is the reverse of
 * the load order; all seeds are equal and only the sum is used. This
 * function does not write FPSCount itself.
 *
 * Returns countD1 + countD2 + countD3 + countD4 as a float.
 */
float mflopS4Reg(void)
{
    float countD1;
    float countD2;
    float countD3;
    float countD4;
    float st0;

    countD1 = FPSCount / 4;
    countD2 = FPSCount / 4;
    countD3 = FPSCount / 4;
    countD4 = FPSCount / 4;
    st0 = 1.0;
    
    __asm
    {
          mov edi, 1000000
          fld countD1
          fld countD2
          fld countD3
          fld countD4
          fld1
       lp:fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          dec edi
          jnz lp
          fstp  dword ptr st0     // discard the 1.0 constant
          fstp  dword ptr countD1
          fstp  dword ptr countD2          
          fstp  dword ptr countD3
          fstp  dword ptr countD4          
    }
    return countD1+countD2+countD3+countD4;
}


/*
 * mflopD1Reg - x87 add kernel with one accumulator, double-precision
 * variant.
 *
 * Pushes the global FPDCount and then the constant 1.0 onto the x87 stack,
 * so st(0) = 1.0 and st(1) = accumulator. The loop runs 1,000,000 times
 * (edi is the counter) with 20 dependent fadd instructions per pass, each
 * adding 1.0 to the accumulator.
 *
 * On exit the 1.0 is popped into the scratch local st0 (presumably the
 * operand resolves to that local, as in the sibling functions that spell
 * it "qword ptr st0") and the accumulator is popped back into FPDCount.
 * The result is delivered through FPDCount; countR is never changed, so
 * the function always returns 0.
 */
double mflopD1Reg(void)
{
    double countR;
    double st0;

    countR = 0;
    st0 = 1.0;

    __asm
    {
          mov edi, 1000000
          fld FPDCount
          fld1 
       lp:fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          fadd st(1), st
          dec edi
          jnz lp
          fstp  st0               // discard the 1.0 constant
          fstp FPDCount           // store the accumulator, emptying the stack
    }
    return countR;
}

/*
 * mflopD2Reg - x87 add kernel with two accumulators, double-precision
 * variant.
 *
 * Both accumulators are seeded with FPDCount / 2. After the loads the x87
 * stack is st(0) = 1.0, st(1) = countD2, st(2) = countD1. The loop runs
 * 1,000,000 times (edi is the counter) with 20 fadd instructions per pass,
 * alternating between st(1) and st(2), i.e. 10 additions of 1.0 to each.
 *
 * The pops store st(1) into countD1 and st(2) into countD2, so the two
 * locals end up swapped relative to the load order; both had the same seed
 * and only their sum is used. This function does not write FPDCount
 * itself.
 *
 * Returns countD1 + countD2.
 */
double mflopD2Reg(void)
{
    double countD1;
    double countD2;
    double st0;

    countD1 = FPDCount / 2;
    countD2 = FPDCount / 2;
    st0 = 1.0;
    
    __asm
    {
          mov edi, 1000000
          fld countD1
          fld countD2
          fld1
       lp:fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          dec edi
          jnz lp
          fstp  qword ptr st0     // discard the 1.0 constant
          fstp  qword ptr countD1
          fstp  qword ptr countD2          
    }
    return countD1+countD2;
}

/*
 * mflopD3Reg - x87 add kernel with three accumulators, double-precision
 * variant.
 *
 * One accumulator is seeded with FPDCount and the other two with zero
 * (loaded via fldz rather than from countD2/countD3). After the loads the
 * x87 stack is st(0) = 1.0, st(1) = 0, st(2) = 0, st(3) = FPDCount. The
 * loop runs 1,000,000 times (edi is the counter) with 21 fadd instructions
 * per pass, seven additions of 1.0 to each accumulator.
 *
 * The pops store the accumulators in stack order, so countD1 and countD2
 * receive the zero-seeded ones and countD3 receives the FPDCount-seeded
 * one; only their sum is used. This function does not write FPDCount
 * itself; runAllTests assigns the return value back to FPDCount.
 *
 * Returns countD1 + countD2 + countD3.
 */
double mflopD3Reg(void)
{
    double countD1;
    double countD2;
    double countD3;
    double st0;

    countD1 = FPDCount;
    countD2 = 0.0;
    countD3 = 0.0;
    st0 = 1.0;

    __asm
    {
          mov edi, 1000000
          fld  countD1
          fldz
          fldz 
          fld1 
       lp:fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          fadd st(1), st
          fadd st(2), st
          fadd st(3), st
          dec edi
          jnz lp
          fstp  qword ptr st0     // discard the 1.0 constant
          fstp  qword ptr countD1
          fstp  qword ptr countD2          
          fstp  qword ptr countD3          
    }
    return countD1+countD2+countD3;
}

/*
 * mflopD4Reg - x87 add kernel with four accumulators, double-precision
 * variant.
 *
 * All four accumulators are seeded with FPDCount / 4. After the loads the
 * x87 stack is st(0) = 1.0 and st(1)..st(4) = countD4..countD1. The loop
 * runs 1,000,000 times (edi is the counter) with 20 fadd instructions per
 * pass, five additions of 1.0 to each accumulator.
 *
 * The pops store the accumulators in stack order, which is the reverse of
 * the load order; all seeds are equal and only the sum is used. This
 * function does not write FPDCount itself; runAllTests assigns the return
 * value back to FPDCount.
 *
 * Returns countD1 + countD2 + countD3 + countD4.
 */
double mflopD4Reg(void)
{
    double countD1;
    double countD2;
    double countD3;
    double countD4;
    double st0;

    countD1 = FPDCount / 4;
    countD2 = FPDCount / 4;
    countD3 = FPDCount / 4;
    countD4 = FPDCount / 4;
    st0 = 1.0;
    
    __asm
    {
          mov edi, 1000000
          fld countD1
          fld countD2
          fld countD3
          fld countD4
          fld1
       lp:fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          fadd st(1), st(0)
          fadd st(2), st(0)
          fadd st(3), st(0)
          fadd st(4), st(0)
          dec edi
          jnz lp
          fstp  qword ptr st0     // discard the 1.0 constant
          fstp  qword ptr countD1
          fstp  qword ptr countD2          
          fstp  qword ptr countD3
          fstp  qword ptr countD4          
    }
    return countD1+countD2+countD3+countD4;
}


/*
 * mmx1Reg - MMX packed-add kernel with one accumulator register.
 *
 * mm0 is loaded from the global mmxa (the value added each step) and mm1
 * from the global mmx0 (the accumulator's starting value). The loop runs
 * 1,000,000 times (ebx is the counter) with 20 dependent paddd
 * instructions per pass.
 *
 * After the loop the previous contents of mmxSum are added to the
 * accumulator and the result is stored back to mmxSum, so mmxSum keeps a
 * running total across calls. emms is issued before returning so the x87
 * unit is usable again.
 */
void mmx1Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        movq    mm0, mmxa
        movq    mm1, mmx0
     lp:paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        paddd   mm1, mm0
        dec     ebx
        jnz     lp
        movq    mm0, mmxSum     // mm0 is reused: fetch the running total
        paddd   mm1, mm0
        movq    mmxSum, mm1
        emms
    }       
}

/*
 * mmx2Reg - MMX packed-add kernel with two accumulator registers.
 *
 * mm0/mm2 are both loaded from the global mmxa (the value added each step)
 * and mm1/mm3 from the global mmx0 (the accumulators' starting value). The
 * loop runs 1,000,000 times (ebx is the counter) with 20 paddd
 * instructions per pass, alternating between the two independent
 * accumulators (10 each).
 *
 * After the loop the two accumulators are combined, the previous contents
 * of mmxSum are added, and the result is stored back to mmxSum. emms is
 * issued before returning.
 */
void mmx2Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        movq    mm0, mmxa
        movq    mm1, mmx0
        movq    mm2, mmxa
        movq    mm3, mmx0
     lp:paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm1, mm0
        paddd   mm3, mm2
        dec     ebx
        jnz     lp
        paddd   mm1, mm3        // combine the two accumulators
        movq    mm0, mmxSum     // mm0 is reused: fetch the running total
        paddd   mm1, mm0
        movq    mmxSum, mm1
        emms
    }       
}

/*
 * mmx3Reg - MMX packed-add kernel with three accumulator registers.
 *
 * mm0/mm2/mm4 are loaded from the global mmxa (the value added each step)
 * and mm1/mm3/mm5 from the global mmx0 (the accumulators' starting value).
 * The loop runs 1,000,000 times (ebx is the counter) with 21 paddd
 * instructions per pass, seven on each of the three independent
 * accumulators.
 *
 * After the loop the three accumulators are combined, the previous
 * contents of mmxSum are added, and the result is stored back to mmxSum.
 * emms is issued before returning.
 */
void mmx3Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        movq    mm0, mmxa
        movq    mm1, mmx0
        movq    mm2, mmxa
        movq    mm3, mmx0
        movq    mm4, mmxa
        movq    mm5, mmx0
     lp:paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        dec     ebx
        jnz     lp
        paddd   mm1, mm3        // combine the three accumulators
        paddd   mm1, mm5
        movq    mm0, mmxSum     // mm0 is reused: fetch the running total
        paddd   mm1, mm0
        movq    mmxSum, mm1
        emms
    }
}

/*
 * mmx4Reg - MMX packed-add kernel with four accumulator registers.
 *
 * Uses all eight MMX registers: mm0/mm2/mm4/mm6 are loaded from the global
 * mmxa (the value added each step) and mm1/mm3/mm5/mm7 from the global
 * mmx0 (the accumulators' starting value). The loop runs 1,000,000 times
 * (ebx is the counter) with 20 paddd instructions per pass, five on each
 * of the four independent accumulators.
 *
 * After the loop the four accumulators are combined, the previous contents
 * of mmxSum are added, and the result is stored back to mmxSum. emms is
 * issued before returning.
 */
void mmx4Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        movq    mm0, mmxa
        movq    mm1, mmx0
        movq    mm2, mmxa
        movq    mm3, mmx0
        movq    mm4, mmxa
        movq    mm5, mmx0
        movq    mm6, mmxa
        movq    mm7, mmx0
     lp:paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm7, mm6
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm7, mm6
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm7, mm6
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm7, mm6
        paddd   mm1, mm0
        paddd   mm3, mm2
        paddd   mm5, mm4
        paddd   mm7, mm6
        dec     ebx
        jnz     lp
        paddd   mm1, mm3        // combine the four accumulators
        paddd   mm1, mm5
        paddd   mm1, mm7
        movq    mm0, mmxSum     // mm0 is reused: fetch the running total
        paddd   mm1, mm0
        movq    mmxSum, mm1
        emms
    }       
}

/*
 * SSE1Reg - SSE packed single-precision add kernel, one accumulator at a
 * time.
 *
 * eax holds the pointer stored in the global ssum1. xmm0 is loaded from
 * [eax+16] (the per-step increment; runAllTests sets ssum1[4..7] to 1) and
 * the accumulator from [eax] (runAllTests zeroes ssum1[0..3] before each
 * call). movaps is used, so the buffer must be 16-byte aligned.
 *
 * Unlike the 2/3/4-register variants, the work is split into two
 * back-to-back loops of 500,000 passes each (ebx is the counter), first on
 * xmm1 and then on xmm2, with 20 dependent addps per pass. With an
 * increment of 1.0 each loop reaches 10,000,000 per lane, which is below
 * 2^24 (16,777,216) - presumably the split keeps a single float
 * accumulator within the range where adding 1.0 is exact; confirm with the
 * author's notes.
 *
 * Both accumulators start from [eax], so a non-zero starting value would
 * be counted twice in the final sum. The combined result is stored back to
 * [eax].
 */
void SSE1Reg(void)
{
    __asm
    {
        mov     eax, ssum1
        movaps  xmm0, [eax+16]
        mov     ebx, 500000
        movaps  xmm1, [eax] 
     lp:addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        addps   xmm1, xmm0
        dec     ebx
        jnz     lp
        mov     ebx, 500000     // second half, fresh accumulator in xmm2
        movaps  xmm2, [eax] 
    lp2:addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        addps   xmm2, xmm0
        dec     ebx
        jnz     lp2
        addps   xmm1, xmm2      // combine the two halves
        movaps  [eax], xmm1
    }       
}

/*
 * SSE2Reg - SSE packed single-precision add kernel with two accumulators.
 *
 * NOTE: the "2" is the register count; this uses SSE (addps), not SSE2.
 *
 * eax holds the pointer stored in the global ssum1. xmm0/xmm2 are loaded
 * from [eax+16] (the per-step increment; runAllTests sets ssum1[4..7] to
 * 1) and xmm1/xmm3 from [eax] (zeroed by runAllTests before each call).
 * movaps is used, so the buffer must be 16-byte aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 20 addps per
 * pass, alternating between the two independent accumulators (10 each).
 * Both accumulators start from [eax], so a non-zero starting value would
 * be counted twice. The combined result is stored back to [eax].
 */
void SSE2Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, ssum1
        movaps  xmm0, [eax+16]
        movaps  xmm1, [eax] 
        movaps  xmm2, [eax+16]
        movaps  xmm3, [eax]
     lp:addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        dec     ebx
        jnz     lp
        addps   xmm1, xmm3      // combine the two accumulators
        movaps    [eax], xmm1
    }       
}

/*
 * SSE3Reg - SSE packed single-precision add kernel with three
 * accumulators.
 *
 * NOTE: the "3" is the register count; this uses SSE (addps), not SSE3.
 *
 * eax holds the pointer stored in the global ssum1. xmm0/xmm2/xmm4 are
 * loaded from [eax+16] (the per-step increment; runAllTests sets
 * ssum1[4..7] to 1) and xmm1/xmm3/xmm5 from [eax] (zeroed by runAllTests
 * before each call). movaps is used, so the buffer must be 16-byte
 * aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 21 addps per
 * pass, seven on each of the three independent accumulators. All
 * accumulators start from [eax], so a non-zero starting value would be
 * counted three times. The combined result is stored back to [eax].
 */
void SSE3Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, ssum1
        movaps  xmm0, [eax+16]
        movaps  xmm1, [eax] 
        movaps  xmm2, [eax+16]
        movaps  xmm3, [eax]
        movaps  xmm4, [eax+16]
        movaps  xmm5, [eax]
     lp:addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        dec     ebx
        jnz     lp
        addps   xmm1, xmm3      // combine the three accumulators
        addps   xmm1, xmm5
        movaps    [eax], xmm1
    }
}

/*
 * SSE4Reg - SSE packed single-precision add kernel with four accumulators.
 *
 * NOTE: the "4" is the register count; this uses SSE (addps), not SSE4.
 *
 * eax holds the pointer stored in the global ssum1. xmm0/xmm2/xmm4/xmm6
 * are loaded from [eax+16] (the per-step increment; runAllTests sets
 * ssum1[4..7] to 1) and xmm1/xmm3/xmm5/xmm7 from [eax] (zeroed by
 * runAllTests before each call). movaps is used, so the buffer must be
 * 16-byte aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 20 addps per
 * pass, five on each of the four independent accumulators. All
 * accumulators start from [eax], so a non-zero starting value would be
 * counted four times. The combined result is stored back to [eax].
 */
void SSE4Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, ssum1
        movaps  xmm0, [eax+16]
        movaps  xmm1, [eax] 
        movaps  xmm2, [eax+16]
        movaps  xmm3, [eax]
        movaps  xmm4, [eax+16]
        movaps  xmm5, [eax]
        movaps  xmm6, [eax+16]
        movaps  xmm7, [eax]
     lp:addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm7, xmm6
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm7, xmm6
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm7, xmm6
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm7, xmm6
        addps   xmm1, xmm0
        addps   xmm3, xmm2
        addps   xmm5, xmm4
        addps   xmm7, xmm6
        dec     ebx
        jnz     lp
        addps   xmm1, xmm3      // combine the four accumulators
        addps   xmm1, xmm5
        addps   xmm1, xmm7
        movaps    [eax], xmm1
    }       
}

/*
 * SSE21Reg - SSE2 packed double-precision add kernel with one accumulator.
 *
 * eax holds the pointer stored in the global dsum. xmm0 is loaded from
 * [eax+16] (the per-step increment; runAllTests sets dsum[2] and dsum[3]
 * to 1) and xmm1 from [eax] (dsum[0] and dsum[1], zeroed by runAllTests
 * before each call). movapd is used, so the buffer must be 16-byte
 * aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 20 dependent
 * addpd instructions per pass. The accumulator is stored back to [eax].
 */
void SSE21Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, dsum
        movapd  xmm0, [eax+16]
        movapd  xmm1, [eax] 
     lp:addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        addpd   xmm1, xmm0
        dec     ebx
        jnz     lp
        movapd    [eax], xmm1
    }       
}

/*
 * SSE22Reg - SSE2 packed double-precision add kernel with two
 * accumulators.
 *
 * eax holds the pointer stored in the global dsum. xmm0/xmm2 are loaded
 * from [eax+16] (the per-step increment; runAllTests sets dsum[2] and
 * dsum[3] to 1) and xmm1/xmm3 from [eax] (zeroed by runAllTests before
 * each call). movapd is used, so the buffer must be 16-byte aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 20 addpd per
 * pass, alternating between the two independent accumulators (10 each).
 * Both accumulators start from [eax], so a non-zero starting value would
 * be counted twice. The combined result is stored back to [eax].
 */
void SSE22Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, dsum
        movapd  xmm0, [eax+16]
        movapd  xmm1, [eax] 
        movapd  xmm2, [eax+16]
        movapd  xmm3, [eax]
     lp:addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        dec     ebx
        jnz     lp
        addpd   xmm1, xmm3      // combine the two accumulators
        movapd    [eax], xmm1
    }       
}

/*
 * SSE23Reg - SSE2 packed double-precision add kernel with three
 * accumulators.
 *
 * eax holds the pointer stored in the global dsum. xmm0/xmm2/xmm4 are
 * loaded from [eax+16] (the per-step increment; runAllTests sets dsum[2]
 * and dsum[3] to 1) and xmm1/xmm3/xmm5 from [eax] (zeroed by runAllTests
 * before each call). movapd is used, so the buffer must be 16-byte
 * aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 21 addpd per
 * pass, seven on each of the three independent accumulators. All
 * accumulators start from [eax], so a non-zero starting value would be
 * counted three times. The combined result is stored back to [eax].
 */
void SSE23Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, dsum
        movapd  xmm0, [eax+16]
        movapd  xmm1, [eax] 
        movapd  xmm2, [eax+16]
        movapd  xmm3, [eax]
        movapd  xmm4, [eax+16]
        movapd  xmm5, [eax]
     lp:addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        dec     ebx
        jnz     lp
        addpd   xmm1, xmm3      // combine the three accumulators
        addpd   xmm1, xmm5
        movapd    [eax], xmm1
    }
}

/*
 * SSE24Reg - SSE2 packed double-precision add kernel with four
 * accumulators.
 *
 * eax holds the pointer stored in the global dsum. xmm0/xmm2/xmm4/xmm6 are
 * loaded from [eax+16] (the per-step increment; runAllTests sets dsum[2]
 * and dsum[3] to 1) and xmm1/xmm3/xmm5/xmm7 from [eax] (zeroed by
 * runAllTests before each call). movapd is used, so the buffer must be
 * 16-byte aligned.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 20 addpd per
 * pass, five on each of the four independent accumulators. All
 * accumulators start from [eax], so a non-zero starting value would be
 * counted four times. The combined result is stored back to [eax].
 */
void SSE24Reg(void)
{
    __asm
    {
        mov     ebx, 1000000
        mov     eax, dsum
        movapd  xmm0, [eax+16]
        movapd  xmm1, [eax] 
        movapd  xmm2, [eax+16]
        movapd  xmm3, [eax]
        movapd  xmm4, [eax+16]
        movapd  xmm5, [eax]
        movapd  xmm6, [eax+16]
        movapd  xmm7, [eax]
     lp:addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm7, xmm6
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm7, xmm6
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm7, xmm6
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm7, xmm6
        addpd   xmm1, xmm0
        addpd   xmm3, xmm2
        addpd   xmm5, xmm4
        addpd   xmm7, xmm6
        dec     ebx
        jnz     lp
        addpd   xmm1, xmm3      // combine the four accumulators
        addpd   xmm1, xmm5
        addpd   xmm1, xmm7
        movapd    [eax], xmm1
    }       
}


/*
 * Now3D1Reg - 3DNow! packed single-precision add kernel, one accumulator
 * at a time.
 *
 * eax holds the pointer stored in the global ssumA. mm0 is loaded from
 * [eax+8] (the per-step increment; runAllTests sets ssumA[2] and ssumA[3]
 * to 1.0) and the accumulator from [eax] (ssumA[0] and ssumA[1], zeroed by
 * runAllTests before each call). femms brackets the block on entry and
 * exit.
 *
 * Unlike the 2/3/4-register variants, the work is split into two
 * back-to-back loops of 500,000 passes each (ebx is the counter), first on
 * mm1 and then on mm2, with 20 dependent pfadd per pass. With an increment
 * of 1.0 each loop reaches 10,000,000 per lane, which is below 2^24
 * (16,777,216) - presumably the split keeps a single float accumulator
 * within the range where adding 1.0 is exact; confirm with the author's
 * notes.
 *
 * Both accumulators start from [eax], so a non-zero starting value would
 * be counted twice in the final sum. The combined result is stored back to
 * [eax].
 */
void Now3D1Reg(void)
{
    __asm
    {
        femms
        mov     eax, ssumA
        movq    mm0, [eax+8]
        movq    mm1, [eax] 
        mov     ebx, 500000
     lp:pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        pfadd   mm1, mm0
        dec     ebx
        jnz     lp
        movq    mm2, [eax]      // second half, fresh accumulator in mm2
        mov     ebx, 500000
    lp2:pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        pfadd   mm2, mm0
        dec     ebx
        jnz     lp2
        pfadd   mm1, mm2        // combine the two halves
        movq    [eax], mm1
        femms
    }       
}

/*
 * Now3D2Reg - 3DNow! packed single-precision add kernel with two
 * accumulators.
 *
 * eax holds the pointer stored in the global ssumA. mm0/mm2 are loaded
 * from [eax+8] (the per-step increment; runAllTests sets ssumA[2] and
 * ssumA[3] to 1.0) and mm1/mm3 from [eax] (zeroed by runAllTests before
 * each call). femms brackets the block on entry and exit.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 20 pfadd per
 * pass, alternating between the two independent accumulators (10 each).
 * Both accumulators start from [eax], so a non-zero starting value would
 * be counted twice. The combined result is stored back to [eax].
 */
void Now3D2Reg(void)
{
    __asm
    {
        femms
        mov     ebx, 1000000
        mov     eax, ssumA
        movq    mm0, [eax+8]
        movq    mm1, [eax] 
        movq    mm2, [eax+8]
        movq    mm3, [eax]
     lp:pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        dec     ebx
        jnz     lp
        pfadd   mm1, mm3        // combine the two accumulators
        movq    [eax], mm1
        femms
    }       
}

/*
 * Now3D3Reg - 3DNow! packed single-precision add kernel with three
 * accumulators.
 *
 * eax holds the pointer stored in the global ssumA. mm0/mm2/mm4 are loaded
 * from [eax+8] (the per-step increment; runAllTests sets ssumA[2] and
 * ssumA[3] to 1.0) and mm1/mm3/mm5 from [eax] (zeroed by runAllTests
 * before each call). femms brackets the block on entry and exit.
 *
 * The loop runs 1,000,000 times (ebx is the counter) with 21 pfadd per
 * pass, seven on each of the three independent accumulators. All
 * accumulators start from [eax], so a non-zero starting value would be
 * counted three times. The combined result is stored back to [eax].
 */
void Now3D3Reg(void)
{
    __asm
    {
        femms
        mov     ebx, 1000000
        mov     eax, ssumA
        movq    mm0, [eax+8]
        movq    mm1, [eax] 
        movq    mm2, [eax+8]
        movq    mm3, [eax]
        movq    mm4, [eax+8]
        movq    mm5, [eax]
     lp:pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        dec     ebx
        jnz     lp
        pfadd   mm1, mm3        // combine the three accumulators
        pfadd   mm1, mm5
        movq    [eax], mm1
        femms
    }
}

/*
 * Now3D4Reg - 3DNow! add-speed kernel using four accumulator registers.
 *
 * Loads the 8 bytes at [ssumA+8] into mm0, mm2, mm4 and mm6 (the values
 * that get added) and the 8 bytes at [ssumA] into mm1, mm3, mm5 and mm7
 * (the running sums).  The loop body holds 20 pfadd instructions that
 * rotate through the four accumulators, so each accumulator only depends
 * on its own previous result.  The loop runs 1,000,000 times (20 million
 * pfadd per call).  Afterwards the four accumulators are added together
 * and the 8-byte result is written back to [ssumA], overwriting the
 * starting sum.
 *
 * The instruction sequence is the thing being timed; do not reorder or
 * re-roll it.
 *
 * Uses eax, ebx and all eight MMX registers mm0-mm7.
 * NOTE(review): ssumA is declared outside this block; the code treats its
 * value as the base address of at least 16 readable bytes - confirm.
 */
void Now3D4Reg(void)
{
    __asm
    {
        femms                       // clear MMX/3DNow! state before use
        mov     ebx, 1000000        // loop pass counter
        mov     eax, ssumA          // base address of the operand data
        movq    mm0, [eax+8]        // addend for accumulator mm1
        movq    mm1, [eax]          // accumulator 1
        movq    mm2, [eax+8]        // addend for accumulator mm3
        movq    mm3, [eax]          // accumulator 2
        movq    mm4, [eax+8]        // addend for accumulator mm5
        movq    mm5, [eax]          // accumulator 3
        movq    mm6, [eax+8]        // addend for accumulator mm7
        movq    mm7, [eax]          // accumulator 4
     lp:pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm7, mm6
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm7, mm6
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm7, mm6
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm7, mm6
        pfadd   mm1, mm0
        pfadd   mm3, mm2
        pfadd   mm5, mm4
        pfadd   mm7, mm6
        dec     ebx
        jnz     lp
        pfadd   mm1, mm3            // combine the four accumulators
        pfadd   mm1, mm5
        pfadd   mm1, mm7
        movq    [eax], mm1          // store the combined sum over the start value
        femms                       // leave MMX/3DNow! state clean for later FPU code
    }       
}


/*
 * GetFileName - ask the user which file the results log should go to.
 *
 * Shows the common "Save As" dialog, owned by mainHWND, with fileName as
 * the in/out path buffer: the dialog is handed a pointer to fileName via
 * of.lpstrFile and writes the selected path straight into it.  No copy is
 * needed afterwards (the previous strcpy(fileName, of.lpstrFile) copied the
 * buffer onto itself, which is undefined behaviour for strcpy).
 *
 * Returns the GetSaveFileName() result: nonzero when the user confirmed a
 * file name, zero when the dialog was cancelled or an error occurred.
 *
 * NOTE(review): nMaxFile is _MAX_PATH, so fileName is assumed to be a char
 * buffer of at least _MAX_PATH characters - confirm at its declaration.
 */
static BOOL GetFileName()
{
    /* Filter list: (display text, pattern) pairs, ended by an empty string. */
    static char    filterList[] = "File (*.*)" \
                                        "\0" \
                                        "*.*" \
                                        "\0\0";
    OPENFILENAME        of;
    int                 rc;

    memset( &of, 0, sizeof( OPENFILENAME ) );
    of.lStructSize = sizeof( OPENFILENAME );
    of.hwndOwner = mainHWND;
    of.lpstrFilter = filterList;
    of.lpstrDefExt = "";
    of.nFilterIndex = 1L;
    of.lpstrFile = fileName;        /* dialog reads and writes fileName in place */
    of.nMaxFile = _MAX_PATH;
    of.lpstrTitle = NULL;           /* use the default dialog title */
    of.Flags = OFN_HIDEREADONLY;

    rc = GetSaveFileName( &of );

    return( rc );

} // GetFileName

/*
 * saveData - append the configuration text and the benchmark results to
 * the log file named by fileName.
 *
 * The file is opened in append mode, so earlier results are kept.  The
 * report contains:
 *   - a banner with version and timeday,
 *   - the configData strings (configData5 only when hasSSEOS or hasSSE2OS,
 *     configData4 only when hasMMX, hasSSE or hasSSE2),
 *   - one row of four speeds (1 to 4 registers) per test; the MMX, SSE,
 *     SSE2 and 3DNow rows are written only when the matching capability
 *     flags are set,
 *   - one line per test saying whether the additions gave the correct
 *     result; for the MMX/SSE/SSE2/3DNow tests a failure line also shows
 *     the value that was obtained.
 *
 * Returns TRUE when the report was written and the file closed without
 * error.  Returns FALSE, after showing a warning message box, when the file
 * cannot be opened or when writing/closing it fails (for example disk
 * full).  Previously write failures were ignored and TRUE was returned.
 */
BOOL saveData()
{
    FILE *outfile;
    int   writeFailed;

    outfile = fopen(fileName, "a+");
    if (outfile == NULL)
    {
        MessageBox(NULL, "Cannot open log file", fileName,
                                MB_ICONWARNING | MB_OK);
        return FALSE;
    }

    /* Banner */
    fprintf(outfile, "\n\n #####################################################################\n");
    fprintf(outfile, "      CPU ID and Speed Test %s %s \n", version, timeday);
    fprintf(outfile, "                 Copyright Roy Longbottom 2000 - 2002\n\n");

    /* System configuration text */
    fprintf(outfile, "%s\n", configData1);
    if (hasSSEOS || hasSSE2OS)
    {
        fprintf(outfile, "%s\n", configData5);
    }
    fprintf(outfile, "%s\n", configData2);
    if (hasMMX || hasSSE || hasSSE2)
    {
        fprintf(outfile, "%s\n", configData4);
    }
    fprintf(outfile, "\n");

    /* Speed table: one row per test, one column per register count */
    fprintf(outfile, " Speeds adding to     1 Register  2 Registers  3 Registers  4 Registers\n\n");
    fprintf(outfile, " 32 bit Integer MIPS %8d %12d %12d %12d\n",
                     mipsReg[0], mipsReg[1], mipsReg[2], mipsReg[3]);
    fprintf(outfile, " 32 bit Float MFLOPS %8d %12d %12d %12d\n",
                     mflopsSP[0], mflopsSP[1], mflopsSP[2], mflopsSP[3]);
    fprintf(outfile, " 64 bit Float MFLOPS %8d %12d %12d %12d\n",
                     mflopsDP[0], mflopsDP[1], mflopsDP[2], mflopsDP[3]);
    if (hasMMX)
    {
        fprintf(outfile, " 32 bit MMX Int MIPS %8d %12d %12d %12d\n",
                         mmxMipsReg[0], mmxMipsReg[1], mmxMipsReg[2], mmxMipsReg[3]);
    }
    if (hasSSE && hasSSEOS)
    {
        fprintf(outfile, " 32 bit SSE MFLOPS   %8d %12d %12d %12d\n",
                         SSEmflops[0], SSEmflops[1], SSEmflops[2], SSEmflops[3]);
    }
    if (hasSSE2 && hasSSE2OS)
    {
        fprintf(outfile, " 64 bit SSE2 MFLOPS  %8d %12d %12d %12d\n",
                         SSE2mflops[0], SSE2mflops[1], SSE2mflops[2], SSE2mflops[3]);
    }
    if (has3DNow)
    {
        fprintf(outfile, " 32 bit 3DNow MFLOPS %8d %12d %12d %12d\n",
                         Now3Dmflops[0], Now3Dmflops[1], Now3Dmflops[2], Now3Dmflops[3]);
    }
    fprintf(outfile, "\n");

    /* Result checks: one line per test */
    if (correctInt)
    {
        fprintf(outfile, " 32 bit Integer MIPS  810M instructions of r=r+1 correct result\n");
    }
    else
    {
        fprintf(outfile, " 32 bit Integer MIPS  ERRORS INCORRECT ADDITIONS\n");
    }
    if (correctSP)
    {
        fprintf(outfile, " 32 bit Float MFLOPS  810M instructions of r=r+1 correct result\n");
    }
    else
    {
        fprintf(outfile, " 32 bit Float MFLOPS  ERRORS INCORRECT ADDITIONS\n");
    }
    if (correctDP)
    {
        fprintf(outfile, " 64 bit Float MFLOPS  810M instructions of r=r+1 correct result\n");
    }
    else
    {
        fprintf(outfile, " 64 bit Float MFLOPS  ERRORS INCORRECT ADDITIONS\n");
    }
    if (hasMMX)
    {
        if (correctMMX)
        {
            fprintf(outfile, " 32 bit MMX Int MIPS  810M instructions of r=r+1 correct result, 2 adds/instruction\n");
        }
        else
        {
            fprintf(outfile, " 32 bit MMX Int MIPS  ERRORS INCORRECT ADDITIONS ");
            fprintf(outfile, " Was %16.3f \n", MMXcount);
        }
    }
    if (hasSSE && hasSSEOS)
    {
        if (correctSSE)
        {
            fprintf(outfile, " 32 bit SSE MFLOPS    810M instructions of r=r+1 correct result, 4 adds/instruction\n");
        }
        else
        {
            fprintf(outfile, " 32 bit SSE MFLOPS    ERRORS INCORRECT ADDITIONS " );
            fprintf(outfile, " Was %16.3f \n", SSEcount);
        }
    }
    if (hasSSE2 && hasSSE2OS)
    {
        if (correctSSE2)
        {
            fprintf(outfile, " 64 bit SSE2 MFLOPS   810M instructions of r=r+1 correct result, 2 adds/instruction\n");
        }
        else
        {
            fprintf(outfile, " 64 bit SSE2 MFLOPS   ERRORS INCORRECT ADDITIONS " );
            fprintf(outfile, " Was %16.3f \n", SSE2count);
        }
    }
    if (has3DNow)
    {
        if (correct3DNow)
        {
            fprintf(outfile, " 32 bit 3DNow MFLOPS  810M instructions of r=r+1 correct result, 2 adds/instruction\n");
        }
        else
        {
            fprintf(outfile, " 32 bit 3DNow MFLOPS  ERRORS INCORRECT ADDITIONS " );
            fprintf(outfile, " Was %16.3f \n", Now3Dcount);
        }
    }

    fprintf(outfile, "\n\n");

    /*
     * stdio buffers the output, so a failed write may only surface when the
     * buffer is flushed or the file is closed.  Check all three so a lost
     * log is reported instead of silently claimed as saved.  The file is
     * always closed, even after an earlier error.
     */
    writeFailed = (fflush(outfile) != 0) || (ferror(outfile) != 0);
    if (fclose(outfile) != 0)
    {
        writeFailed = 1;
    }
    if (writeFailed)
    {
        MessageBox(NULL, "Cannot write log file", fileName,
                                MB_ICONWARNING | MB_OK);
        return FALSE;
    }
    return TRUE;
}

// sprintf(writeMsg, " SSEcount %20.6f", SSEcount);
// MessageBox(NULL, writeMsg, "WARNING", MB_ICONWARNING | MB_OK);
/*
                             32 bit Integer MIPS |
                             32 bit Float MFLOPS |
                             64 bit Float MFLOPS |
                             32 bit MMX Int MIPS |
                             32 bit SSE MFLOPS   |
                             64 bit SSE2 MFLOPS  |
                             32 bit 3DNow MFLOPS
*/
