/*
gcc mpmflops2.c -lpthread -msse2 -m64 -lrt -lc -lm -O3 -o MPmflops64SSE2
gcc mpmflops2.c -lpthread -mavx -lrt -lc -lm -O3 -o MPmflops64AVX
gcc mpmflops2.c -lpthread -mavx512ifma -lrt -lc -lm -O3 -o MPmflops64AVX512

Change heading below for SSE2, AVX or AVX512
*/
// char   heading[40]  = "64 Bit MP SSE SP MFLOPS Benchmark 2";
// char   heading[40]  = "64 Bit MP AVX SP MFLOPS Benchmark 2";
 // Banner text printed to the console and to the log file; it must name the
 // instruction set the binary was compiled for.
 char heading[40] = "64 Bit MP AVX512 SP MFLOPS Benchmark 2";


/*
Run time parameters Upper or Lower case

R or Repeats  - start repeat passes           default   75000
T or threads  - must be 1, 2, 4, 8, 16 etc.   default   8

Default command would be ./MPmflops??? Threads 8, Repeats 75000

Affinity Setting to use 1 core
taskset 0x00000001 ./MPmflops???
*/

 #include <ctype.h>
 #include <malloc.h>
 #include <mm_malloc.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>


 // Description of the work shared by all worker threads.
 typedef struct {
    float *x;     // start of the data array (main stores the allocated buffer here)
    int    xlen;  // words handled by each thread (main stores words / threads)
 } MYCALCS;

 // Single shared instance: written by main before the workers are started,
 // read by runTests.
 MYCALCS xcalcs;
 
 // ---- Threading state --------------------------------------------------
 pthread_t        tid[100];                           // handles of the worker threads
 pthread_attr_t  *attrt  = NULL;                      // NULL selects default thread attributes
 pthread_mutex_t  mutext = PTHREAD_MUTEX_INITIALIZER;

 // ---- Run configuration and shared status ------------------------------
 FILE *outfile;               // results log
 int   endit;
 int   part;                  // selects the kernel executed by runTests (0, 1 or 2)
 int   opwd;                  // floating point operations per word of the selected kernel
 int   threads = 8;           // T: number of worker threads
 int   words   = 102400;      // initial number of words in the array
 int   repeats = 75000;       // R: initial number of repeat passes

 // ---- Kernel constants --------------------------------------------------
 // xval is the multiplier used by triad(); the remaining values are the
 // add / multiply constants handed to triadplus() and triadplus2().
 float xval = 0.999950f;
 float aval = 0.000020f;
 float bval = 0.999980f;
 float cval = 0.000011f;
 float dval = 1.000011f;
 float eval = 0.000012f;
 float fval = 0.999992f;
 float gval = 0.000013f;
 float hval = 1.000013f;
 float jval = 0.000014f;
 float kval = 0.999994f;
 float lval = 0.000015f;
 float mval = 1.000015f;
 float oval = 0.000016f;
 float pval = 0.999996f;
 float qval = 0.000017f;
 float rval = 1.000017f;
 float sval = 0.000018f;
 float tval = 1.000018f;
 float uval = 0.000019f;
 float vval = 1.000019f;
 float wval = 0.000021f;
 float yval = 1.000021f;

 // ---- Timing state -------------------------------------------------------
 char   timeday[30];          // date/time text filled in by local_time()
 double theseSecs = 0.0;      // latest clock sample in seconds (getSecs)
 double startSecs = 0.0;      // clock sample taken by start_time()
 double secs;                 // elapsed seconds computed by end_time()
 struct timespec tp1;         // raw clock sample used by getSecs()

  // Store the current local date and time in the global timeday buffer,
  // in asctime() format (the text ends with a newline).
  //
  // localtime() and asctime() may return NULL on failure, so a placeholder
  // (also newline terminated) is stored in that case, and snprintf keeps the
  // copy within the size of timeday.
  void local_time(void)
  {
     time_t     t    = time(NULL);
     struct tm *now  = localtime(&t);
     char      *text = (now != NULL) ? asctime(now) : NULL;

     snprintf(timeday, sizeof timeday, "%s",
              (text != NULL) ? text : "(time unavailable)\n");
  }
  void getSecs()
  {
     clock_gettime(CLOCK_REALTIME, &tp1);
     theseSecs =  tp1.tv_sec + tp1.tv_nsec / 1e9;               
     return;
  }

  // Begin a timed interval: take a fresh clock sample and keep it in startSecs.
  void start_time()
  {
      getSecs();              // refreshes theseSecs
      startSecs = theseSecs;
  }

  // Finish a timed interval: take a fresh clock sample and store the seconds
  // elapsed since start_time() in secs.
  void end_time()
  {
      getSecs();              // refreshes theseSecs
      secs = theseSecs - startSecs;
  }




 // 32 operations per word kernel: every one of the first n elements of x is
 // replaced, in place, by an alternating sum of eleven (x + add) * mul terms
 // built from the constant pairs (a,b) (c,d) (e,f) (g,h) (j,k) (l,m) (o,p)
 // (q,r) (s,t) (u,v) (w,y). Nothing is written when n <= 0.
 void triadplus2(int n, float a, float b, float c, float d, float e, float f, float g, float h, float j, float k, float l, float m, float o, float p, float q, float r, float s, float t, float u, float v, float w, float y, float *x)
 {
     int idx = 0;

     while (idx < n)
     {
         const float cur = x[idx];

         x[idx] = (cur+a)*b - (cur+c)*d + (cur+e)*f - (cur+g)*h
                + (cur+j)*k - (cur+l)*m + (cur+o)*p - (cur+q)*r
                + (cur+s)*t - (cur+u)*v + (cur+w)*y;
         idx++;
     }
 }

 // 8 operations per word kernel: every one of the first n elements of x is
 // replaced, in place, by (x+a)*b - (x+c)*d + (x+e)*f.
 // Nothing is written when n <= 0.
 void triadplus(int n, float a, float b, float c, float d, float e, float f, float *x)
 {
     int idx = 0;

     while (idx < n)
     {
         const float cur = x[idx];

         x[idx] = (cur+a)*b - (cur+c)*d + (cur+e)*f;
         idx++;
     }
 }

 // 2 operations per word kernel: every one of the first n elements of x is
 // replaced, in place, by (x + a) * b. Nothing is written when n <= 0.
 void triad(int n, float a, float b, float *x)
 {
     int idx = 0;

     while (idx < n)
     {
         const float cur = x[idx];

         x[idx] = (cur + a) * b;
         idx++;
     }
 }

 // Worker thread entry point.
 //
 // arg carries the worker's index (an integer passed through the void
 // pointer). The worker processes the slice of xcalcs.x that starts at
 // index * xcalcs.xlen and is xcalcs.xlen words long, running the kernel
 // selected by the global part (0 = triad, 1 = triadplus, 2 = triadplus2)
 // for repeats passes. Any other value of part performs no calculation.
 //
 // The operations-per-word figure for the selected kernel is published in
 // the global opwd. Only the worker with index 0 stores it, and only once,
 // so the workers do not race on that variable; callers read it after
 // joining the threads.
 //
 // Always returns NULL.
 void *runTests(void *arg)
 {
    long   offset = (long)arg;
    int    wds    = xcalcs.xlen;
    float *xcp    = xcalcs.x + offset * wds;
    int    ops    = 0;              // stays 0 when no kernel was executed
    int    i;

    for (i=0; i<repeats; i++)
    {
       // calculations in CPU
       switch (part)
       {
          case 0:
             triad(wds, aval, xval, xcp);
             ops = 2;
             break;

          case 1:
             triadplus(wds, aval, bval, cval, dval, eval, fval, xcp);
             ops = 8;
             break;

          case 2:
             triadplus2(wds, aval, bval, cval, dval, eval, fval, gval, hval, jval, kval, lval, mval, oval, pval, qval, rval, sval, tval, uval, vval, wval, yval,  xcp);
             ops = 32;
             break;

          default:
             break;
       }
    }

    if (offset == 0 && ops != 0)
    {
       opwd = ops;
    }
    return NULL;
 }

 // Start one worker thread per configured thread running runTests(), then
 // wait for all of them.
 //
 // If a thread cannot be created, the workers that were started are joined
 // (a pthread_t that was never created must not be joined), the failure is
 // reported on the console and in the log, and the program exits with
 // status 1.
 static void runThreadPass(void)
 {
    long ii;
    long started = 0;
    int  failed  = 0;

    for (ii=0; ii<threads; ii++)
    {
       if (pthread_create(&tid[ii], attrt, runTests, (void *)ii) != 0)
       {
          failed = 1;
          break;
       }
       started++;
    }

    for (ii=0; ii<started; ii++)
    {
       pthread_join(tid[ii], NULL);
    }

    if (failed)
    {
       printf(" ERROR - cannot create thread, WILL EXIT\n");
       if (outfile != NULL)
       {
          fprintf(outfile, " ERROR - cannot create thread, WILL EXIT\n");
          fflush(outfile);
       }
       printf(" Press Enter\n");
       (void)getchar();
       exit(1);
    }
 }

 // main program that executes in the CPU
 //
 // Command line: pairs of "<keyword> <value>" where only the first letter of
 // the keyword matters, upper or lower case:
 //    R <n>  starting number of repeat passes (values below 100 become 100)
 //    T <n>  number of threads (values above 64 become 64)
 //
 // For each of the three kernels (part 0..2) the test is run pEnd times,
 // multiplying the array size by 10 and dividing the repeat count by 10
 // after each run. Every run fills the array with the same start value,
 // times the threaded calculation, reports MFLOPS, and checks that all words
 // hold the same result and that the result differs from the start value.
 // Results go to the console and are appended to MPMflopsLog.txt.
 //
 // Returns 0 after a completed run; exits with status 1 when the log file
 // cannot be opened, memory cannot be allocated or a thread cannot be
 // created.
 int main(int argc, char *argv[])
 {
    float   *x_cpu;                  // Pointer to CPU arrays
    size_t  size_x;

    int     i, p;
    int     param00 = 0;
    int     param01 = 0;
    float   fpmops;
    float   mflops;
    char    title[3][15];
    int     isok1 = 0;
    int     isok2 = 0;
    int     count1 = 0;
    float   errors[2][10];
    int     erdata[5][10];
    float   newdata = 0.999999f;
    int     pStart = 0;
    int     pEnd =3;
    int     calibrate = 0;           // set to 1 to rescale repeats from a trial run

    snprintf(title[0], sizeof title[0], "Data in & out ");
    for (i=1; i<9; i=i+2)
    {
       if (argc > i)
       {
          // toupper requires a value representable as unsigned char
          switch (toupper((unsigned char)argv[i][0]))
          {
                case 'R':
                if (argc > i+1)
                {
                   sscanf(argv[i+1],"%d", &param00);
                   if (param00 > 0) repeats = param00;
                   if (repeats < 100) repeats = 100;
                }
                break;

                case 'T':
                if (argc > i+1)
                {
                   sscanf(argv[i+1],"%d", &param01);
                   if (param01 > 0) threads = param01;
                   if (threads > 64) threads = 64;
                }
                break;

                default:
                break;
         }
       }
    }

    int  startWords = words;
    int  startRepeats = repeats;
    printf("\n\n");
    local_time();
    outfile = fopen("MPMflopsLog.txt","a+");
    if (outfile == NULL)
    {
        printf (" Cannot open results file \n\n");
        printf(" Press Enter\n");
        (void)getchar();
        exit (1);
    }
    fprintf (outfile, "##############################################\n");
    fprintf(outfile, "  %s, %d Threads, %s\n", heading, threads, timeday);
    // The shell commands append to the same file, so buffered text has to be
    // flushed before each of them to keep the log in order.
    fflush(outfile);
    int sc = system("lscpu >> MPMflopsLog.txt");
    (void)sc;

    fprintf (outfile, "\n");
    fflush(outfile);
    int so = system("uname -v >> MPMflopsLog.txt");
    (void)so;
    fprintf (outfile, "\n");
    fflush(outfile);

    fprintf (outfile, " #####################################################\n");

    fprintf(outfile, "  %s, %d Threads, %s\n", heading, threads, timeday);
    printf("\n");
    fprintf(outfile, "  Test             4 Byte  Ops/   Repeat    Seconds   MFLOPS       First   All\n");
    fprintf(outfile,   "                    Words  Word   Passes                         Results  Same\n\n");
    printf ("##############################################\n\n");
    printf("  %s, %d Threads, %s\n", heading, threads, timeday);
    printf("  Test             4 Byte  Ops/   Repeat    Seconds   MFLOPS       First   All\n");
    printf("                    Words  Word   Passes                         Results  Same\n\n");
    fflush (outfile);

    for (part=pStart; part<3; part++)
    {
       isok1  = 0;
       words = startWords;
       repeats = startRepeats;
       for (p=0; p<pEnd; p++)
       {
          size_x = (size_t)words * sizeof(float);

          // Allocate arrays for host CPU
          x_cpu = (float *)_mm_malloc(size_x, 16);
          if (x_cpu  == NULL)
          {
               printf(" ERROR WILL EXIT\n");
               printf(" Press Enter\n");
               (void)getchar();
               exit(1);
          }

          // NOTE(review): when threads does not divide words exactly, the
          // trailing words are not processed by any worker and are reported
          // by the comparison below as differing results.
          xcalcs.x = x_cpu;
          xcalcs.xlen = words / threads;

          if (calibrate == 1)
          {
             // Data for array
             for (i=0; i<words; i++)
             {
                x_cpu[i] = newdata;
             }

             start_time();
             runThreadPass();
             end_time();
             repeats = (int)((double)repeats * 15.0 / secs);
             startRepeats = repeats;

             calibrate = 0;
          }

          // Data for array
          for (i=0; i<words; i++)
          {
             x_cpu[i] = newdata;
          }

          start_time();
          runThreadPass();
          end_time();
          fpmops = (float)words * (float)opwd;
          mflops = (float)repeats * fpmops / 1000000.0f / (float)secs;

          // Print results
          fprintf(outfile, "%15s %9d %5d %8d %10.6f %8.0f ", title[0], words, opwd, repeats, secs, mflops);
          printf("%15s %9d %5d %8d %10.6f %8.0f ", title[0], words, opwd, repeats, secs, mflops);
          fflush (outfile);

          // A first word still equal to the start value means that nothing
          // was calculated; any word differing from the first is an error.
          isok1  = 0;
          float one = x_cpu[0];
          if (one == newdata)
          {
             isok2 = 1;
             isok1 = 1;
          }
          for (i=1; i<words; i++)
          {
             if (one != x_cpu[i])
             {
                isok1 = 1;
                if (count1 < 10)
                {
                   // keep details of the first ten differences for the log
                   errors[0][count1] = x_cpu[i];
                   errors[1][count1] = one;
                   erdata[0][count1] = i;
                   erdata[1][count1] = words;
                   erdata[2][count1] = opwd;
                   erdata[3][count1] = repeats;

                   count1 = count1 + 1;
                }
             }
          }
          if (isok1 == 0)
          {
             fprintf(outfile, " %10.6f   Yes\n", x_cpu[0]);
             printf(" %10.6f   Yes\n", x_cpu[0]);
          }
          else
          {
             fprintf(outfile, "   See later   No\n");
             printf("   See log     No\n");
          }

          // Cleanup
          _mm_free(x_cpu);
          words = words * 10;
          repeats = repeats / 10;
          if (repeats < 1) repeats = 1;
       }
       fprintf(outfile,"\n");
       printf("\n");
    }

    if (isok2 > 0)
    {
       fprintf(outfile," ERROR - At least one first result of 0.999999 - no calculations?\n\n");
       printf(" ERROR - At least one first result of 0.999999 - no calculations?\n\n");
    }
    if (count1 > 0)
    {
       fprintf(outfile," First Unexpected Results\n");
       for (i=0; i<count1; i++)
       {
         fprintf(outfile,"%15s %9d %5d %8d word %9d was %10.6f not %10.6f\n",
           title[0], erdata[1][i], erdata[2][i], erdata[3][i], erdata[0][i], errors[0][i], errors[1][i]);
       }
       fprintf(outfile,"\n");
    }
    local_time();
    printf("               End of test %s", timeday);
    fprintf(outfile, "               End of test %s\n", timeday);

    fflush(outfile);
    char moredata[1024];
    printf("\n Type additional information to include in MPMflopsLog.txt - Press Enter\n\n");
    if (fgets (moredata, sizeof(moredata), stdin) != NULL)
             fprintf (outfile, "\nAdditional information - %s\n", moredata);
    fflush(stdout);

    fflush(outfile);
    fclose (outfile);
    printf("\n Press Enter\n");
    (void)getchar();
    return 0;
 }


