MBO
 All Data Structures Files Functions Variables Typedefs Enumerations Modules Pages
OpenMP
Illustrates the parallel application of an MBO operator.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <assert.h>
#include "Mbo.h"
#include <config.h>
#ifdef HAVE_OPENMP
#include <omp.h>
#endif
static const int numSpins = 8;
static const int numIters = 1;
static MboProdSpace buildSpace(int n)
{
MboProdSpace h1, hTot;
int i;
hTot = mboProdSpaceCreate(0);
for (i = 0; i < numSpins; ++i) {
mboProdSpaceMul(h1, &hTot);
}
return hTot;
}
static MboNumOp buildJx(MboProdSpace h)
{
MboElemOp sp, sm;
MboNumOp Jx_compiled;
struct MboAmplitude pointFive;
int i;
sp = mboSigmaPlus();
sm = mboSigmaMinus();
pointFive.re = 0.5;
pointFive.im = 0.0;
mboTensorOpNull(h, &Jx);
for (i = 0; i < numSpins; ++i) {
mboTensorOpAddScaledTo(&pointFive, sm, i, Jx);
mboTensorOpAddScaledTo(&pointFive, sp, i, Jx);
}
err = mboNumOpCompile(Jx, &Jx_compiled);
assert(err == MBO_SUCCESS);
return Jx_compiled;
}
int main()
{
struct MboAmplitude one, zero, *x, *y, *yomp;
int i;
MboGlobInd n, chunk, chunkSize, numChunks, dim;
clock_t tstart, tend;
double deltat, difference;
int numThreads;
MboNumSubMatrix *chunks;
hTot = buildSpace(numSpins);
dim = mboProdSpaceDim(hTot);
Jx = buildJx(hTot);
one.re = 1.0;
one.im = 0.0;
zero.re = 0.0;
zero.im = 0.0;
x = malloc(dim * sizeof(*x));
y = malloc(dim * sizeof(*x));
yomp = malloc(dim * sizeof(*x));
for (n = 0; n < dim; ++n) {
x[n].re = (double)rand() / RAND_MAX;
x[n].im = (double)rand() / RAND_MAX;
}
tstart = clock();
for (i = 0; i < numIters; ++i) {
mboNumOpMatVec(one, Jx, x, zero, y);
}
tend = clock();
deltat = (double)(tend - tstart) / (double)CLOCKS_PER_SEC;
printf("Serial: %2.3lf s\n", deltat);
#ifdef HAVE_OPENMP
numThreads = omp_get_max_threads();
#else
numThreads = 1;
printf("here");
#endif
printf("Using %d threads.\n", numThreads);
chunkSize = dim / numThreads;
printf("Chunk Size: %lld\n", chunkSize);
numChunks = dim / chunkSize;
printf("Number of Chunks: %lld\n", numChunks);
if (numChunks * chunkSize < dim) ++numChunks;
chunks = (MboNumSubMatrix*)malloc(numChunks * sizeof(*chunks));
for (chunk = 0; chunk < numChunks; ++chunk) {
err = mboNumSubMatrixCreate(Jx, chunk * chunkSize,
(chunk + 1) * chunkSize, 0, dim, &chunks[chunk]);
assert(err == MBO_SUCCESS);
}
tstart = clock();
for (i = 0; i < numIters; ++i) {
#ifdef HAVE_OPENMP
#pragma omp parallel for private(chunk)
#endif
for (chunk = 0; chunk < numChunks; ++chunk) {
mboNumSubMatrixMatVec(one, chunks[chunk], x, zero,
yomp + chunk * chunkSize);
}
}
tend = clock();
deltat = (double)(tend - tstart) / (double)CLOCKS_PER_SEC / numThreads;
printf("Parallel: %2.3lf s\n", deltat);
difference = 0;
for (n = 0; n < dim; ++n) {
difference += (yomp[n].re - y[n].re) * (yomp[n].re - y[n].re) +
(yomp[n].im - y[n].im) * (yomp[n].im - y[n].im);
}
difference = sqrt(difference);
printf("Error: %lf\n", difference);
free(x);
free(y);
free(yomp);
for (chunk = 0; chunk < numChunks; ++chunk) {
mboNumSubMatrixDestroy(&chunks[chunk]);
}
free(chunks);
if (difference < 1.0e-12) {
return 0;
} else {
return 1;
}
}

Plain source code

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <assert.h>
#include "Mbo.h"
#include <config.h>
#ifdef HAVE_OPENMP
#include <omp.h>
#endif
static const int numSpins = 8;
static const int numIters = 1;
static MboProdSpace buildSpace(int n)
{
MboProdSpace h1, hTot;
int i;
hTot = mboProdSpaceCreate(0);
for (i = 0; i < numSpins; ++i) {
mboProdSpaceMul(h1, &hTot);
}
return hTot;
}
static MboNumOp buildJx(MboProdSpace h)
{
MboElemOp sp, sm;
MboNumOp Jx_compiled;
struct MboAmplitude pointFive;
int i;
sp = mboSigmaPlus();
sm = mboSigmaMinus();
pointFive.re = 0.5;
pointFive.im = 0.0;
mboTensorOpNull(h, &Jx);
for (i = 0; i < numSpins; ++i) {
mboTensorOpAddScaledTo(&pointFive, sm, i, Jx);
mboTensorOpAddScaledTo(&pointFive, sp, i, Jx);
}
err = mboNumOpCompile(Jx, &Jx_compiled);
assert(err == MBO_SUCCESS);
return Jx_compiled;
}
int main()
{
struct MboAmplitude one, zero, *x, *y, *yomp;
int i;
MboGlobInd n, chunk, chunkSize, numChunks, dim;
clock_t tstart, tend;
double deltat, difference;
int numThreads;
MboNumSubMatrix *chunks;
hTot = buildSpace(numSpins);
dim = mboProdSpaceDim(hTot);
Jx = buildJx(hTot);
one.re = 1.0;
one.im = 0.0;
zero.re = 0.0;
zero.im = 0.0;
x = malloc(dim * sizeof(*x));
y = malloc(dim * sizeof(*x));
yomp = malloc(dim * sizeof(*x));
for (n = 0; n < dim; ++n) {
x[n].re = (double)rand() / RAND_MAX;
x[n].im = (double)rand() / RAND_MAX;
}
tstart = clock();
for (i = 0; i < numIters; ++i) {
mboNumOpMatVec(one, Jx, x, zero, y);
}
tend = clock();
deltat = (double)(tend - tstart) / (double)CLOCKS_PER_SEC;
printf("Serial: %2.3lf s\n", deltat);
#ifdef HAVE_OPENMP
numThreads = omp_get_max_threads();
#else
numThreads = 1;
printf("here");
#endif
printf("Using %d threads.\n", numThreads);
chunkSize = dim / numThreads;
printf("Chunk Size: %lld\n", chunkSize);
numChunks = dim / chunkSize;
printf("Number of Chunks: %lld\n", numChunks);
if (numChunks * chunkSize < dim) ++numChunks;
chunks = (MboNumSubMatrix*)malloc(numChunks * sizeof(*chunks));
for (chunk = 0; chunk < numChunks; ++chunk) {
err = mboNumSubMatrixCreate(Jx, chunk * chunkSize,
(chunk + 1) * chunkSize, 0, dim, &chunks[chunk]);
assert(err == MBO_SUCCESS);
}
tstart = clock();
for (i = 0; i < numIters; ++i) {
#ifdef HAVE_OPENMP
#pragma omp parallel for private(chunk)
#endif
for (chunk = 0; chunk < numChunks; ++chunk) {
mboNumSubMatrixMatVec(one, chunks[chunk], x, zero,
yomp + chunk * chunkSize);
}
}
tend = clock();
deltat = (double)(tend - tstart) / (double)CLOCKS_PER_SEC / numThreads;
printf("Parallel: %2.3lf s\n", deltat);
difference = 0;
for (n = 0; n < dim; ++n) {
difference += (yomp[n].re - y[n].re) * (yomp[n].re - y[n].re) +
(yomp[n].im - y[n].im) * (yomp[n].im - y[n].im);
}
difference = sqrt(difference);
printf("Error: %lf\n", difference);
free(x);
free(y);
free(yomp);
for (chunk = 0; chunk < numChunks; ++chunk) {
mboNumSubMatrixDestroy(&chunks[chunk]);
}
free(chunks);
if (difference < 1.0e-12) {
return 0;
} else {
return 1;
}
}