using namespace gismo;
void approximatePI(
const gsMpi & mpi,
const gsMpiComm & comm);
void computeDotProduct(
const gsMpi & mpi,
const gsMpiComm & comm);
void locateDot(
const gsMatrix<> & loc, real_t & count);
int main(
int argc,
char **argv)
{
gsCmdLine cmd(
"An example for testing MPI with G+Smo.\n"
"Execute (eg. with 10 processes): "
" * mpirun -np 10 ./bin/mpi_example\n"
"or provide a hosts file on a cluster: "
" * mpirun -hostfile hosts.txt ./bin/mpi_example\n"
"If your cluster is using srun: "
" * srun -N 10 ./bin/mpi_example"
);
try { cmd.getValues(argc,argv); } catch (int rv) { return rv; }
#ifdef GISMO_WITH_MPI
gsInfo << "Gismo was compiled with MPI support.\n";
#else
gsInfo << "Gismo was compiled without MPI support.\n";
#endif
const gsMpi & mpi = gsMpi::init(argc, argv);
double wtime = mpi.wallTime();
if (0==_rank)
gsInfo<<"Running on "<<_size<<" processes.\n";
gsInfo <<"MPI is "<< (mpi.initialized() ? "" : "NOT ")
<<"initialized on process "<< _rank <<"\n";
std::string cpuname = mpi.getProcessorName();
gsInfo << "Hello G+Smo, from process " << _rank <<" on "
<< cpuname <<", elapsed time is "<< mpi.wallTime()-wtime<< "\n";
approximatePI(mpi, comm);
computeDotProduct(mpi, comm);
gsInfo << "Good bye G+Smo, from process " << _rank << " on "
<< cpuname << "\n";
return 0;
}
void approximatePI(
const gsMpi & mpi,
const gsMpiComm & comm)
{
const int N = 10000;
real_t count = 0;
double wtime = mpi.wallTime();
if(_rank == 0)
{
std::srand((unsigned)wtime);
gsInfo<<"I am the master process of "<<_size<<" processes.\n";
gsInfo << "I will finally compute an approximation of PI! " << "\n";
{
for (
index_t i = 0; i < coord.rows(); i++)
{
coord(i, 0) = (real_t)(rand()) / (real_t)RAND_MAX;
coord(i, 1) = (real_t)(rand()) / (real_t)RAND_MAX;
}
if(p > 0)
comm.
send(coord.data(), coord.size(), p, 0);
else
locateDot(coord, count);
}
}
else
{
comm.
recv(coord.data(), coord.size(), 0, 0);
locateDot(coord, count);
}
comm.
gather(&count, result.data(), 1, 0);
if(_rank == 0)
{
count += result(i);
gsInfo << "PI is approximately: " << (4. * count) / (N * _size) << "\n";
gsInfo << "Time needed for parallel computation: " << mpi.wallTime() - wtime << "\n";
}
}
void computeDotProduct(
const gsMpi & mpi,
const gsMpiComm & comm)
{
const int N = 1000000;
real_t res = 0;
if(N % _size != 0)
{
gsInfo << "Assume that the vectors can be split equivalently \n";
gsInfo << N << " is not divisible by " << _size << "\n";
gsInfo << "Dot product will not be computed! \n";
return;
}
double wtime = mpi.wallTime();
if (_rank == 0)
{
gsInfo << "I am the master process of " << _size << " processes.\n";
gsInfo << "I will finally compute the dot product of two vectors consisting of ones! " << "\n";
for (
index_t p = 0; p < _size; p++)
{
for (
index_t i = 0; i < loc.rows(); i++)
{
loc(i, 0) = colVecs(loc.rows() * p + i, 0);
loc(i, 1) = colVecs(loc.rows() * p + i, 1);
}
if (p > 0)
comm.
send(loc.data(), loc.size(), p, 0);
else
res = loc.col(0).dot(loc.col(1));
}
}
else
{
comm.
recv(loc.data(), loc.size(), 0, 0);
res = loc.col(0).dot(loc.col(1));
}
comm.
gather(&res, result.data(), 1, 0);
if (_rank == 0)
{
for (
index_t i = 1; i < _size; i++)
res += result(i);
gsInfo << "The result is: " << res << "\n";
gsInfo << "Time needed for parallel computation: " << mpi.wallTime() - wtime << "\n";
}
}
void locateDot(
const gsMatrix<> & loc, real_t & count)
{
count = 0;
for(
index_t row = 0; row < loc.rows(); row++)
{
const real_t pos = loc.row(row).squaredNorm();
if(pos <= 1.)
count += 1;
}
}