c++ - MPI_Bcast Matrix Multiplication Setup -


I am using MPI to multiply two matrices (2D arrays) in parallel, by dividing the rows evenly and dispersing them among the child processes. The master also works on a chunk of rows. I understand how to do this, and did it using MPI_Send/MPI_Recv, but now I am trying to do it with MPI_Bcast and can't figure out when to Bcast and what exactly to send. When I output the finished matrix (C) at various points, it seems that not all rows are being calculated/updated, and I know it's probably because I am not correctly specifying the buffer.

code:

#include <iostream> #include <stdlib.h> #include <mpi.h> #include <stdio.h> #include <time.h>  using namespace std;   int main(int argc, char *argv[]) {     int myid, nproc;      int  ibuffer[200];         // integer buffer, use proper size , type     double dbuffer[2000];      // double buffer, use proper size , type     char sbuffer[200];         // string buffer      int msg_len;      int i, j, k;      // initialize mpi environment , needed data     mpi_init(&argc, &argv);      mpi_comm_size(mpi_comm_world, &nproc);      mpi_comm_rank(mpi_comm_world, &myid);      // name of processor     mpi_get_processor_name(sbuffer, &msg_len);      int rowa = 5,     cola = 2,     rowb = cola,     colb = 3,     rowc = rowa,     colc = colb;      // start clock     double start_time = mpi_wtime();      // initialize matrices     double **mata = new double*[rowa];     (int = 0; < rowa; ++i)         mata[i] = new double[cola];      double **matb = new double*[rowb];     (int = 0; < rowb; ++i)         matb[i] = new double[colb];      double **matc = new double*[rowc];     (int = 0; < rowc; ++i)         matc[i] = new double[colc];        (int = 0; < rowa; i++)  // mata     {         (int j = 0; j < cola; j++)         {             mata[i][j] = 2;         }     }      (int = 0; < rowb; i++)  // matb     {         (int j = 0; j < colb; j++)         {             matb[i][j] = 2;         }     }      (int = 0; < rowc; i++)  // matc     {         (int j = 0; j < colc; j++)         {             matc[i][j] = 0;         }     }        // procs compute chunk size, no need send separate     int chunk = rowa / nproc;     int rest  = rowa % nproc;     int my_start_row = myid * chunk;        // find start row     int my_end_row   = (myid + 1) * chunk;      // find end row      // assign rest ot last worker     if (myid == nproc-1) my_end_row += rest;      int dcount = cola * chunk;    // data count send worker     mpi_status status;        // status variable neede receive      if (myid == 0)   
  {                // send rows needed workers (don't know if need or not)             //mpi_bcast(mata, dcount, mpi_double, 0, mpi_comm_world);          // work on own part         (int i= my_start_row; < my_end_row; i++)         {             for(int j=0; j < colb; j++)             {                 for(int k=0; k < rowb; k++)                 {                     matc[i][j] = matc[i][j] + (mata[i][k] * matb[k][j]);                 }             }         }          (int n=1; n<nproc; n++)         {             mpi_bcast(matc, dcount, mpi_double, n, mpi_comm_world);             printf("\n ==++ master receive result worker[%d], \n", n);          }     }     else     {         // worker, receive needed info , start working         //mpi_bcast(mata, dcount, mpi_double, 0, mpi_comm_world);          //printf("\n +++ worker[%d], recived %d rows master \n", myid, myid*chunk);         cout << "\n === master sent rows " << myid * chunk << " through " << (myid+1) * chunk << " process #" << myid << endl;          // work first          (int i= my_start_row; < my_end_row; i++)         {             for(int j=0; j < colb; j++)             {                 for(int k=0; k < rowb; k++)                 {                     matc[i][j] = matc[i][j] + (mata[i][k] * matb[k][j]);                 }             }         }          // send result master         mpi_bcast(matc, dcount, mpi_double, myid, mpi_comm_world);         printf("\n --- worker[%d], sent result master \n", myid);      }      // end clock     double end_time = mpi_wtime();      if (myid == 0) {         cout << "\nparallel exec time: " << end_time - start_time << endl;     }       mpi_finalize();        // clean , release storage     (int i=0; i< rowa; i++)      {         delete [] mata[i];         mata[i] = null;     }     delete [] mata;     mata = null;     (int i=0; i< rowa; i++)      {         delete [] matc[i];         matc[i] = null;     }     delete [] matc;     matc = null;   }  

Sorry if the question is vague or troublesome to understand; I just wondered whether I'm incorrectly understanding how and when to use MPI_Bcast.

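If I haven't made a mistake in reading it, your code generates the same three matrices A, B and C at the beginning on each processor, and then computes the multiplication of A by B, but only for certain row indices. This way, the result of the multiplication on the processor `rank` is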

c(rank) = a(begin;end) * b 

on the rows considered (from begin to end), and

c(rank) = 0 

outside.

So the problem comes from the fact that MPI_Bcast doesn't add matrices, nor does it concatenate them: it's a broadcasting function, and it sends the buffer (here, the matrix C) from the root processor to all the others. So each processor, by doing a Bcast, overwrites the result of the previous Bcast.

To concatenate buffers, the function to use is MPI_Gather. But here the matrix is already full-sized at the beginning, so concatenation is not a good idea with the code as it stands.

two options :

  • Use a function that performs the add operation and gathers the data. You can look at MPI_Reduce and MPI_Allreduce (but since the operation done would be x + (nbprocs-1)*0, it's not really useful to call such a function).
  • Split A and C into sub-sized matrices, and then use MPI_Gather to reunite the results.

Hope it will help! Good luck.


Comments

Popular posts from this blog

asynchronous - C# WinSCP .NET assembly: How to upload multiple files asynchronously -

aws api gateway - SerializationException in posting new Records via Dynamodb Proxy Service in API -

asp.net - Problems sending emails from forum -