c - Trouble Resolving Deadlock in MPI Program dealing with a Cartesian mesh -


I am implementing Cannon's algorithm and running it on 4 processors. I hit a deadlock when the processes enter this loop:

   (i=0; i<dims[0]; i++) {      multiply(nlocal, a, b, c);      mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,leftrank, 1, rightrank, 1, comm_2d, &status);      mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,uprank, 1, downrank, 1, comm_2d, &status);    }  

The entire code is here:

#include <math.h> #include <mpi.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h>  void multiply(int n, double *a, double *b, double *c);  double* readmatrix(char* filename, int* size); void writematrix(double* matrix, char* filename, int size);  int main(int argc, char* argv[]) {      mpi_init(&argc, &argv);       double* a,*b,*c;   int i, t, n;   int nlocal;    int npes, dims[2], periods[2];    int myrank, my2drank, mycoords[2];    int uprank, downrank, leftrank, rightrank, coords[2];    int shiftsource, shiftdest;    mpi_status status;    mpi_comm comm_2d;      mpi_comm_size(mpi_comm_world, &npes);    mpi_comm_rank(mpi_comm_world, &myrank);     mpi_barrier(mpi_comm_world);    t = -mpi_wtime();     if (myrank == 0) {      int sizea,sizeb;     printf("reading %s\n", argv[1]);     = readmatrix(argv[1], &sizea);     b = readmatrix(argv[2], &sizeb);     printf("reading %s\n", argv[2]);     c = calloc(sizea*sizeb, sizeof(double));     n = sizea;     mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world);     mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world);     mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world);     mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world);      if (sizea != sizeb) {         printf("matrix not sized n^2\n");         mpi_abort(mpi_comm_world, 0);     }     }   else {       = calloc(n*n, sizeof(double));       b = calloc(n*n, sizeof(double));       c = calloc(n*n, sizeof(double));       mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world);       mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world);       mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world);       mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world);   }     dims[0] = dims[1] = sqrt(npes);        periods[0] = periods[1] = 1;      mpi_cart_create(mpi_comm_world, 2, dims, periods, 1, &comm_2d);      mpi_comm_rank(comm_2d, &my2drank);    mpi_cart_coords(comm_2d, my2drank, 2, mycoords);      mpi_cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);    
mpi_cart_shift(comm_2d, 1, -1, &downrank, &uprank);      nlocal = n/dims[0];      mpi_cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);    mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double, shiftdest,1, shiftsource, 1, comm_2d, &status);     mpi_cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);    mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 1, shiftsource, 1, comm_2d, &status);     printf("rank[%d] has entered loop\n", myrank);   (i=0; i<dims[0]; i++) {      multiply(nlocal, a, b, c);        mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,leftrank, 1, rightrank, 1, comm_2d, &status);        mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,uprank, 1, downrank, 1, comm_2d, &status);    }    printf("rank[%d] has left loop\n", myrank);    mpi_cart_shift(comm_2d, 0, +mycoords[0], &shiftsource, &shiftdest);    mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,shiftdest, 1, shiftsource, 1, comm_2d, &status);     mpi_cart_shift(comm_2d, 1, +mycoords[1], &shiftsource, &shiftdest);    mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 1, shiftsource, 1, comm_2d, &status);        printf("rank[%d] has reached barrier...\n", myrank);   mpi_barrier(mpi_comm_world);    if (myrank == 0) {       t += mpi_wtime();       writematrix(c, argv[3], n);        printf("%s %d second(s)\n", "finshed in", t);   }    free(a); free(b); free(c);    mpi_comm_free(&comm_2d);     mpi_finalize(); }  double* readmatrix(char* filename, int* size) {     file* file_handle = fopen(filename, "r");     int row;     int col;     fread(&row, sizeof(int), 1, file_handle);     fread(&col, sizeof(int), 1, file_handle);     if (row == col) {             *size = row;     }     else {         *size = -1;         return null;     }     double* buffer = calloc(row*col, sizeof(double));     for(int = 0; < row; i++) {         for(int j = 0; j < col; j++) {             double x;             fread(&x, sizeof(double), 1, file_handle);             buffer[row * + j] 
= x;         }     }     fclose(file_handle);     printf("buffer has size %d\n", row*col);     return buffer; }  void writematrix(double* matrix, char* filename, int size) {     file* file_handle = fopen(filename, "w");     fwrite(&size, sizeof(int), 1, file_handle);     fwrite(&size, sizeof(int), 1, file_handle);     for(int = 0; < size; i++) {         for(int j = 0; j < size; j++) {             double x = matrix[size * + j];             fwrite(&x, sizeof(double), 1, file_handle);         }     }     fclose(file_handle); }   void multiply(int n, double *a, double *b, double *c) {    int i, j, k;     (i=0; i<n; i++)      (j=0; j<n; j++)        (k=0; k<n; k++)      c[i*n+j] += a[i*n+k]*b[k*n+j];  }  

If this is too much code, I can remove parts. I am wondering what is causing the deadlock and how to resolve it. Thank you for your time, in advance.

Important information:

Rank 0 hits the barrier. Since the other 3 ranks are deadlocked, rank 0 is stuck there until they have hit the barrier as well.

output

reading 10
buffer has size 100
buffer has size 100
reading 10
rank[0] has entered loop
rank[0] has left loop
rank[0] has reached barrier...
rank[1] has entered loop
rank[2] has entered loop
rank[3] has entered loop

There are 2 little issues to fix to get it working:

  • In these lines:

    a = calloc(n*n, sizeof(double)); b = calloc(n*n, sizeof(double)); c = calloc(n*n, sizeof(double)); mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world); 

n should be broadcast before allocating a. Otherwise, n is not initialized on the non-root ranks and the behaviour is undefined; it can trigger a segmentation fault.

  • In the function MPI_Cart_shift, the third argument is the displacement: negative means a downward shift, positive an upward shift. I changed the calls so that every process uses the same displacement, and it worked fine. If MPI_Sendrecv_replace() is used, the number of messages received by a process must match the number of messages sent to that process. That is not the case in this call to MPI_Sendrecv_replace():

    /* Displacement along dimension 0 is taken from this process's own coordinate in dimension 0. */
    MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest, 1, shiftsource, 1, comm_2d, &status);

In the "skew" example from the Open MPI documentation, it is different:

 ! compute shift source and destination
       CALL MPI_CART_SHIFT(comm, 0, coords(2), source, dest, ierr)
 ! skew array
       CALL MPI_SENDRECV_REPLACE(a, 1, MPI_REAL, dest, 0, source, 0, &
                                 comm, status, ierr)

In that case, all processes in a given line use the same displacement. Hence, each process sends one message and each process receives one. Yet the displacement depends on the line, so the matrix is skewed.

Here is the resulting code. It is compiled with `mpicc main.c -o main -lm -Wall` and run with `mpirun -np 4 main`:

#include <stdio.h> #include <stdlib.h> #include <math.h> #include "mpi.h"  int main(int argc, char **argv)  {      mpi_init(&argc, &argv);         double* a,*b,*c;     int i, t, n;     int nlocal;      int npes, dims[2], periods[2];      int myrank, my2drank, mycoords[2];      int uprank, downrank, leftrank, rightrank;      int shiftsource, shiftdest;      mpi_status status;      mpi_comm comm_2d;        mpi_comm_size(mpi_comm_world, &npes);      mpi_comm_rank(mpi_comm_world, &myrank);       mpi_barrier(mpi_comm_world);      t = -mpi_wtime();       if (myrank == 0) {          int sizea,sizeb;         printf("reading \n");         //  = readmatrix(argv[1], &sizea);         sizea=16;         a=malloc(sizea*sizea*sizeof(double));         //  b = readmatrix(argv[2], &sizeb);         sizeb=16;         b=malloc(sizeb*sizeb*sizeof(double));         printf("reading \n");         c = calloc(sizea*sizeb, sizeof(double));         n = sizea;         mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world);         mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world);         mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world);         mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world);          if (sizea != sizeb) {             printf("matrix not sized n^2\n");             mpi_abort(mpi_comm_world, 0);         }       }     else {         mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world);//n should broadcast before allocation         = calloc(n*n, sizeof(double));         b = calloc(n*n, sizeof(double));         c = calloc(n*n, sizeof(double));          mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world);         mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world);         mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world);     }       dims[0] = dims[1] = sqrt(npes);          periods[0] = periods[1] = 1;        mpi_cart_create(mpi_comm_world, 2, dims, periods, 1, &comm_2d);        mpi_comm_rank(comm_2d, &my2drank);      mpi_cart_coords(comm_2d, my2drank, 2, mycoords);        mpi_cart_shift(comm_2d, 0, -1, 
&rightrank, &leftrank);      mpi_cart_shift(comm_2d, 1, -1, &downrank, &uprank);        nlocal = n/dims[0];       mpi_cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);      // mpi_cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);      mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double, shiftdest,5, shiftsource, 5, comm_2d, &status);       // mpi_cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);     mpi_cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);      mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 6, shiftsource, 6, comm_2d, &status);       printf("rank[%d] has entered loop dim %d\n", myrank,dims[0]);fflush(stdout);     (i=0; i<dims[0]; i++) {          //  multiply(nlocal, a, b, c);           mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,leftrank, 1, rightrank, 1, comm_2d, &status);            mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,uprank, 2, downrank, 2, comm_2d, &status);      }      printf("rank[%d] has left loop\n", myrank);fflush(stdout);     mpi_barrier(mpi_comm_world);      // mpi_cart_shift(comm_2d, 0, +mycoords[0], &shiftsource, &shiftdest);      mpi_cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);      mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,shiftdest, 3, shiftsource, 3, comm_2d, &status);       mpi_cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);      //mpi_cart_shift(comm_2d, 1, +mycoords[1], &shiftsource, &shiftdest);      mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 4, shiftsource, 4, comm_2d, &status);          printf("rank[%d] has reached barrier...\n", myrank);fflush(stdout);     mpi_barrier(mpi_comm_world);      if (myrank == 0) {         t += mpi_wtime();         //  writematrix(c, argv[3], n);          printf("finshed in %d second(s)\n",t);     }      free(a); free(b); free(c);      mpi_comm_free(&comm_2d);       mpi_finalize();      return 0; } 

Comments

Popular posts from this blog

c++ - No viable overloaded operator for references a map -

java - Custom OutputStreamAppender not run: LOGBACK: No context given for <MYAPPENDER> -

java - Cannot secure connection using TLS -