C - Trouble resolving a deadlock in an MPI program dealing with a Cartesian mesh
I am implementing Cannon's algorithm and I run it using 4 processes. I hit a deadlock when the program enters this loop:
(i=0; i<dims[0]; i++) { multiply(nlocal, a, b, c); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,leftrank, 1, rightrank, 1, comm_2d, &status); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,uprank, 1, downrank, 1, comm_2d, &status); }
The entire code is here:
#include <math.h> #include <mpi.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> void multiply(int n, double *a, double *b, double *c); double* readmatrix(char* filename, int* size); void writematrix(double* matrix, char* filename, int size); int main(int argc, char* argv[]) { mpi_init(&argc, &argv); double* a,*b,*c; int i, t, n; int nlocal; int npes, dims[2], periods[2]; int myrank, my2drank, mycoords[2]; int uprank, downrank, leftrank, rightrank, coords[2]; int shiftsource, shiftdest; mpi_status status; mpi_comm comm_2d; mpi_comm_size(mpi_comm_world, &npes); mpi_comm_rank(mpi_comm_world, &myrank); mpi_barrier(mpi_comm_world); t = -mpi_wtime(); if (myrank == 0) { int sizea,sizeb; printf("reading %s\n", argv[1]); = readmatrix(argv[1], &sizea); b = readmatrix(argv[2], &sizeb); printf("reading %s\n", argv[2]); c = calloc(sizea*sizeb, sizeof(double)); n = sizea; mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world); mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world); if (sizea != sizeb) { printf("matrix not sized n^2\n"); mpi_abort(mpi_comm_world, 0); } } else { = calloc(n*n, sizeof(double)); b = calloc(n*n, sizeof(double)); c = calloc(n*n, sizeof(double)); mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world); mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world); } dims[0] = dims[1] = sqrt(npes); periods[0] = periods[1] = 1; mpi_cart_create(mpi_comm_world, 2, dims, periods, 1, &comm_2d); mpi_comm_rank(comm_2d, &my2drank); mpi_cart_coords(comm_2d, my2drank, 2, mycoords); mpi_cart_shift(comm_2d, 0, -1, &rightrank, &leftrank); mpi_cart_shift(comm_2d, 1, -1, &downrank, &uprank); nlocal = n/dims[0]; mpi_cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double, shiftdest,1, shiftsource, 1, 
comm_2d, &status); mpi_cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 1, shiftsource, 1, comm_2d, &status); printf("rank[%d] has entered loop\n", myrank); (i=0; i<dims[0]; i++) { multiply(nlocal, a, b, c); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,leftrank, 1, rightrank, 1, comm_2d, &status); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,uprank, 1, downrank, 1, comm_2d, &status); } printf("rank[%d] has left loop\n", myrank); mpi_cart_shift(comm_2d, 0, +mycoords[0], &shiftsource, &shiftdest); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,shiftdest, 1, shiftsource, 1, comm_2d, &status); mpi_cart_shift(comm_2d, 1, +mycoords[1], &shiftsource, &shiftdest); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 1, shiftsource, 1, comm_2d, &status); printf("rank[%d] has reached barrier...\n", myrank); mpi_barrier(mpi_comm_world); if (myrank == 0) { t += mpi_wtime(); writematrix(c, argv[3], n); printf("%s %d second(s)\n", "finshed in", t); } free(a); free(b); free(c); mpi_comm_free(&comm_2d); mpi_finalize(); } double* readmatrix(char* filename, int* size) { file* file_handle = fopen(filename, "r"); int row; int col; fread(&row, sizeof(int), 1, file_handle); fread(&col, sizeof(int), 1, file_handle); if (row == col) { *size = row; } else { *size = -1; return null; } double* buffer = calloc(row*col, sizeof(double)); for(int = 0; < row; i++) { for(int j = 0; j < col; j++) { double x; fread(&x, sizeof(double), 1, file_handle); buffer[row * + j] = x; } } fclose(file_handle); printf("buffer has size %d\n", row*col); return buffer; } void writematrix(double* matrix, char* filename, int size) { file* file_handle = fopen(filename, "w"); fwrite(&size, sizeof(int), 1, file_handle); fwrite(&size, sizeof(int), 1, file_handle); for(int = 0; < size; i++) { for(int j = 0; j < size; j++) { double x = matrix[size * + j]; fwrite(&x, sizeof(double), 1, file_handle); } } fclose(file_handle); } 
/*
 * Accumulate the product of two n x n row-major matrices into c:
 *     c[i][j] += sum over k of a[i][k] * b[k][j]
 *
 * c is NOT cleared first; the caller supplies the starting values and
 * repeated calls keep adding to them. A non-positive n leaves c untouched.
 */
void multiply(int n, double *a, double *b, double *c)
{
    int i, j, k;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            for (k = 0; k < n; k++) {
                c[i*n+j] += a[i*n+k]*b[k*n+j];
            }
        }
    }
}
If the code is too long, I can remove parts. I am wondering what is causing the deadlock and how to resolve it. Thank you for your time, in advance.
important information:
Rank 0 hits the barrier. Since the other 3 ranks are deadlocked, rank 0 is stuck until they have hit the barrier too.
output
reading 10 buffer has size 100 buffer has size 100 reading 10 rank[0] has entered loop rank[0] has left loop rank[0] has reached barrier... rank[1] has entered loop rank[2] has entered loop rank[3] has entered loop
There are 2 little issues to fix to get it working:
In these lines:
`a = calloc(n*n, sizeof(double)); b = calloc(n*n, sizeof(double)); c = calloc(n*n, sizeof(double)); MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);`
`n` should be broadcast before allocating `a`. Otherwise, `n` is not initialized and the behavior is undefined; this can trigger a segmentation fault.
In the function `MPI_Cart_shift`, the third argument is the displacement: negative for a downward shift, positive for an upward shift. I changed it so that every process uses the same displacement, and it worked fine. If `MPI_Sendrecv_replace()` is used, the number of messages received by a process must match the number of messages sent to that process. This is not the case in the following call to `MPI_Sendrecv_replace()`, because the displacement differs between processes on the same line of the mesh:
`MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest); MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest, 1, shiftsource, 1, comm_2d, &status);`
In the "skew" example of Open MPI, it is different:
c     Compute the shift source and destination
      call mpi_cart_shift(comm, 0, coords(2), source, dest, ierr)
c     Skew the array
      call mpi_sendrecv_replace(a, 1, mpi_real, dest, 0, source, 0,
     +                          comm, status, ierr)
In this case, all processes on the same line have the same displacement. Hence, each process sends one message and each process receives one. Yet the displacement depends on the line, so the matrix is skewed.
Here is the resulting code. It is compiled with `mpicc main.c -o main -lm -Wall` and run with `mpirun -np 4 main`:
#include <stdio.h> #include <stdlib.h> #include <math.h> #include "mpi.h" int main(int argc, char **argv) { mpi_init(&argc, &argv); double* a,*b,*c; int i, t, n; int nlocal; int npes, dims[2], periods[2]; int myrank, my2drank, mycoords[2]; int uprank, downrank, leftrank, rightrank; int shiftsource, shiftdest; mpi_status status; mpi_comm comm_2d; mpi_comm_size(mpi_comm_world, &npes); mpi_comm_rank(mpi_comm_world, &myrank); mpi_barrier(mpi_comm_world); t = -mpi_wtime(); if (myrank == 0) { int sizea,sizeb; printf("reading \n"); // = readmatrix(argv[1], &sizea); sizea=16; a=malloc(sizea*sizea*sizeof(double)); // b = readmatrix(argv[2], &sizeb); sizeb=16; b=malloc(sizeb*sizeb*sizeof(double)); printf("reading \n"); c = calloc(sizea*sizeb, sizeof(double)); n = sizea; mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world); mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world); if (sizea != sizeb) { printf("matrix not sized n^2\n"); mpi_abort(mpi_comm_world, 0); } } else { mpi_bcast(&n, 1, mpi_int, 0, mpi_comm_world);//n should broadcast before allocation = calloc(n*n, sizeof(double)); b = calloc(n*n, sizeof(double)); c = calloc(n*n, sizeof(double)); mpi_bcast(a, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(b, n*n, mpi_double, 0, mpi_comm_world); mpi_bcast(c, n*n, mpi_double, 0, mpi_comm_world); } dims[0] = dims[1] = sqrt(npes); periods[0] = periods[1] = 1; mpi_cart_create(mpi_comm_world, 2, dims, periods, 1, &comm_2d); mpi_comm_rank(comm_2d, &my2drank); mpi_cart_coords(comm_2d, my2drank, 2, mycoords); mpi_cart_shift(comm_2d, 0, -1, &rightrank, &leftrank); mpi_cart_shift(comm_2d, 1, -1, &downrank, &uprank); nlocal = n/dims[0]; mpi_cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest); // mpi_cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double, shiftdest,5, shiftsource, 5, comm_2d, &status); // mpi_cart_shift(comm_2d, 1, 
-mycoords[1], &shiftsource, &shiftdest); mpi_cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 6, shiftsource, 6, comm_2d, &status); printf("rank[%d] has entered loop dim %d\n", myrank,dims[0]);fflush(stdout); (i=0; i<dims[0]; i++) { // multiply(nlocal, a, b, c); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,leftrank, 1, rightrank, 1, comm_2d, &status); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,uprank, 2, downrank, 2, comm_2d, &status); } printf("rank[%d] has left loop\n", myrank);fflush(stdout); mpi_barrier(mpi_comm_world); // mpi_cart_shift(comm_2d, 0, +mycoords[0], &shiftsource, &shiftdest); mpi_cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest); mpi_sendrecv_replace(a, nlocal*nlocal, mpi_double,shiftdest, 3, shiftsource, 3, comm_2d, &status); mpi_cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest); //mpi_cart_shift(comm_2d, 1, +mycoords[1], &shiftsource, &shiftdest); mpi_sendrecv_replace(b, nlocal*nlocal, mpi_double,shiftdest, 4, shiftsource, 4, comm_2d, &status); printf("rank[%d] has reached barrier...\n", myrank);fflush(stdout); mpi_barrier(mpi_comm_world); if (myrank == 0) { t += mpi_wtime(); // writematrix(c, argv[3], n); printf("finshed in %d second(s)\n",t); } free(a); free(b); free(c); mpi_comm_free(&comm_2d); mpi_finalize(); return 0; }
Comments
Post a Comment