[netcdfgroup] bug report (nc_put_varm_double hangs during collective parallel I/O)

Hello,

I am using NetCDF 4.1.3 built with HDF5 1.8.8 and MPICH2, on Mac OS X
and Ubuntu Linux.

I noticed that nc_put_varm_double (and ..._int, etc.) hangs under the
following conditions:

- it is running on more than 1 processor
- with NC_COLLECTIVE I/O mode set
- we are writing an N-dimensional variable with N >= 2
- different ranks write different numbers of values along the *first*
dimension (in other words, count[0] differs across processors)

Similar code succeeds on one processor, when count[0] is the same on all
processors (even if count[1] differs), or when the nc_put_varm_... call is
replaced with an equivalent nc_put_vara_... call.

It also succeeds when NULL is passed as the "imap" argument to nc_put_varm_...
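
For reference, here is the shape of the two calls I am comparing (a minimal
sketch; ncid, v1id, start, count, stride, imap and data are set up exactly as
in the attached program, with a trivial row-major imap):

  /* Hangs under the conditions listed above: */
  stat = nc_put_varm_double(ncid, v1id, start, count, stride, imap, data);

  /* Works, and should write the same data given the trivial imap: */
  stat = nc_put_vara_double(ncid, v1id, start, count, data);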

Please see the attached C code I used to isolate this issue.

PS: It is entirely possible that I am misusing the NetCDF library by
providing invalid start, count, stride and imap arrays. Please do let
me know if this is the case.
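
For concreteness, with Y_SIZE = 1 and two MPI ranks, the arguments in the
attached program work out to (the variable is 3 x 1, and rank 1 writes twice
as many rows along "x" as rank 0):

  rank 0: start = {0, 0}, count = {1, 1}, stride = {1, 1}, imap = {1, 1}
  rank 1: start = {1, 0}, count = {2, 1}, stride = {1, 1}, imap = {1, 1}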

-- 
Constantine

cc help@xxxxxxxxxxxxx
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#include <mpi.h>
#include <netcdf_par.h>

#define CHKERRQ(e) do { \
    if ((e) != NC_NOERR) {                                              \
      printf("Bailing out in file %s, line %d, error:%s.\n",            \
             __FILE__, __LINE__, nc_strerror(e));                       \
      return e;                                                         \
    }                                                                   \
  } while (0)

#define FILE_NAME "test_parallel_2d.nc"
#define Y_SIZE 1
#define NDIMS 2

/* This program tries to write data to a 2-dimensional variable using NetCDF's
 * MPI I/O capabilities.
 *
 * Each processor allocates a (Y_SIZE + mpi_rank) * Y_SIZE array
 * and fills it with predictable (non-random) values.
 *
 * "Local" arrays are appended along the "x" dimension.
 *
 * nc_put_varm_double hangs when
 * - writing a two- (or more) dimensional variable,
 * - the counts along the *first* dimension differ across ranks, and
 * - collective I/O mode is selected.
*/
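
/* Build/run example (the compiler wrapper, flags and file names below are
 * illustrative and will vary by system):
 *
 *   mpicc netcdf_parallel_2d.c -o netcdf_parallel_2d -lnetcdf
 *   mpiexec -n 2 ./netcdf_parallel_2d varm collective
 */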

int main(int argc, char **argv)
{
  /* MPI stuff. */
  int mpi_namelen;
  char mpi_name[MPI_MAX_PROCESSOR_NAME];
  int mpi_size, mpi_rank;
  MPI_Comm comm = MPI_COMM_WORLD;
  MPI_Info info = MPI_INFO_NULL;

  /* Netcdf-4 stuff. */
  int ncid, v1id, dimids[NDIMS];

  size_t start[NDIMS], count[NDIMS];
  ptrdiff_t stride[NDIMS], imap[NDIMS];
  double *data;
  int i, j, stat, local_x_size, local_size, x_size;

  /* Initialize MPI. */
  MPI_Init(&argc, &argv);
  MPI_Comm_size(comm, &mpi_size);
  MPI_Comm_rank(comm, &mpi_rank);
  MPI_Get_processor_name(mpi_name, &mpi_namelen);

  if (argc != 3) {
    if (mpi_rank == 0) {
      printf("Usage: netcdf_parallel_2d vara collective\n"
             "or     netcdf_parallel_2d varm collective\n"
             "or     netcdf_parallel_2d vara independent\n"
             "or     netcdf_parallel_2d varm independent\n");
    }
    /* Shut down MPI. */
    MPI_Finalize();
    return 0;
  }

  printf("mpi_name: %s size: %d rank: %d\n", mpi_name, mpi_size, mpi_rank);

  local_x_size = Y_SIZE + mpi_rank;
  local_size = local_x_size * Y_SIZE;

  /* The total extent of the "x" dimension is the sum of the per-rank slab
     sizes. */
  MPI_Allreduce(&local_x_size, &x_size, 1, MPI_INT, MPI_SUM, comm);

  /* Create a parallel netcdf-4 file. */
  {
    stat = nc_create_par(FILE_NAME, NC_NETCDF4|NC_MPIIO, comm, info, &ncid); CHKERRQ(stat);

    /* Create two dimensions. */
    stat = nc_def_dim(ncid, "x", (size_t)x_size, &dimids[0]); CHKERRQ(stat);
    stat = nc_def_dim(ncid, "y", (size_t)Y_SIZE, &dimids[1]); CHKERRQ(stat);

    /* Create one variable. */
    stat = nc_def_var(ncid, "data", NC_DOUBLE, 2, dimids, &v1id); CHKERRQ(stat);

    stat = nc_enddef(ncid); CHKERRQ(stat);
  }

  /* Create phony data. */
  {
    data = (double*)malloc(local_size * sizeof(double));

    if (data == NULL) {
      printf("mpi_rank=%d: Memory allocation failed.\n", mpi_rank);
      exit(1);
    }

    /* Fill with non-random junk to make sure that imap (below) is set correctly. */
    for (i = 0; i < local_x_size; ++i) {
      for (j = 0; j < Y_SIZE; ++j) {
        data[Y_SIZE * i + j] = local_x_size * mpi_rank + i;
      }
    }
  }

  /* Write phony data. */
  {
    /* start[0] is the sum of the slab sizes (Y_SIZE + j) of the lower ranks,
       so each rank writes its own contiguous slab along "x". */
    start[0] = 0;
    for (j = 0; j < mpi_rank; ++j)
      start[0] += Y_SIZE + j;
    start[1] = 0;

    count[0] = local_x_size;
    count[1] = Y_SIZE;

    /* Note: the mapping between the storage order in memory and the storage
       order in the file is the trivial (row-major, contiguous) one. */
    stride[0] = 1;
    stride[1] = 1;

    imap[0] = Y_SIZE;
    imap[1] = 1;

    for (j = 0; j < NDIMS; ++j) {
      printf("mpi_rank=%d: start[%d]=%d count[%d]=%d imap[%d]=%d\n", mpi_rank,
             j, (int)start[j],
             j, (int)count[j],
             j, (int)imap[j]);
    }

    /* Flush buffers (just to make stdout look better). */
    fflush(stdout);
    MPI_Barrier(comm);

    if (strcmp(argv[2], "collective") == 0) {
      stat = nc_var_par_access(ncid, v1id, NC_COLLECTIVE); CHKERRQ(stat);
    } else {
      stat = nc_var_par_access(ncid, v1id, NC_INDEPENDENT); CHKERRQ(stat);
    }

    if (strcmp(argv[1], "varm") == 0) {
      printf("mpi_rank=%d: calling nc_put_varm_double(...)\n", mpi_rank);
      stat = nc_put_varm_double(ncid, v1id, start, count, stride, imap, data); CHKERRQ(stat);
      printf("mpi_rank=%d: exited nc_put_varm_double(...)\n", mpi_rank);
    } else {
      printf("mpi_rank=%d: calling nc_put_vara_double(...)\n", mpi_rank);
      stat = nc_put_vara_double(ncid, v1id, start, count, data); CHKERRQ(stat);
      printf("mpi_rank=%d: exited nc_put_vara_double(...)\n", mpi_rank);
    }

  }

  free(data);

  /* Close the netcdf file. */
  stat = nc_close(ncid); CHKERRQ(stat);

  /* Shut down MPI. */
  MPI_Finalize();
  return 0;
}