[netcdf-hdf] netcdf4 and OpenMP

Quincey Koziol koziol at hdfgroup.org
Tue May 20 12:43:59 MDT 2008


Hi Henry,

On May 20, 2008, at 4:53 AM, Henry Butowsky wrote:

> Hi all,
>
> The NCO operator program ncbo (aka ncdiff) uses the OpenMP interface
> to thread its work over the loop of variables in the input files.
> The threading works fine with netcdf3 but not with netcdf4.
> (The failure only occurs with netcdf4 files and the netcdf4 API;
>  netcdf3 files with the netcdf4 API work.)
>
> We are unsure why and are wondering if others have unexplained
> problems with OpenMP code that utilizes the new netcdf4 library?
>
> A simplified version of the code is below.
> ncbo takes 3 file arguments: 2 input files and one output file.
> It then subtracts the variables in file 2 from the variables in file 1
> and writes the results to the output file.
> Each thread has its own file handles in_id_1 & in_id_2 for the input  
> files.
>
> A couple of notes:
> nco_msa_var_get()  wraps the netcdf functions nco_get_vara_type().
> nco_var_mtd_refresh() refreshes the variable structure with type, id,
>  number of dimensions & missing value (if any)
> nco_var_sbt() subtracts values in var_prc_2 from var_prc_1
>
> Ideas/suggestions as to what is going wrong or how to debug this
> problem would be much appreciated.

	Are you using a thread-safe build of HDF5 (i.e. one that is
configured with the "--enable-threadsafe" option)?  It's also possible
that netCDF-4 itself needs some locking mechanisms, but that's a
question for Ed or Russ.

	Quincey

>
>
> Regards.
> Henry
>
>
>
> / 
> *****************************************************************************************************/
>
>
>  /* Open output file */
>
> fl_out_tmp 
> = 
> nco_fl_out_open 
> (fl_out,FORCE_APPEND,FORCE_OVERWRITE,fl_out_fmt,&out_id);
>
>    /* create file handles for input file 1 */
>  for(thr_idx=0;thr_idx<thr_nbr;thr_idx++)
>    rcd=nco_open(fl_in_1,NC_NOWRITE,in_id_1_arr+thr_idx);
>
>      /* create file handles for input file 2 */
>  for(thr_idx=0;thr_idx<thr_nbr;thr_idx++)
>    rcd=nco_open(fl_in_2,NC_NOWRITE,in_id_2_arr+thr_idx);
>
>
>
> #ifdef _OPENMP
>  /* OpenMP notes:
>     shared(): msk and wgt are not altered within loop
>     private(): wgt_avg does not need initialization */
> #pragma omp parallel for default(none) firstprivate(ddra_info)
> private(idx,in_id_1,in_id_2,dmn_idx,dmn_jdx)
> shared 
> (dbg_lvl 
> ,dim_1 
> ,fl_in_1 
> ,fl_in_2 
> ,fl_out 
> ,flg_ddra 
> ,in_id_1_arr 
> ,in_id_2_arr 
> ,nbr_dmn_xtr_1 
> ,nbr_var_prc_1 
> ,nbr_var_prc_2 
> ,nco_op_typ 
> ,out_id 
> ,prg_nm,rcd,var_prc_1,var_prc_2,var_prc_out,lmt_all_lst,nbr_dmn_fl_1)
> #endif /* !_OPENMP */
>  for(idx=0;idx<nbr_var_prc_1;idx++){
>    int has_mss_val=False;
>    ptr_unn mss_val;
>
>
>    in_id_1=in_id_1_arr[omp_get_thread_num()];
>    in_id_2=in_id_2_arr[omp_get_thread_num()];
>
>    (void)nco_var_mtd_refresh(in_id_1,var_prc_1[idx]);
>    has_mss_val=var_prc_1[idx]->has_mss_val;
>     
> (void 
> )nco_msa_var_get(in_id_1,var_prc_1[idx],lmt_all_lst,nbr_dmn_fl_1);
>
>    /* Find and set variable dmn_nbr, ID, mss_val, type in second  
> file */
>    (void)nco_var_mtd_refresh(in_id_2,var_prc_2[idx]);
>
>    /* Read hyperslab from second file */
>     
> (void 
> )nco_msa_var_get(in_id_2,var_prc_2[idx],lmt_all_lst,nbr_dmn_fl_1);
>    var_prc_2[idx]=nco_var_cnf_typ(var_prc_1[idx]- 
> >type,var_prc_2[idx]);
>
>    /* Change missing_value of var_prc_2, if any, to missing_value of
> var_prc_1, if any */
>    has_mss_val=nco_mss_val_cnf(var_prc_1[idx],var_prc_2[idx]);
>
>    /* mss_val in fl_1, if any, overrides mss_val in fl_2 */
>    if(has_mss_val) mss_val=var_prc_1[idx]->mss_val;
>
>    /* Subtract file_2 from file_1 */
>
> (void)nco_var_sbt(var_prc_1[idx]->type,var_prc_1[idx]- 
> >sz,has_mss_val,mss_val,var_prc_2[idx]->val,var_prc_1[idx]->val);
>
>
>
>    var_prc_2[idx]->val.vp=nco_free(var_prc_2[idx]->val.vp);
>
> #ifdef _OPENMP
> #pragma omp critical
> #endif /* _OPENMP */
>    { /* begin OpenMP critical */
>      /* Copy result to output file and free workspace buffer */
>      if(var_prc_1[idx]->nbr_dim == 0){
> 	(void)nco_put_var1(out_id,var_prc_out[idx]->id,var_prc_out[idx]- 
> >srt,var_prc_1[idx]->val.vp,var_prc_1[idx]->type);
>      }else{ /* end if variable is scalar */
> 	(void)nco_put_vara(out_id,var_prc_out[idx]->id,var_prc_out[idx]- 
> >srt,var_prc_out[idx]->cnt,var_prc_1[idx]->val.vp,var_prc_1[idx]- 
> >type);
>      } /* end else */
>    } /* end OpenMP critical */
>    var_prc_1[idx]->val.vp=nco_free(var_prc_1[idx]->val.vp);
>
>  } /* end (OpenMP parallel for) loop over idx */
>
>
> / 
> *****************************************************************************************************/
>
>
>
> Variable structure
>
>
>  typedef struct var_sct_tag{ /* var_sct */
>    char *nm; /* [sng] Variable name */
>    int id; /* [id] Variable ID */
>    int nc_id; /* [id] File ID */
>    int nbr_dim; /* [nbr] Number of dimensions of variable in input  
> file */
>    nc_type type; /* [enm] Type of variable in RAM */
>    nc_type typ_dsk; /* [enm] Type of variable on disk (never  
> changes) */
>    short is_rec_var; /* [flg] Is this a record variable? */
>    short is_crd_var; /* [flg] Is this a coordinate variable? */
>    long sz; /* [nbr] Number of elements (NOT bytes) in hyperslab (NOT
> full size of variable in input file!) */
>    long sz_rec; /* [nbr] Number of elements in one record of  
> hyperslab */
>    int nbr_att; /* [nbr] Number of attributes */
>    int has_dpl_dmn; /* [flg] Variable has duplicate copies of same
> dimension */
>    int has_mss_val; /* [flg] Is there a missing_value attribute? */
>    ptr_unn mss_val; /* [frc] Value of missing_value attribute, if any
> (mss_val stored in this structure must be same type as variable) */
>    int cid; /* [id] Dimension ID of associated coordinate, if any */
>    char fmt[5]; /* [sng] Hint for printf()-style formatting */
>    dmn_sct **dim; /* [sct] Pointers to full dimension structures */
>    int *dmn_id; /* [id] Contiguous vector of dimension IDs */
>    long *srt; /* [id] Contiguous vector of indices to start of  
> hyperslab */
>    long *end; /* [id] Contiguous vector of indices to end of  
> hyperslab */
>    long *cnt; /* [id] Contiguous vector of lengths of hyperslab */
>    long *srd; /* [id] Contiguous vector of stride of hyperslab */
>    ptr_unn val; /* [bfr] Buffer to hold hyperslab */
>    long *tally; /* [nbr] Number of valid operations performed so far  
> */
>    struct var_sct_tag *xrf; /* [sct] Cross-reference to associated
> variable structure (usually structure for variable on output) fxm:
> deprecate! TODO nco226 */
>    int pck_dsk; /* [flg] Variable is packed on disk (valid
> scale_factor, add_offset, or both attributes exist) */
>    int pck_ram; /* [flg] Variable is packed in memory (valid
> scale_factor, add_offset, or both attributes exist) */
>    int has_scl_fct; /* [flg] Valid scale_factor attribute exists */
>    int has_add_fst; /* [flg] Valid add_offset attribute exists */
>    ptr_unn scl_fct; /* [frc] Value of scale_factor attribute of type
> typ_upk */
>    ptr_unn add_fst; /* [frc] Value of add_offset attribute of type
> typ_upk */
>    nc_type typ_pck; /* [enm] Type of variable when packed (on disk).
> typ_pck = typ_dsk except in cases where variable is packed in input  
> file
> and unpacked in output file. */
>    nc_type typ_upk; /* [enm] Type of variable when unpacked (expanded)
> (in memory) */
>    int undefined; /* [flg] Variable is still undefined (in first  
> parser
> pass) */
>    int is_fix_var; /* [flg] Is this a fixed (non-processed)  
> variable? */
>  } var_sct; /* end var_sct_tag */
>
>
>
>
>
> _______________________________________________
> netcdf-hdf mailing list
> netcdf-hdf at unidata.ucar.edu
> For list information or to unsubscribe, visit: http://www.unidata.ucar.edu/mailing_lists/
>



More information about the netcdf-hdf mailing list