[netcdfgroup] [netcdf-hdf] netcdf4 and OpenMP
Quincey Koziol
koziol at hdfgroup.org
Tue May 20 12:43:59 MDT 2008
Hi Henry,
On May 20, 2008, at 4:53 AM, Henry Butowsky wrote:
> Hi all,
>
> The NCO operator program ncbo (aka ncdiff) uses the OpenMP interface
> to thread its work over the loop of variables in the input files.
> The threading works fine with netcdf3 but not with netcdf4.
> (The failure occurs only with netcdf4 files and the netcdf4 API;
> netcdf3 files with the netcdf4 API work.)
>
> We are unsure why and are wondering if others have unexplained
> problems with OpenMP code that utilizes the new netcdf4 library?
>
> A simplified version of the code is below.
> ncbo takes 3 file arguments - 2 input files and one output file
> It then subtracts the variables in file 2 from the variables in file 1
> and writes the results to the output file.
> Each thread has its own file handles in_id_1 & in_id_2 for the input
> files.
>
> A couple of notes:
> nco_msa_var_get() wraps the netcdf functions nco_get_vara_type().
> nco_var_mtd_refresh() refreshes the variable structure with type, id,
> number of dimensions & missing value (if any)
> nco_var_sbt() subtracts values in var_prc_2 from var_prc_1
>
> Ideas/suggestions as to what is going wrong or how to debug this
> problem would be much appreciated.
Are you using a thread-safe version of HDF5 (i.e., one that is
configured with the "--enable-threadsafe" option)? It's also possible
that netCDF-4 needs some locking mechanisms, but that's a
question for Ed or Russ.
Quincey
>
>
> Regards.
> Henry
>
>
>
> /*****************************************************************************************************/
>
>
> /* Open output file */
>
> fl_out_tmp=nco_fl_out_open(fl_out,FORCE_APPEND,FORCE_OVERWRITE,fl_out_fmt,&out_id);
>
> /* create file handles for input file 1 */
> for(thr_idx=0;thr_idx<thr_nbr;thr_idx++)
> rcd=nco_open(fl_in_1,NC_NOWRITE,in_id_1_arr+thr_idx);
>
> /* create file handles for input file 2 */
> for(thr_idx=0;thr_idx<thr_nbr;thr_idx++)
> rcd=nco_open(fl_in_2,NC_NOWRITE,in_id_2_arr+thr_idx);
>
>
>
> #ifdef _OPENMP
> /* OpenMP notes:
> shared(): msk and wgt are not altered within loop
> private(): wgt_avg does not need initialization */
> #pragma omp parallel for default(none) firstprivate(ddra_info)
> private(idx,in_id_1,in_id_2,dmn_idx,dmn_jdx)
> shared
> (dbg_lvl
> ,dim_1
> ,fl_in_1
> ,fl_in_2
> ,fl_out
> ,flg_ddra
> ,in_id_1_arr
> ,in_id_2_arr
> ,nbr_dmn_xtr_1
> ,nbr_var_prc_1
> ,nbr_var_prc_2
> ,nco_op_typ
> ,out_id
> ,prg_nm,rcd,var_prc_1,var_prc_2,var_prc_out,lmt_all_lst,nbr_dmn_fl_1)
> #endif /* !_OPENMP */
> for(idx=0;idx<nbr_var_prc_1;idx++){
> int has_mss_val=False;
> ptr_unn mss_val;
>
>
> in_id_1=in_id_1_arr[omp_get_thread_num()];
> in_id_2=in_id_2_arr[omp_get_thread_num()];
>
> (void)nco_var_mtd_refresh(in_id_1,var_prc_1[idx]);
> has_mss_val=var_prc_1[idx]->has_mss_val;
>
> (void)nco_msa_var_get(in_id_1,var_prc_1[idx],lmt_all_lst,nbr_dmn_fl_1);
>
> /* Find and set variable dmn_nbr, ID, mss_val, type in second
> file */
> (void)nco_var_mtd_refresh(in_id_2,var_prc_2[idx]);
>
> /* Read hyperslab from second file */
>
> (void)nco_msa_var_get(in_id_2,var_prc_2[idx],lmt_all_lst,nbr_dmn_fl_1);
> var_prc_2[idx]=nco_var_cnf_typ(var_prc_1[idx]->type,var_prc_2[idx]);
>
> /* Change missing_value of var_prc_2, if any, to missing_value of
> var_prc_1, if any */
> has_mss_val=nco_mss_val_cnf(var_prc_1[idx],var_prc_2[idx]);
>
> /* mss_val in fl_1, if any, overrides mss_val in fl_2 */
> if(has_mss_val) mss_val=var_prc_1[idx]->mss_val;
>
> /* Subtract file_2 from file_1 */
>
> (void)nco_var_sbt(var_prc_1[idx]->type,var_prc_1[idx]->sz,has_mss_val,mss_val,var_prc_2[idx]->val,var_prc_1[idx]->val);
>
>
>
> var_prc_2[idx]->val.vp=nco_free(var_prc_2[idx]->val.vp);
>
> #ifdef _OPENMP
> #pragma omp critical
> #endif /* _OPENMP */
> { /* begin OpenMP critical */
> /* Copy result to output file and free workspace buffer */
> if(var_prc_1[idx]->nbr_dim == 0){
> (void)nco_put_var1(out_id,var_prc_out[idx]->id,var_prc_out[idx]->srt,var_prc_1[idx]->val.vp,var_prc_1[idx]->type);
> }else{ /* end if variable is scalar */
> (void)nco_put_vara(out_id,var_prc_out[idx]->id,var_prc_out[idx]->srt,var_prc_out[idx]->cnt,var_prc_1[idx]->val.vp,var_prc_1[idx]->type);
> } /* end else */
> } /* end OpenMP critical */
> var_prc_1[idx]->val.vp=nco_free(var_prc_1[idx]->val.vp);
>
> } /* end (OpenMP parallel for) loop over idx */
>
>
> /*****************************************************************************************************/
>
>
>
> Variable structure
>
>
> typedef struct var_sct_tag{ /* var_sct */
> char *nm; /* [sng] Variable name */
> int id; /* [id] Variable ID */
> int nc_id; /* [id] File ID */
> int nbr_dim; /* [nbr] Number of dimensions of variable in input
> file */
> nc_type type; /* [enm] Type of variable in RAM */
> nc_type typ_dsk; /* [enm] Type of variable on disk (never
> changes) */
> short is_rec_var; /* [flg] Is this a record variable? */
> short is_crd_var; /* [flg] Is this a coordinate variable? */
> long sz; /* [nbr] Number of elements (NOT bytes) in hyperslab (NOT
> full size of variable in input file!) */
> long sz_rec; /* [nbr] Number of elements in one record of
> hyperslab */
> int nbr_att; /* [nbr] Number of attributes */
> int has_dpl_dmn; /* [flg] Variable has duplicate copies of same
> dimension */
> int has_mss_val; /* [flg] Is there a missing_value attribute? */
> ptr_unn mss_val; /* [frc] Value of missing_value attribute, if any
> (mss_val stored in this structure must be same type as variable) */
> int cid; /* [id] Dimension ID of associated coordinate, if any */
> char fmt[5]; /* [sng] Hint for printf()-style formatting */
> dmn_sct **dim; /* [sct] Pointers to full dimension structures */
> int *dmn_id; /* [id] Contiguous vector of dimension IDs */
> long *srt; /* [id] Contiguous vector of indices to start of
> hyperslab */
> long *end; /* [id] Contiguous vector of indices to end of
> hyperslab */
> long *cnt; /* [id] Contiguous vector of lengths of hyperslab */
> long *srd; /* [id] Contiguous vector of stride of hyperslab */
> ptr_unn val; /* [bfr] Buffer to hold hyperslab */
> long *tally; /* [nbr] Number of valid operations performed so far
> */
> struct var_sct_tag *xrf; /* [sct] Cross-reference to associated
> variable structure (usually structure for variable on output) fxm:
> deprecate! TODO nco226 */
> int pck_dsk; /* [flg] Variable is packed on disk (valid
> scale_factor, add_offset, or both attributes exist) */
> int pck_ram; /* [flg] Variable is packed in memory (valid
> scale_factor, add_offset, or both attributes exist) */
> int has_scl_fct; /* [flg] Valid scale_factor attribute exists */
> int has_add_fst; /* [flg] Valid add_offset attribute exists */
> ptr_unn scl_fct; /* [frc] Value of scale_factor attribute of type
> typ_upk */
> ptr_unn add_fst; /* [frc] Value of add_offset attribute of type
> typ_upk */
> nc_type typ_pck; /* [enm] Type of variable when packed (on disk).
> typ_pck = typ_dsk except in cases where variable is packed in input
> file
> and unpacked in output file. */
> nc_type typ_upk; /* [enm] Type of variable when unpacked (expanded)
> (in memory) */
> int undefined; /* [flg] Variable is still undefined (in first
> parser
> pass) */
> int is_fix_var; /* [flg] Is this a fixed (non-processed)
> variable? */
> } var_sct; /* end var_sct_tag */
>
>
>
>
>
> _______________________________________________
> netcdf-hdf mailing list
> netcdf-hdf at unidata.ucar.edu
> For list information or to unsubscribe, visit: http://www.unidata.ucar.edu/mailing_lists/
>
More information about the netcdfgroup
mailing list