[netcdf-java] newbie question on NetCDF file overhead

Hi all-

I'm working on generating my first NetCDF files and have a question. The
files I'm creating seem to be far larger than I would have thought
necessary to hold the given data. I'm wondering if there is something I can
do to trim this down a bit.

Our data is simple time-series data (one unlimited dimension). Below is a
simple Java test program that generates a file with 10000 records, each of
which contains a 24-character timestamp string and three 2-byte values.
This gives a raw data requirement of 300000 bytes (10000 records x 30 bytes). The generated NetCDF file
is 2420656 bytes, or about 8x larger. Is this what is expected? In my
development with real data I'm seeing 7MB of data creating an 86MB NetCDF
file, etc. It seems to settle out at about 12x as the data sets grow, which
is still pretty onerous. Any insights or suggestions appreciated.

package gov.noaa.swpc.solarwind;

import org.joda.time.DateTime;
import ucar.ma2.ArrayShort;
import ucar.ma2.ArrayString;
import ucar.ma2.DataType;
import ucar.ma2.InvalidRangeException;
import ucar.nc2.*;

import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

public class TestGenFile {
  public static void main(String[] args) {
    DateTime startDate = new DateTime();
    DateTime endDate = startDate.plusDays(1);

    NetcdfFileWriter dataFile = null;

    try {
      try {

        // define the file
        String filePathName = "output.nc";

        // delete the file if it already exists
        Path path = FileSystems.getDefault().getPath(filePathName);
        Files.deleteIfExists(path);

        // enter definition mode for this NetCDF-4 file
        dataFile =
NetcdfFileWriter.createNew(NetcdfFileWriter.Version.netcdf4, filePathName);

        // create the root group
        Group rootGroup = dataFile.addGroup(null, null);

        // define the global attributes
        dataFile.addGroupAttribute(rootGroup, new Attribute("startDate",
startDate.toString()));
        dataFile.addGroupAttribute(rootGroup, new Attribute("endDate",
endDate.toString()));

        // define dimensions, in this case only one: time
        Dimension timeDim = dataFile.addUnlimitedDimension("time");
        List<Dimension> dimList = new ArrayList<>();
        dimList.add(timeDim);

        // define variables
        Variable time = dataFile.addVariable(rootGroup, "time",
DataType.STRING, dimList);
        dataFile.addVariableAttribute(time, new Attribute("standard_name",
"time"));

        Variable bx = dataFile.addVariable(rootGroup, "bx", DataType.SHORT,
dimList);
        dataFile.addVariableAttribute(bx, new Attribute("long_name", "IMF
Bx"));
        dataFile.addVariableAttribute(bx, new Attribute("units", "raw
counts"));

        Variable by = dataFile.addVariable(rootGroup, "by", DataType.SHORT,
dimList);
        dataFile.addVariableAttribute(by, new Attribute("long_name", "IMF
By"));
        dataFile.addVariableAttribute(by, new Attribute("units", "raw
counts"));

        Variable bz = dataFile.addVariable(rootGroup, "bz", DataType.SHORT,
dimList);
        dataFile.addVariableAttribute(bz, new Attribute("long_name", "IMF
Bz"));
        dataFile.addVariableAttribute(bz, new Attribute("units", "raw
counts"));

        // create the file
        dataFile.create();

        // create 1-D arrays to hold data values (time is the dimension)
        ArrayString timeArray = new ArrayString.D1(1);
        ArrayShort.D1 bxArray = new ArrayShort.D1(1);
        ArrayShort.D1 byArray = new ArrayShort.D1(1);
        ArrayShort.D1 bzArray = new ArrayShort.D1(1);

        int[] origin = new int[]{0};

        // write the records to the file
        for (int i = 0; i < 10000; i++) {
          // load data into array variables
          timeArray.setObject(timeArray.getIndex(), new
DateTime().toString());
          bxArray.set(0, (short) i);
          byArray.set(0, (short) (i * 2));
          bzArray.set(0, (short) (i * 3));

          origin[0] = i;

          // write a record
          dataFile.write(time, origin, timeArray);
          dataFile.write(bx, origin, bxArray);
          dataFile.write(by, origin, byArray);
          dataFile.write(bz, origin, bzArray);
        }
      } finally {
        if (null != dataFile) {
          // close the file
          dataFile.close();
        }
      }
    } catch (IOException | InvalidRangeException e) {
      e.printStackTrace();
    }
  }
}

thanks,
jeff

-- 
Jeff Johnson
DSCOVR Ground System Development
Space Weather Prediction Center
jeff.m.johnson@xxxxxxxx
  • 2014 messages navigation, sorted by:
    1. Thread
    2. Subject
    3. Author
    4. Date
    5. ↑ Table Of Contents
  • Search the netcdf-java archives: