Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Version NEXTVERSION

**2026-??-??**

* New keyword parameter to `cf.Data.compute`: ``persist``
(https://github.com/NCAS-CMS/cf-python/issues/929)
* New function to control the persistence of computed data:
`cf.persist_data` (https://github.com/NCAS-CMS/cf-python/issues/929)
* New default backend for netCDF-4 in `cf.write`: ``h5netcdf-h5py``,
that allows control of the internal file metadata via the new
``h5py_options`` parameter
Expand Down
28 changes: 24 additions & 4 deletions cf/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def configuration(
chunksize=None,
log_level=None,
display_data=None,
persist_data=None,
regrid_logging=None,
relaxed_identities=None,
bounds_combination_mode=None,
Expand All @@ -179,6 +180,7 @@ def configuration(
* `chunksize`
* `log_level`
* `display_data`
* `persist_data`
* `regrid_logging`
* `relaxed_identities`
* `bounds_combination_mode`
Expand All @@ -203,9 +205,10 @@ def configuration(

.. seealso:: `atol`, `rtol`, `tempdir`, `chunksize`,
`total_memory`, `log_level`, `display_data`,
`regrid_logging`, `relaxed_identities`,
`bounds_combination_mode`, `active_storage`,
`active_storage_url`, `active_storage_max_requests`
`persist_data`, `regrid_logging`,
`relaxed_identities`, `bounds_combination_mode`,
`active_storage`, `active_storage_url`,
`active_storage_max_requests`

:Parameters:

Expand Down Expand Up @@ -247,12 +250,18 @@ def configuration(
* ``'DETAIL'`` (``3``);
* ``'DEBUG'`` (``-1``).

display_data `bool` or `Constant`, optional
display_data: `bool` or `Constant`, optional
The new display data option. The default is to not change
the current behaviour.

.. versionadded:: 3.19.0

persist_data: `bool` or `Constant`, optional
The new persist data option. The default is to not change
the current behaviour.

.. versionadded:: NEXTVERSION

regrid_logging: `bool` or `Constant`, optional
The new value (either True to enable logging or False to
disable it). The default is to not change the current
Expand Down Expand Up @@ -312,6 +321,7 @@ def configuration(
'bounds_combination_mode': 'AND',
'chunksize': 82873466.88000001,
'display_data': True,
'persist_data': False,
'active_storage': False,
'active_storage_url': None,
'active_storage_max_requests': 100}
Expand All @@ -330,6 +340,7 @@ def configuration(
'bounds_combination_mode': 'AND',
'chunksize': 75000000.0,
'display_data': True,
'persist_data': False,
'active_storage': False,
'active_storage_url': None,
'active_storage_max_requests': 100}
Expand Down Expand Up @@ -358,6 +369,7 @@ def configuration(
'bounds_combination_mode': 'AND',
'chunksize': 75000000.0,
'display_data': True,
'persist_data': False,
'active_storage': False,
'active_storage_url': None,
'active_storage_max_requests': 100}
>>> with cf.configuration(atol=9, rtol=10):
Expand All @@ -372,6 +384,7 @@ def configuration(
'bounds_combination_mode': 'AND',
'chunksize': 75000000.0,
'display_data': True,
'persist_data': False,
'active_storage': False,
'active_storage_url': None,
'active_storage_max_requests': 100}
Expand All @@ -385,6 +398,7 @@ def configuration(
'bounds_combination_mode': 'AND',
'chunksize': 75000000.0,
'display_data': True,
'persist_data': False,
'active_storage': False,
'active_storage_url': None,
'active_storage_max_requests': 100}
Expand Down Expand Up @@ -416,6 +430,7 @@ def configuration(
new_chunksize=chunksize,
new_log_level=log_level,
new_display_data=display_data,
new_persist_data=persist_data,
new_regrid_logging=regrid_logging,
new_relaxed_identities=relaxed_identities,
bounds_combination_mode=bounds_combination_mode,
Expand Down Expand Up @@ -460,6 +475,7 @@ def _configuration(_Configuration, **kwargs):
"new_chunksize": chunksize,
"new_log_level": log_level,
"new_display_data": display_data,
"new_persist_data": persist_data,
"new_regrid_logging": regrid_logging,
"new_relaxed_identities": relaxed_identities,
"bounds_combination_mode": bounds_combination_mode,
Expand Down Expand Up @@ -590,6 +606,10 @@ class display_data(ConstantAccess, cfdm.display_data):
pass


# `cf` counterpart of `cfdm.persist_data`: it mixes in `ConstantAccess`
# and defines nothing of its own, so all behaviour comes from the two
# base classes.
class persist_data(ConstantAccess, cfdm.persist_data):
pass


class regrid_logging(ConstantAccess):
"""Whether or not to enable `esmpy` regridding logging.

Expand Down
69 changes: 55 additions & 14 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

faulthandler.enable() # to debug seg faults and timeouts

import cfdm

import cf

n_tmpfiles = 2
Expand Down Expand Up @@ -3050,8 +3052,6 @@ def test_Data_where(self):

def test_Data__init__compression(self):
"""Test Data initialised from compressed data sources."""
import cfdm

# Ragged
for f in cfdm.read("DSG_timeSeries_contiguous.nc"):
f = f.data
Expand Down Expand Up @@ -3200,6 +3200,59 @@ def test_Data_compute(self):
d.compute()
self.assertEqual(d.get_cached_elements(), {0: 1, 1: 2, -1: 2})

# Persist
f = cf.read(self.filename, dask_chunks=3)[0]
d0 = f.data
npartitions = d0.npartitions
self.assertGreater(npartitions, 1)

with cf.persist_data(False):
d = d0.copy()
a = d.compute()
self.assertEqual(len(d.get_filenames()), 1)
b = d.compute()
self.assertEqual(len(d.get_filenames()), 1)
self.assertTrue(np.allclose(a, b))

d = d0.copy()
a = d.compute()
self.assertEqual(len(d.get_filenames()), 1)
b = d.compute(persist=None)
self.assertEqual(len(d.get_filenames()), 1)
self.assertTrue(np.allclose(a, b))

d = d0.copy()
a = d.compute()
self.assertEqual(len(d.get_filenames()), 1)
b = d.compute(persist=False)
self.assertEqual(len(d.get_filenames()), 1)
self.assertTrue(np.allclose(a, b))

d = d0.copy()
a = d.compute()
self.assertEqual(len(d.get_filenames()), 1)
b = d.compute(persist=True)
self.assertEqual(len(d.get_filenames()), 0)
self.assertEqual(d.npartitions, npartitions)
self.assertTrue(np.allclose(a, b))

with cf.persist_data(True):
d = d0.copy()
d.compute()
self.assertEqual(len(d.get_filenames()), 0)

d = d0.copy()
d.compute(persist=None)
self.assertEqual(len(d.get_filenames()), 0)

d = d0.copy()
d.compute(persist=False)
self.assertEqual(len(d.get_filenames()), 1)

d = d0.copy()
d.compute(persist=True)
self.assertEqual(len(d.get_filenames()), 0)

def test_Data_persist(self):
"""Test Data.persist."""
d = cf.Data(9, "km")
Expand Down Expand Up @@ -4149,8 +4202,6 @@ def test_Data_masked_invalid(self):

def test_Data_uncompress(self):
"""Test the `uncompress` Data method."""
import cfdm

f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
a = f.data.array
d = cf.Data(cf.RaggedContiguousArray(source=f.data.source()))
Expand Down Expand Up @@ -4274,8 +4325,6 @@ def test_Data_soften_mask(self):

def test_Data_compressed_array(self):
"""Test the `compressed_array` Data property."""
import cfdm

f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
f = f.data
d = cf.Data(cf.RaggedContiguousArray(source=f.source()))
Expand Down Expand Up @@ -4305,8 +4354,6 @@ def test_Data_fits_in_memory(self):

def test_Data_get_compressed(self):
"""Test the Data methods which get compression properties."""
import cfdm

# Compressed
f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
f = f.data
Expand Down Expand Up @@ -4365,8 +4412,6 @@ def test_Data_get_data(self):

def test_Data_get_count(self):
"""Test the `get_count` Data method."""
import cfdm

f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
f = f.data
d = cf.Data(cf.RaggedContiguousArray(source=f.source()))
Expand All @@ -4378,8 +4423,6 @@ def test_Data_get_count(self):

def test_Data_get_index(self):
"""Test the `get_index` Data method."""
import cfdm

f = cfdm.read("DSG_timeSeries_indexed.nc")[0]
f = f.data
d = cf.Data(cf.RaggedIndexedArray(source=f.source()))
Expand All @@ -4391,8 +4434,6 @@ def test_Data_get_index(self):

def test_Data_get_list(self):
"""Test the `get_list` Data method."""
import cfdm

f = cfdm.read("gathered.nc")[0]
f = f.data
d = cf.Data(cf.GatheredArray(source=f.source()))
Expand Down
3 changes: 2 additions & 1 deletion cf/test/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_configuration(self):
self.assertIsInstance(org, dict)

# Check all keys that should be there are, with correct value type:
self.assertEqual(len(org), 12) # update expected len if add new key(s)
self.assertEqual(len(org), 13) # update expected len if add new key(s)

# Types expected:
self.assertIsInstance(org["atol"], float)
Expand All @@ -71,6 +71,7 @@ def test_configuration(self):
# equiv. string
self.assertIsInstance(org["log_level"], str)
self.assertIsInstance(org["display_data"], bool)
self.assertIsInstance(org["persist_data"], bool)

# Store some sensible values to reset items to for testing, ensuring:
# 1) they are kept different to the defaults (i.e. org values); and
Expand Down
2 changes: 2 additions & 0 deletions docs/source/function.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ Resource management

cf.configuration
cf.chunksize
cf.display_data
cf.persist_data
cf.free_memory
cf.regrid_logging
cf.tempdir
Expand Down
Loading