3.4. Testing: Synthetic Data

The synthetic module is a work in progress. The idea is to allow synthetic meteorological datasets to be readily created for testing purposes. While an entire synthetic dataset could be created first and then fed into THUNER in the usual way (see previous tutorials/demos), with this module we instead generate the synthetic data as we go. This approach avoids the need to store large datasets.

 1"""Synthetic data demo/test."""
 2
 3%load_ext autoreload
 4%autoreload 2
 5from pathlib import Path
 6import shutil
 7import numpy as np
 8import thuner.data as data
 9import thuner.default as default
10import thuner.track.track as track
11import thuner.option as option
12import thuner.data.synthetic as synthetic
Welcome to the Thunderstorm Event Reconnaissance (THUNER) package
v0.0.16! This package is still in testing and development. Please visit
github.com/THUNER-project/THUNER for examples, and to report issues or contribute.

THUNER is a flexible toolkit for performing multi-feature detection,
tracking, tagging and analysis of events within meteorological datasets.
The intended application is to convective weather events. For examples
and instructions, see https://github.com/THUNER-project/THUNER and
https://thuner.readthedocs.io/en/latest/. If you use THUNER in your research, consider
citing the following papers;

Short et al. (2023), doi: 10.1175/MWR-D-22-0146.1
Raut et al. (2021), doi: 10.1175/JAMC-D-20-0119.1
Fridlind et al. (2019), doi: 10.5194/amt-12-2979-2019
...
 1# Set a flag for whether or not to remove existing output directories
 2remove_existing_outputs = False
 3
 4# Parent directory for saving outputs
 5base_local = Path.home() / "THUNER_output"
 6start = "2005-11-13T00:00:00"
 7end = "2005-11-13T02:00:00"
 8
 9output_parent = base_local / "runs/synthetic/geographic"
10if output_parent.exists() and remove_existing_outputs:
11    shutil.rmtree(output_parent)
12options_directory = output_parent / "options"
13options_directory.mkdir(parents=True, exist_ok=True)
14
15# Create a grid
16lat = np.arange(-14, -6 + 0.025, 0.025).tolist()
17lon = np.arange(128, 136 + 0.025, 0.025).tolist()
18grid_options = option.grid.GridOptions(name="geographic", latitude=lat, longitude=lon)
19grid_options.to_yaml(options_directory / "grid.yml")
20
21# Initialize synthetic objects
22starting_objects = []
23for i in range(5):
24    obj = synthetic.create_object(
25        time=start,
26        center_latitude=np.mean(lat),
27        center_longitude=lon[(i+1)*len(lon) // 6],
28        direction=-np.pi / 4 + i * np.pi / 6,
29        speed=30-4*i,
30        horizontal_radius=5+4*i,
31    )
32    starting_objects.append(obj)
33# Create data options dictionary
34synthetic_options = data.synthetic.SyntheticOptions(starting_objects=starting_objects)
35data_options = option.data.DataOptions(datasets=[synthetic_options])
36data_options.to_yaml(options_directory / "data.yml")
37
38track_options = default.synthetic_track()
39track_options.to_yaml(options_directory / "track.yml")
40
41# Create the display_options dictionary
42visualize_options = default.synthetic_runtime(options_directory / "visualize.yml")
43visualize_options.to_yaml(options_directory / "visualize.yml")
2025-06-21 15:45:06,890 - thuner.option.grid - WARNING - altitude not specified. Using default altitudes.
2025-06-21 15:45:06,893 - thuner.option.grid - WARNING - shape not specified. Will attempt to infer from input.
# Ten-minute time steps from start to end; the extra step added to the stop
# value makes np.arange include `end` itself
time_step = np.timedelta64(10, "m")
times = np.arange(np.datetime64(start), np.datetime64(end) + time_step, time_step)

# Run the tracking, saving output beneath output_parent
args = [times, data_options, grid_options, track_options, visualize_options]
track.track(*args, output_directory=output_parent)
2025-06-21 15:45:11,851 - thuner.track.track - INFO - Beginning thuner tracking. Saving output to /home/ewan/THUNER_output/runs/synthetic/geographic.
2025-06-21 15:45:11,867 - thuner.track.track - INFO - Processing 2005-11-13T00:00:00.
2025-06-21 15:45:11,869 - thuner.data.synthetic - INFO - Updating synthetic dataset for 2005-11-13T00:00:00.
2025-06-21 15:45:32,267 - thuner.track.track - INFO - Processing hierarchy level 0.
2025-06-21 15:45:32,268 - thuner.track.track - INFO - Tracking convective.
2025-06-21 15:45:32,290 - thuner.detect.steiner - INFO - Compiling thuner.detect.steiner.steiner_scheme with Numba. Please wait.
2025-06-21 15:45:59,187 - thuner.match.match - INFO - Matching convective objects.
2025-06-21 15:45:59,201 - thuner.match.match - INFO - No current mask, or no objects in current mask.
2025-06-21 15:45:59,216 - thuner.visualize.runtime - INFO - Creating runtime visualization figures.
2025-06-21 15:46:05,368 - thuner.track.track - INFO - Processing 2005-11-13T00:10:00.
2025-06-21 15:46:05,369 - thuner.data.synthetic - INFO - Updating synthetic dataset for 2005-11-13T00:10:00.
2025-06-21 15:46:22,396 - thuner.track.track - INFO - Processing hierarchy level 0.
2025-06-21 15:46:22,397 - thuner.track.track - INFO - Tracking convective.
2025-06-21 15:46:22,414 - thuner.write.mask - INFO - Writing convective masks to /home/ewan/THUNER_output/runs/synthetic/geographic/masks/convective.zarr.
2025-06-21 15:46:22,689 - thuner.match.match - INFO - Matching convective objects.
...
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

Cell In[3], line 7
      1 times = np.arange(
      2     np.datetime64(start),
      3     np.datetime64(end) + np.timedelta64(10, "m"),
      4     np.timedelta64(10, "m"),
      5 )
      6 args = [times, data_options, grid_options, track_options, visualize_options]
----> 7 track.track(*args, output_directory=output_parent)

File ~/Documents/THUNER/thuner/track/track.py:110, in track(times, data_options, grid_options, track_options, visualize_options, output_directory)
    108         track_level_args += [data_options, grid_options, track_options]
    109         track_level_args += [visualize_options, output_directory]
--> 110         track_level(*track_level_args)
    112     current_time = next_time
    114 # Write final data to file
    115 # write.mask.write_final(tracks, track_options, output_directory)

File ~/Documents/THUNER/thuner/track/track.py:155, in track_level(next_time, level_index, tracks, input_records, data_options, grid_options, track_options, visualize_options, output_directory)
    153 for obj in level_tracks.objects.keys():
    154     track_object_args = get_track_object_args(obj, level_options)
--> 155     track_object(*track_object_args)
    157 return level_tracks

File ~/Documents/THUNER/thuner/track/track.py:212, in track_object(next_time, level_index, obj, tracks, input_records, dataset_options, grid_options, track_options, visualize_options, output_directory)
    210 if object_tracks.times[-1] is not None:
    211     args = [input_records, tracks, object_options, grid_options]
--> 212     attribute.record(*args)

File ~/Documents/THUNER/thuner/attribute/attribute.py:77, in record(input_records, tracks, object_options, grid_options)
     75 for attribute_type in object_options.attributes.attribute_types:
     76     for attribute in attribute_type.attributes:
---> 77         attr = retrieve_attribute(kwargs, attribute)
     78         obj_attributes.attribute_types[attribute_type.name].update(attr)
     80 # Append the current attributes to the attributes dictionary

File ~/Documents/THUNER/thuner/attribute/attribute.py:16, in retrieve_attribute(general_kwargs, attribute, member_object)
     13 def retrieve_attribute(general_kwargs, attribute, member_object=None):
     14     # Get the retrieval function and arguments for the attribute
     15     func_kwargs = general_kwargs.copy()
---> 16     keyword_arguments = attribute.retrieval.keyword_arguments
     17     func_kwargs.update(keyword_arguments)
     18     # Retrieval functions expect either "attribute" or "attribute_group"
     19     # keyword arguments. Infer correct argument name from attribute type.

AttributeError: 'NoneType' object has no attribute 'keyword_arguments'
THUNER applied to synthetic data.

Fig. 3.5 THUNER applied to synthetic data.

 1central_latitude = -10
 2central_longitude = 132
 3
 4y = np.arange(-400e3, 400e3 + 2.5e3, 2.5e3).tolist()
 5x = np.arange(-400e3, 400e3 + 2.5e3, 2.5e3).tolist()
 6
 7grid_options = option.grid.GridOptions(
 8    name="cartesian",
 9    x=x,
10    y=y,
11    central_latitude=central_latitude,
12    central_longitude=central_longitude,
13)
14grid_options.to_yaml(options_directory / "grid.yml")
 1output_parent = base_local / "runs/synthetic/cartesian"
 2if output_parent.exists() & remove_existing_outputs:
 3    shutil.rmtree(output_parent)
 4
 5times = np.arange(
 6    np.datetime64(start),
 7    np.datetime64(end) + np.timedelta64(10, "m"),
 8    +np.timedelta64(10, "m"),
 9)
10
11args = [times, data_options, grid_options, track_options, visualize_options]
12track.track(*args, output_directory=output_parent)