3.4. Testing: Synthetic Data
The synthetic module is a work in progress. The idea is to allow synthetic meteorological datasets to be readily created for testing purposes. While an entire synthetic dataset could be created first and then fed into THUNER in the usual way (see previous tutorials/demos), with this module we instead generate the synthetic data as we go. This approach avoids the need to store large datasets.
1"""Synthetic data demo/test."""
2
3%load_ext autoreload
4%autoreload 2
5from pathlib import Path
6import shutil
7import numpy as np
8import thuner.data as data
9import thuner.default as default
10import thuner.track.track as track
11import thuner.option as option
12import thuner.data.synthetic as synthetic
Welcome to the Thunderstorm Event Reconnaissance (THUNER) package
v0.0.16! This package is still in testing and development. Please visit
github.com/THUNER-project/THUNER for examples, and to report issues or contribute.
THUNER is a flexible toolkit for performing multi-feature detection,
tracking, tagging and analysis of events within meteorological datasets.
The intended application is to convective weather events. For examples
and instructions, see https://github.com/THUNER-project/THUNER and
https://thuner.readthedocs.io/en/latest/. If you use THUNER in your research, consider
citing the following papers;
Short et al. (2023), doi: 10.1175/MWR-D-22-0146.1
Raut et al. (2021), doi: 10.1175/JAMC-D-20-0119.1
Fridlind et al. (2019), doi: 10.5194/amt-12-2979-2019
...
# When True, any outputs left over from an earlier run are deleted first
remove_existing_outputs = False

# Root folder for all outputs, and the time window of the synthetic run
base_local = Path.home() / "THUNER_output"
start = "2005-11-13T00:00:00"
end = "2005-11-13T02:00:00"

# Output folder for this run; option files are saved in an "options" subfolder
output_parent = base_local / "runs/synthetic/geographic"
if output_parent.exists() and remove_existing_outputs:
    shutil.rmtree(output_parent)
options_directory = output_parent / "options"
options_directory.mkdir(parents=True, exist_ok=True)

# Regular latitude/longitude grid; the stop value is pushed one spacing past
# the last coordinate so that np.arange includes it
spacing = 0.025
lat = np.arange(-14, -6 + spacing, spacing).tolist()
lon = np.arange(128, 136 + spacing, spacing).tolist()
grid_options = option.grid.GridOptions(name="geographic", latitude=lat, longitude=lon)
grid_options.to_yaml(options_directory / "grid.yml")

# Five synthetic objects, all created at `start` on the mean latitude of the
# grid. Each successive object is centred further along the longitude axis and
# is given a larger direction angle, a lower speed and a larger radius.
mean_latitude = np.mean(lat)
starting_objects = [
    synthetic.create_object(
        time=start,
        center_latitude=mean_latitude,
        center_longitude=lon[(k + 1) * len(lon) // 6],
        direction=-np.pi / 4 + k * np.pi / 6,
        speed=30 - 4 * k,
        horizontal_radius=5 + 4 * k,
    )
    for k in range(5)
]

# Data options: a single synthetic dataset built from the starting objects
synthetic_options = data.synthetic.SyntheticOptions(starting_objects=starting_objects)
data_options = option.data.DataOptions(datasets=[synthetic_options])
data_options.to_yaml(options_directory / "data.yml")

# Default tracking options for synthetic data
track_options = default.synthetic_track()
track_options.to_yaml(options_directory / "track.yml")

# Default runtime visualization options for synthetic data
visualize_path = options_directory / "visualize.yml"
visualize_options = default.synthetic_runtime(visualize_path)
visualize_options.to_yaml(visualize_path)
2025-06-21 15:45:06,890 - thuner.option.grid - WARNING - altitude not specified. Using default altitudes.
2025-06-21 15:45:06,893 - thuner.option.grid - WARNING - shape not specified. Will attempt to infer from input.
# Ten-minute time steps spanning the run; the stop value is pushed one step
# past `end` so that `end` itself is included in `times`
time_step = np.timedelta64(10, "m")
first_time = np.datetime64(start)
last_time = np.datetime64(end)
times = np.arange(first_time, last_time + time_step, time_step)

# Run the tracking, saving output under the geographic run directory
args = [times, data_options, grid_options, track_options, visualize_options]
track.track(*args, output_directory=output_parent)
2025-06-21 15:45:11,851 - thuner.track.track - INFO - Beginning thuner tracking. Saving output to /home/ewan/THUNER_output/runs/synthetic/geographic.
2025-06-21 15:45:11,867 - thuner.track.track - INFO - Processing 2005-11-13T00:00:00.
2025-06-21 15:45:11,869 - thuner.data.synthetic - INFO - Updating synthetic dataset for 2005-11-13T00:00:00.
2025-06-21 15:45:32,267 - thuner.track.track - INFO - Processing hierarchy level 0.
2025-06-21 15:45:32,268 - thuner.track.track - INFO - Tracking convective.
2025-06-21 15:45:32,290 - thuner.detect.steiner - INFO - Compiling thuner.detect.steiner.steiner_scheme with Numba. Please wait.
2025-06-21 15:45:59,187 - thuner.match.match - INFO - Matching convective objects.
2025-06-21 15:45:59,201 - thuner.match.match - INFO - No current mask, or no objects in current mask.
2025-06-21 15:45:59,216 - thuner.visualize.runtime - INFO - Creating runtime visualization figures.
2025-06-21 15:46:05,368 - thuner.track.track - INFO - Processing 2005-11-13T00:10:00.
2025-06-21 15:46:05,369 - thuner.data.synthetic - INFO - Updating synthetic dataset for 2005-11-13T00:10:00.
2025-06-21 15:46:22,396 - thuner.track.track - INFO - Processing hierarchy level 0.
2025-06-21 15:46:22,397 - thuner.track.track - INFO - Tracking convective.
2025-06-21 15:46:22,414 - thuner.write.mask - INFO - Writing convective masks to /home/ewan/THUNER_output/runs/synthetic/geographic/masks/convective.zarr.
2025-06-21 15:46:22,689 - thuner.match.match - INFO - Matching convective objects.
...
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[3], line 7
1 times = np.arange(
2 np.datetime64(start),
3 np.datetime64(end) + np.timedelta64(10, "m"),
4 np.timedelta64(10, "m"),
5 )
6 args = [times, data_options, grid_options, track_options, visualize_options]
----> 7 track.track(*args, output_directory=output_parent)
File ~/Documents/THUNER/thuner/track/track.py:110, in track(times, data_options, grid_options, track_options, visualize_options, output_directory)
108 track_level_args += [data_options, grid_options, track_options]
109 track_level_args += [visualize_options, output_directory]
--> 110 track_level(*track_level_args)
112 current_time = next_time
114 # Write final data to file
115 # write.mask.write_final(tracks, track_options, output_directory)
File ~/Documents/THUNER/thuner/track/track.py:155, in track_level(next_time, level_index, tracks, input_records, data_options, grid_options, track_options, visualize_options, output_directory)
153 for obj in level_tracks.objects.keys():
154 track_object_args = get_track_object_args(obj, level_options)
--> 155 track_object(*track_object_args)
157 return level_tracks
File ~/Documents/THUNER/thuner/track/track.py:212, in track_object(next_time, level_index, obj, tracks, input_records, dataset_options, grid_options, track_options, visualize_options, output_directory)
210 if object_tracks.times[-1] is not None:
211 args = [input_records, tracks, object_options, grid_options]
--> 212 attribute.record(*args)
File ~/Documents/THUNER/thuner/attribute/attribute.py:77, in record(input_records, tracks, object_options, grid_options)
75 for attribute_type in object_options.attributes.attribute_types:
76 for attribute in attribute_type.attributes:
---> 77 attr = retrieve_attribute(kwargs, attribute)
78 obj_attributes.attribute_types[attribute_type.name].update(attr)
80 # Append the current attributes to the attributes dictionary
File ~/Documents/THUNER/thuner/attribute/attribute.py:16, in retrieve_attribute(general_kwargs, attribute, member_object)
13 def retrieve_attribute(general_kwargs, attribute, member_object=None):
14 # Get the retrieval function and arguments for the attribute
15 func_kwargs = general_kwargs.copy()
---> 16 keyword_arguments = attribute.retrieval.keyword_arguments
17 func_kwargs.update(keyword_arguments)
18 # Retrieval functions expect either "attribute" or "attribute_group"
19 # keyword arguments. Infer correct argument name from attribute type.
AttributeError: 'NoneType' object has no attribute 'keyword_arguments'
Fig. 3.5 THUNER applied to synthetic data.
# Second run: the same data, track and visualize options, but on a cartesian
# grid. The run gets its own output directory, which is set up (and optionally
# cleared) BEFORE any option file is written, so the cartesian grid options are
# saved alongside this run rather than overwriting the geographic run's
# grid.yml.
output_parent = base_local / "runs/synthetic/cartesian"
if output_parent.exists() and remove_existing_outputs:
    shutil.rmtree(output_parent)
cartesian_options_directory = output_parent / "options"
cartesian_options_directory.mkdir(parents=True, exist_ok=True)

# Centre of the cartesian grid (the midpoint of the geographic grid used in
# the first run)
central_latitude = -10
central_longitude = 132

# Regular x/y axes (presumably in metres - confirm against GridOptions); the
# stop value is pushed one spacing past the last coordinate so it is included
y = np.arange(-400e3, 400e3 + 2.5e3, 2.5e3).tolist()
x = np.arange(-400e3, 400e3 + 2.5e3, 2.5e3).tolist()

grid_options = option.grid.GridOptions(
    name="cartesian",
    x=x,
    y=y,
    central_latitude=central_latitude,
    central_longitude=central_longitude,
)
grid_options.to_yaml(cartesian_options_directory / "grid.yml")

# Ten-minute time steps from `start` to `end` inclusive
times = np.arange(
    np.datetime64(start),
    np.datetime64(end) + np.timedelta64(10, "m"),
    np.timedelta64(10, "m"),
)

args = [times, data_options, grid_options, track_options, visualize_options]
track.track(*args, output_directory=output_parent)