88from pathlib import Path
99from typing import List , Optional , Union
1010
11+ import numpy as np
1112import xarray
1213import xarray .testing
14+ from xarray import DataArray
1315
1416from openeo .rest .job import DEFAULT_JOB_RESULTS_FILENAME , BatchJob , JobResults
1517from openeo .util import repr_truncate
1618
1719_log = logging .getLogger (__name__ )
1820
19-
2021_DEFAULT_RTOL = 1e-6
2122_DEFAULT_ATOL = 1e-6
2223
24+ # https://paulbourke.net/dataformats/asciiart
25+ DEFAULT_GRAYSCALE_70_CHARACTERS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\" ^`'. " [::- 1 ]
26+ DEFAULT_GRAYSCALE_10_CHARACTERS = " .:-=+*#%@"
2327
2428def _load_xarray_netcdf (path : Union [str , Path ], ** kwargs ) -> xarray .Dataset :
2529 """
@@ -88,12 +92,105 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
8892 return data
8993
9094
95+ def _ascii_art (
96+ diff_data : DataArray ,
97+ * ,
98+ max_width : int = 60 ,
99+ y_vs_x_aspect_ratio = 2.5 ,
100+ grayscale_characters : str = DEFAULT_GRAYSCALE_70_CHARACTERS ,
101+ ) -> str :
102+ max_grayscale_idx = len (grayscale_characters ) - 1
103+ x_scale : int = max (1 , int (diff_data .sizes ["x" ] / max_width ))
104+ y_scale : int = max (1 , int (diff_data .sizes ["y" ] / (max_width / y_vs_x_aspect_ratio )))
105+ data_max = diff_data .max ().item ()
106+ if data_max == 0 :
107+ data_max = 1
108+ coarsened = diff_data .coarsen (dim = {"x" : x_scale , "y" : y_scale }, boundary = "pad" ).mean ()
109+ coarsened = coarsened .transpose ("y" , "x" , ...)
110+ top = "┌" + "─" * coarsened .sizes ["x" ] + "┐\n "
111+ bottom = "\n └" + "─" * coarsened .sizes ["x" ] + "┘"
112+
113+ def _pixel_char (v ) -> str :
114+ i = 0 if np .isnan (v ) else int (v * max_grayscale_idx / data_max )
115+ if v > 0 and i == 0 :
116+ i = 1 # don't show a blank for a difference above the threshold
117+ else :
118+ i = min (max_grayscale_idx , i )
119+ return grayscale_characters [i ]
120+
121+ return top + "\n " .join (["│" + "" .join ([_pixel_char (v ) for v in row ]) + "│" for row in coarsened ]) + bottom
122+
123+
124+ def _compare_xarray_dataarray_xy (
125+ actual : Union [xarray .DataArray , str , Path ],
126+ expected : Union [xarray .DataArray , str , Path ],
127+ * ,
128+ rtol : float = _DEFAULT_RTOL ,
129+ atol : float = _DEFAULT_ATOL ,
130+ name : str = None ,
131+ ) -> List [str ]:
132+ """
133+ Additional compare for two compatible spatial xarray DataArrays with tolerance (rtol, atol)
134+ :return: list of issues (empty if no issues)
135+ """
136+ issues = []
137+ threshold = abs (expected * rtol ) + atol
138+ diff_exact = abs (expected - actual )
139+ diff_mask = diff_exact > threshold
140+ diff_lenient = diff_exact .where (diff_mask )
141+
142+ non_x_y_dims = list (set (expected .dims ) - {"x" , "y" })
143+ value_mapping = dict (map (lambda d : (d , expected [d ].data ), non_x_y_dims ))
144+ shape = tuple ([len (value_mapping [x ]) for x in non_x_y_dims ])
145+
146+ for shape_index , v in np .ndenumerate (np .ndarray (shape )):
147+ indexers = {}
148+ for index , value_index in enumerate (shape_index ):
149+ indexers [non_x_y_dims [index ]] = value_mapping [non_x_y_dims [index ]][value_index ]
150+ diff_data = diff_lenient .sel (indexers = indexers )
151+ total_pixel_count = expected .sel (indexers ).count ().item ()
152+ diff_pixel_count = diff_data .count ().item ()
153+
154+ if diff_pixel_count > 0 :
155+ diff_pixel_percentage = round (diff_pixel_count * 100 / total_pixel_count , 1 )
156+ diff_mean = round (diff_data .mean ().item (), 2 )
157+ diff_var = round (diff_data .var ().item (), 2 )
158+
159+ key = name + ": " if name else ""
160+ key += "," .join ([f"{ k } { str (v1 )} " for k , v1 in indexers .items ()])
161+ issues .append (
162+ f"{ key } : value difference exceeds tolerance (rtol { rtol } , atol { atol } ), min:{ diff_data .min ().data } , max: { diff_data .max ().data } , mean: { diff_mean } , var: { diff_var } "
163+ )
164+
165+ _log .warning (f"Difference (ascii art) for { key } :\n { _ascii_art (diff_data )} " )
166+
167+ coord_grid = np .meshgrid (diff_data .coords ["x" ], diff_data .coords ["y" ])
168+ mask = diff_data .notnull ()
169+ if mask .dims [0 ] != "y" :
170+ mask = mask .transpose ()
171+ x_coords = coord_grid [0 ][mask ]
172+ y_coords = coord_grid [1 ][mask ]
173+
174+ diff_bbox = ((x_coords .min ().item (), y_coords .min ().item ()), (x_coords .max ().item (), y_coords .max ().item ()))
175+ diff_area = (x_coords .max () - x_coords .min ()) * (y_coords .max () - y_coords .min ())
176+ total_area = abs (
177+ (diff_data .coords ["y" ][- 1 ].data - diff_data .coords ["y" ][0 ].data )
178+ * (diff_data .coords ["x" ][- 1 ].data - diff_data .coords ["x" ][0 ].data )
179+ )
180+ area_percentage = round (diff_area * 100 / total_area , 1 )
181+ issues .append (
182+ f"{ key } : differing pixels: { diff_pixel_count } /{ total_pixel_count } ({ diff_pixel_percentage } %), bbox { diff_bbox } - { area_percentage } % of the area"
183+ )
184+ return issues
185+
186+
91187def _compare_xarray_dataarray (
92188 actual : Union [xarray .DataArray , str , Path ],
93189 expected : Union [xarray .DataArray , str , Path ],
94190 * ,
95191 rtol : float = _DEFAULT_RTOL ,
96192 atol : float = _DEFAULT_ATOL ,
193+ name : str = None ,
97194) -> List [str ]:
98195 """
99196 Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
@@ -116,7 +213,7 @@ def _compare_xarray_dataarray(
116213 issues = []
117214
118215 # `xarray.testing.assert_allclose` currently does not always
119- # provides detailed information about shape/dimension mismatches
216+ # provide detailed information about shape/dimension mismatches
120217 # so we enrich the issue listing with some more details
121218 if actual .dims != expected .dims :
122219 issues .append (f"Dimension mismatch: { actual .dims } != { expected .dims } " )
@@ -127,13 +224,14 @@ def _compare_xarray_dataarray(
127224 issues .append (f"Coordinates mismatch for dimension { dim !r} : { acs } != { ecs } " )
128225 if actual .shape != expected .shape :
129226 issues .append (f"Shape mismatch: { actual .shape } != { expected .shape } " )
130-
227+ compatible = len ( issues ) == 0
131228 try :
132229 xarray .testing .assert_allclose (a = actual , b = expected , rtol = rtol , atol = atol )
133230 except AssertionError as e :
134231 # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
135232 issues .append (str (e ).strip ())
136-
233+ if compatible and {"x" , "y" } <= set (expected .dims ):
234+ issues .extend (_compare_xarray_dataarray_xy (actual = actual , expected = expected , rtol = rtol , atol = atol , name = name ))
137235 return issues
138236
139237
@@ -162,7 +260,6 @@ def assert_xarray_dataarray_allclose(
162260 if issues :
163261 raise AssertionError ("\n " .join (issues ))
164262
165-
166263def _compare_xarray_datasets (
167264 actual : Union [xarray .Dataset , str , Path ],
168265 expected : Union [xarray .Dataset , str , Path ],
@@ -180,15 +277,14 @@ def _compare_xarray_datasets(
180277 expected = _as_xarray_dataset (expected )
181278
182279 all_issues = []
183- # TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this?
184280 actual_vars = set (actual .data_vars )
185281 expected_vars = set (expected .data_vars )
186282 _log .debug (f"_compare_xarray_datasets: actual_vars={ actual_vars !r} expected_vars={ expected_vars !r} " )
187283 if actual_vars != expected_vars :
188284 all_issues .append (f"Xarray DataSet variables mismatch: { actual_vars } != { expected_vars } " )
189285 for var in expected_vars .intersection (actual_vars ):
190286 _log .debug (f"_compare_xarray_datasets: comparing variable { var !r} " )
191- issues = _compare_xarray_dataarray (actual [var ], expected [var ], rtol = rtol , atol = atol )
287+ issues = _compare_xarray_dataarray (actual [var ], expected [var ], rtol = rtol , atol = atol , name = var )
192288 if issues :
193289 all_issues .append (f"Issues for variable { var !r} :" )
194290 all_issues .extend (issues )
0 commit comments