1616import pandas as pd
1717import requests
1818import shapely .errors
19+ import shapely .geometry .base
1920import shapely .wkt
2021from requests .adapters import HTTPAdapter , Retry
2122
2223from openeo import BatchJob , Connection
23- from openeo .internal .processes .parse import Process , parse_remote_process_definition
24+ from openeo .internal .processes .parse import (
25+ Parameter ,
26+ Process ,
27+ parse_remote_process_definition ,
28+ )
2429from openeo .rest import OpenEoApiError
2530from openeo .util import deep_get , repr_truncate , rfc3339
2631
@@ -943,11 +948,17 @@ class UDPJobFactory:
943948 """
944949
945950 def __init__ (
946- self , process_id : str , * , namespace : Union [str , None ] = None , parameter_defaults : Optional [dict ] = None
951+ self ,
952+ process_id : str ,
953+ * ,
954+ namespace : Union [str , None ] = None ,
955+ parameter_defaults : Optional [dict ] = None ,
956+ parameter_column_map : Optional [dict ] = None ,
947957 ):
948958 self ._process_id = process_id
949959 self ._namespace = namespace
950960 self ._parameter_defaults = parameter_defaults or {}
961+ self ._parameter_column_map = parameter_column_map
951962
952963 def _get_process_definition (self , connection : Connection ) -> Process :
953964 if isinstance (self ._namespace , str ) and re .match ("https?://" , self ._namespace ):
@@ -979,33 +990,38 @@ def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob:
979990
980991 process_definition = self ._get_process_definition (connection = connection )
981992 parameters = process_definition .parameters or []
993+
994+ if self ._parameter_column_map is None :
995+ self ._parameter_column_map = self ._guess_parameter_column_map (parameters = parameters , row = row )
996+
982997 arguments = {}
983998 for parameter in parameters :
984- name = parameter .name
985- schema = parameter . schema
986- if name in row .index :
987- # Higherst priority: value from dataframe row
988- value = row [ name ]
989- elif name in self ._parameter_defaults :
999+ param_name = parameter .name
1000+ column_name = self . _parameter_column_map . get ( param_name , param_name )
1001+ if column_name in row .index :
1002+ # Get value from dataframe row
1003+ value = row . loc [ column_name ]
1004+ elif param_name in self ._parameter_defaults :
9901005 # Fallback on default values from constructor
991- value = self ._parameter_defaults [name ]
1006+ value = self ._parameter_defaults [param_name ]
9921007 elif parameter .has_default ():
9931008 # Explicitly use default value from parameter schema
9941009 value = parameter .default
9951010 elif parameter .optional :
9961011 # Skip optional parameters without any fallback default value
9971012 continue
9981013 else :
999- raise ValueError (f"Missing required parameter { name !r} for process { self ._process_id !r} " )
1014+ raise ValueError (f"Missing required parameter { param_name !r} for process { self ._process_id !r} " )
10001015
1001- # TODO: validation or normalization based on schema?
1002- # Some pandas/numpy data types need a bit of conversion for JSON encoding
1016+ # Prepare some values/dtypes for JSON encoding
10031017 if isinstance (value , numpy .integer ):
10041018 value = int (value )
10051019 elif isinstance (value , numpy .number ):
10061020 value = float (value )
1021+ elif isinstance (value , shapely .geometry .base .BaseGeometry ):
1022+ value = shapely .geometry .mapping (value )
10071023
1008- arguments [name ] = value
1024+ arguments [param_name ] = value
10091025
10101026 cube = connection .datacube_from_process (process_id = self ._process_id , namespace = self ._namespace , ** arguments )
10111027
@@ -1020,3 +1036,26 @@ def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob:
def __call__(self, *arg, **kwargs) -> BatchJob:
    """Make the factory itself callable: forwards everything to `start_job`."""
    job = self.start_job(*arg, **kwargs)
    return job
1039+
@staticmethod
def _guess_parameter_column_map(parameters: List[Parameter], row: pd.Series) -> dict:
    """
    Derive a parameter name -> column name mapping from the process parameters
    and a single dataframe row.

    Only geometry handling is attempted: parameters whose schema accepts GeoJSON
    are paired with row entries that hold a shapely geometry.

    :param parameters: parameter definitions of the process.
    :param row: dataframe row to inspect for geometry values.
    :return: the guessed mapping (empty when there is nothing to pair up).
    :raises RuntimeError: when both geometry columns and GeoJSON parameters exist,
        but they can not be paired one-to-one or by identical name.
    """
    geojson_names = [param.name for param in parameters if param.schema.accepts_geojson()]
    geometry_labels = [
        label for label, cell in row.items() if isinstance(cell, shapely.geometry.base.BaseGeometry)
    ]

    guessed = {}
    if geojson_names and geometry_labels:
        single_pair = len(geojson_names) == 1 and len(geometry_labels) == 1
        if single_pair:
            # Exactly one candidate on each side: pair them regardless of naming.
            (only_param,) = geojson_names
            (only_column,) = geometry_labels
            guessed[only_param] = only_column
        elif all(name in geometry_labels for name in geojson_names):
            # Every GeoJSON parameter has a geometry column of the same name.
            for name in geojson_names:
                guessed[name] = name
        else:
            raise RuntimeError(
                f"Problem with mapping geometry columns ({geometry_labels}) to process parameters ({geojson_names})"
            )

    _log.debug(f"Guessed parameter-column map: {guessed}")
    return guessed
0 commit comments