Skip to content

Commit 941c24b

Browse files
uros-db authored and dongjoon-hyun committed
[SPARK-54201][GEO][SQL] Allow casting from GeographyType(srid) to GeographyType(ANY)
### What changes were proposed in this pull request?
This PR allows casting fixed SRID type `GEOGRAPHY(<srid>)` to mixed SRID type `GEOGRAPHY(ANY)`.

### Why are the changes needed?
Enable explicit casting between geography types.

### Does this PR introduce _any_ user-facing change?
Yes, casting `GEOGRAPHY(<srid>)` to `GEOGRAPHY(ANY)` is now allowed.

### How was this patch tested?
Added new unit tests:
- `StUtilsSuite`
- `CastSuiteBase`

Added new e2e SQL tests:
- `st-functions`

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #52906 from uros-db/geo-cast-geography_any.

Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 7a1b481 commit 941c24b

File tree

8 files changed

+199
-0
lines changed

8 files changed

+199
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ object Cast extends QueryErrorsBase {
164164

165165
case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if udt2.acceptsType(udt1) => true
166166

167+
// Casting from concrete GEOGRAPHY(srid) to mixed GEOGRAPHY(ANY) is allowed.
168+
case (gt1: GeographyType, gt2: GeographyType) if !gt1.isMixedSrid && gt2.isMixedSrid =>
169+
true
167170
// Casting from GEOGRAPHY to GEOMETRY with the same SRID is allowed.
168171
case (geog: GeographyType, geom: GeometryType) if geog.srid == geom.srid =>
169172
true
@@ -294,6 +297,9 @@ object Cast extends QueryErrorsBase {
294297

295298
case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if udt2.acceptsType(udt1) => true
296299

300+
// Casting from concrete GEOGRAPHY(srid) to mixed GEOGRAPHY(ANY) is allowed.
301+
case (gt1: GeographyType, gt2: GeographyType) if !gt1.isMixedSrid && gt2.isMixedSrid =>
302+
true
297303
// Casting from GEOGRAPHY to GEOMETRY with the same SRID is allowed.
298304
case (geog: GeographyType, geom: GeometryType) if geog.srid == geom.srid =>
299305
true
@@ -1232,6 +1238,7 @@ case class Cast(
12321238
case FloatType => castToFloat(from)
12331239
case LongType => castToLong(from)
12341240
case DoubleType => castToDouble(from)
1241+
case _: GeographyType => identity
12351242
case _: GeometryType => castToGeometry(from)
12361243
case array: ArrayType =>
12371244
castArray(from.asInstanceOf[ArrayType].elementType, array.elementType)
@@ -1341,6 +1348,7 @@ case class Cast(
13411348
case FloatType => castToFloatCode(from, ctx)
13421349
case LongType => castToLongCode(from, ctx)
13431350
case DoubleType => castToDoubleCode(from, ctx)
1351+
case _: GeographyType => (c, evPrim, _) => code"$evPrim = $c;"
13441352
case _: GeometryType => castToGeometryCode(from)
13451353

13461354
case array: ArrayType =>

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,27 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
14911491

14921492
// The following tests are confirming the behavior of casting between geospatial types.
14931493

1494+
test("Casting GeographyType to GeographyType") {
1495+
// Casting from fixed SRID GEOGRAPHY(<srid>) to mixed SRID GEOGRAPHY(ANY) is always allowed.
1496+
// Type casting is always safe in this direction, so no additional constraints are imposed.
1497+
// Casting from mixed SRID GEOGRAPHY(ANY) to fixed SRID GEOGRAPHY(<srid>) is not allowed.
1498+
// Type casting can be unsafe in this direction, because per-row SRID values may be different.
1499+
1500+
// Valid cast test cases.
1501+
val canCastTestCases: Seq[(DataType, DataType)] = Seq(
1502+
(GeographyType(4326), GeographyType("ANY"))
1503+
)
1504+
// Iterate over the test cases and verify casting.
1505+
canCastTestCases.foreach { case (fromType, toType) =>
1506+
// Cast can be performed from `fromType` to `toType`.
1507+
assert(Cast.canCast(fromType, toType))
1508+
assert(Cast.canAnsiCast(fromType, toType))
1509+
// Cast cannot be performed from `toType` to `fromType`.
1510+
assert(!Cast.canCast(toType, fromType))
1511+
assert(!Cast.canAnsiCast(toType, fromType))
1512+
}
1513+
}
1514+
14941515
test("Casting GeographyType to GeometryType") {
14951516
// Casting from GEOGRAPHY to GEOMETRY is only allowed if the SRIDs are the same.
14961517

sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,35 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
6666
}
6767

6868

69+
-- !query
70+
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOGRAPHY(ANY)))) AS result
71+
-- !query analysis
72+
Project [hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as geography(any)))) AS result#x]
73+
+- OneRowRelation
74+
75+
76+
-- !query
77+
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326)) AS result
78+
-- !query analysis
79+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
80+
{
81+
"errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
82+
"sqlState" : "42K09",
83+
"messageParameters" : {
84+
"sqlExpr" : "\"CAST(CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS GEOGRAPHY(ANY)) AS GEOGRAPHY(4326))\"",
85+
"srcType" : "\"GEOGRAPHY(ANY)\"",
86+
"targetType" : "\"GEOGRAPHY(4326)\""
87+
},
88+
"queryContext" : [ {
89+
"objectType" : "",
90+
"objectName" : "",
91+
"startIndex" : 8,
92+
"stopIndex" : 109,
93+
"fragment" : "CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326))"
94+
} ]
95+
}
96+
97+
6998
-- !query
7099
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(4326)))) AS result
71100
-- !query analysis

sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,35 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
6666
}
6767

6868

69+
-- !query
70+
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOGRAPHY(ANY)))) AS result
71+
-- !query analysis
72+
Project [hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as geography(any)))) AS result#x]
73+
+- OneRowRelation
74+
75+
76+
-- !query
77+
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326)) AS result
78+
-- !query analysis
79+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
80+
{
81+
"errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
82+
"sqlState" : "42K09",
83+
"messageParameters" : {
84+
"sqlExpr" : "\"CAST(CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS GEOGRAPHY(ANY)) AS GEOGRAPHY(4326))\"",
85+
"srcType" : "\"GEOGRAPHY(ANY)\"",
86+
"targetType" : "\"GEOGRAPHY(4326)\""
87+
},
88+
"queryContext" : [ {
89+
"objectType" : "",
90+
"objectName" : "",
91+
"startIndex" : 8,
92+
"stopIndex" : 109,
93+
"fragment" : "CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326))"
94+
} ]
95+
}
96+
97+
6998
-- !query
7099
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(4326)))) AS result
71100
-- !query analysis

sql/core/src/test/resources/sql-tests/inputs/st-functions.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ INSERT INTO geodata VALUES
1313
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS STRING) AS result;
1414
SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) AS result;
1515

16+
-- Casting GEOGRAPHY(<srid>) to GEOGRAPHY(ANY) is allowed.
17+
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOGRAPHY(ANY)))) AS result;
18+
-- Casting GEOGRAPHY(ANY) to GEOGRAPHY(<srid>) is not allowed.
19+
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326)) AS result;
20+
1621
-- Casting GEOGRAPHY to GEOMETRY is allowed only if SRIDs match.
1722
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(4326)))) AS result;
1823
-- Error handling: mismatched SRIDs.

sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,38 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
7373
}
7474

7575

76+
-- !query
77+
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOGRAPHY(ANY)))) AS result
78+
-- !query schema
79+
struct<result:string>
80+
-- !query output
81+
0101000000000000000000F03F0000000000000040
82+
83+
84+
-- !query
85+
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326)) AS result
86+
-- !query schema
87+
struct<>
88+
-- !query output
89+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
90+
{
91+
"errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
92+
"sqlState" : "42K09",
93+
"messageParameters" : {
94+
"sqlExpr" : "\"CAST(CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS GEOGRAPHY(ANY)) AS GEOGRAPHY(4326))\"",
95+
"srcType" : "\"GEOGRAPHY(ANY)\"",
96+
"targetType" : "\"GEOGRAPHY(4326)\""
97+
},
98+
"queryContext" : [ {
99+
"objectType" : "",
100+
"objectName" : "",
101+
"startIndex" : 8,
102+
"stopIndex" : 109,
103+
"fragment" : "CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326))"
104+
} ]
105+
}
106+
107+
76108
-- !query
77109
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(4326)))) AS result
78110
-- !query schema

sql/core/src/test/resources/sql-tests/results/st-functions.sql.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,38 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
7373
}
7474

7575

76+
-- !query
77+
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOGRAPHY(ANY)))) AS result
78+
-- !query schema
79+
struct<result:string>
80+
-- !query output
81+
0101000000000000000000F03F0000000000000040
82+
83+
84+
-- !query
85+
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326)) AS result
86+
-- !query schema
87+
struct<>
88+
-- !query output
89+
org.apache.spark.sql.catalyst.ExtendedAnalysisException
90+
{
91+
"errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
92+
"sqlState" : "42K09",
93+
"messageParameters" : {
94+
"sqlExpr" : "\"CAST(CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS GEOGRAPHY(ANY)) AS GEOGRAPHY(4326))\"",
95+
"srcType" : "\"GEOGRAPHY(ANY)\"",
96+
"targetType" : "\"GEOGRAPHY(4326)\""
97+
},
98+
"queryContext" : [ {
99+
"objectType" : "",
100+
"objectName" : "",
101+
"startIndex" : 8,
102+
"stopIndex" : 109,
103+
"fragment" : "CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOGRAPHY(ANY) AS GEOGRAPHY(4326))"
104+
} ]
105+
}
106+
107+
76108
-- !query
77109
SELECT hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS GEOMETRY(4326)))) AS result
78110
-- !query schema

sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class STExpressionsSuite
3131
// Private common constants used across several tests.
3232
private final val defaultGeographySrid: Int = ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID
3333
private final val defaultGeographyType: DataType = GeographyType(defaultGeographySrid)
34+
private final val mixedSridGeographyType: DataType = GeographyType("ANY")
3435
private final val defaultGeometrySrid: Int = ExpressionDefaults.DEFAULT_GEOMETRY_SRID
3536
private final val defaultGeometryType: DataType = GeometryType(defaultGeometrySrid)
3637

@@ -39,6 +40,48 @@ class STExpressionsSuite
3940
assert(sql(query).schema.fields.head.dataType.sameType(expectedDataType))
4041
}
4142

43+
/** Geospatial type casting. */
44+
45+
test("Cast GEOGRAPHY(srid) to GEOGRAPHY(ANY)") {
46+
// Test data: WKB representation of POINT(1 2).
47+
val wkbString = "0101000000000000000000F03F0000000000000040"
48+
val wkb = Hex.unhex(wkbString.getBytes())
49+
val wkbLiteral = Literal.create(wkb, BinaryType)
50+
51+
// Construct the input GEOGRAPHY expression.
52+
val geogExpr = ST_GeogFromWKB(wkbLiteral)
53+
assert(geogExpr.dataType.sameType(defaultGeographyType))
54+
checkEvaluation(ST_AsBinary(geogExpr), wkb)
55+
// Cast the GEOGRAPHY with fixed SRID to GEOGRAPHY with mixed SRID.
56+
val castExpr = Cast(geogExpr, mixedSridGeographyType)
57+
assert(castExpr.dataType.sameType(mixedSridGeographyType))
58+
checkEvaluation(ST_AsBinary(castExpr), wkb)
59+
60+
// Construct the input GEOGRAPHY SQL query, using WKB literal.
61+
val geogQueryLit: String = s"ST_GeogFromWKB(X'$wkbString')"
62+
assertType(s"SELECT $geogQueryLit", defaultGeographyType)
63+
checkAnswer(sql(s"SELECT ST_AsBinary($geogQueryLit)"), Row(wkb))
64+
// Cast the GEOGRAPHY with fixed SRID to GEOGRAPHY with mixed SRID.
65+
val castQueryLit = s"$geogQueryLit::GEOGRAPHY(ANY)"
66+
assertType(s"SELECT $castQueryLit", mixedSridGeographyType)
67+
checkAnswer(sql(s"SELECT ST_AsBinary($castQueryLit)"), Row(wkb))
68+
69+
withTable("tbl") {
70+
// Construct the test table with WKB.
71+
sql(s"CREATE TABLE tbl (wkb BINARY)")
72+
sql(s"INSERT INTO tbl VALUES (X'$wkbString')")
73+
74+
// Construct the input GEOGRAPHY SQL query, using WKB column.
75+
val geogQueryCol: String = s"ST_GeogFromWKB(wkb)"
76+
assertType(s"SELECT $geogQueryCol FROM tbl", defaultGeographyType)
77+
checkAnswer(sql(s"SELECT ST_AsBinary($geogQueryCol) FROM tbl"), Row(wkb))
78+
// Cast the GEOGRAPHY with fixed SRID to GEOGRAPHY with mixed SRID.
79+
val castQueryCol = s"$geogQueryCol::GEOGRAPHY(ANY)"
80+
assertType(s"SELECT $castQueryCol FROM tbl", mixedSridGeographyType)
81+
checkAnswer(sql(s"SELECT ST_AsBinary($castQueryCol) FROM tbl"), Row(wkb))
82+
}
83+
}
84+
4285
/** ST reader/writer expressions. */
4386

4487
test("ST_AsBinary") {

0 commit comments

Comments
 (0)