diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index b5ffd40161..ed190bebb9 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -466,8 +466,8 @@ All higher-order functions are planned via [#4224](https://github.com/apache/dat | Function | Status | Notes | | --- | --- | --- | -| `aes_decrypt` | 🔜 | Falls back; `StaticInvoke` not allowlisted; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)) | -| `aes_encrypt` | 🔜 | Falls back; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)); nondeterministic IV by default | +| `aes_decrypt` | ✅ | Routed through the JVM codegen dispatcher | +| `aes_encrypt` | ✅ | Routed through the JVM codegen dispatcher; nondeterministic IV by default | | `assert_true` | 🔜 | Lowers to `RaiseError`, which falls back | | `current_catalog` | ✅ | Resolved to a literal by the analyzer (`ReplaceCurrentLike`) | | `current_database` | ✅ | Resolved to a literal by the analyzer (`ReplaceCurrentLike`) | @@ -485,7 +485,7 @@ All higher-order functions are planned via [#4224](https://github.com/apache/dat | `session_user` | ✅ | Alias of `current_user`; resolved to a literal by the analyzer | | `spark_partition_id` | ✅ | | | `to_variant_object` | 🔜 | tracking [#4098](https://github.com/apache/datafusion-comet/issues/4098) | -| `try_aes_decrypt` | 🔜 | Falls back; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)) | +| `try_aes_decrypt` | ✅ | Routed through the JVM codegen dispatcher | | `try_parse_json` | 🔜 | tracking [#4098](https://github.com/apache/datafusion-comet/issues/4098) | | `try_variant_get` | 🔜 | tracking [#4098](https://github.com/apache/datafusion-comet/issues/4098) | | `typeof` | ✅ | Foldable; resolved to a literal before Comet sees the plan | diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 385528a2eb..47525f1e4a 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -285,6 +285,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { classOf[SparkPartitionID] -> CometSparkPartitionId, classOf[SortOrder] -> CometSortOrder, classOf[StaticInvoke] -> CometStaticInvoke, + classOf[TryEval] -> CometTryEval, classOf[UnscaledValue] -> CometUnscaledValue) /** diff --git a/spark/src/main/scala/org/apache/comet/serde/statics.scala b/spark/src/main/scala/org/apache/comet/serde/statics.scala index b2a4b991d1..33817b7383 100644 --- a/spark/src/main/scala/org/apache/comet/serde/statics.scala +++ b/spark/src/main/scala/org/apache/comet/serde/statics.scala @@ -19,7 +19,7 @@ package org.apache.comet.serde -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionImplUtils, Literal, UrlCodec} +import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionImplUtils, Literal, TryEval, UrlCodec} import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils @@ -38,7 +38,9 @@ object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] { "read_side_padding"), ("isLuhnNumber", classOf[ExpressionImplUtils]) -> CometScalarFunction("luhn_check"), ("encode", UrlCodec.getClass) -> CometUrlEncodeStaticInvoke, - ("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke) + ("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke, + ("aesEncrypt", classOf[ExpressionImplUtils]) -> CometStaticInvokeCodegenDispatch, + ("aesDecrypt", classOf[ExpressionImplUtils]) -> CometStaticInvokeCodegenDispatch) override def convert( expr: StaticInvoke, @@ -83,3 +85,9 @@ object CometUrlDecodeStaticInvoke extends CometExpressionSerde[StaticInvoke] { optExprWithFallbackReason(optExpr, expr, expr.children: _*) } } + +/** Routes a [[StaticInvoke]] through the JVM codegen dispatcher; used for AES. */ +object CometStaticInvokeCodegenDispatch extends CometCodegenDispatch[StaticInvoke] + +/** Routes [[TryEval]] through the JVM codegen dispatcher; used for `try_aes_decrypt`. */ +object CometTryEval extends CometCodegenDispatch[TryEval] diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes.sql b/spark/src/test/resources/sql-tests/expressions/misc/aes.sql new file mode 100644 index 0000000000..6e2cc4fba2 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/misc/aes.sql @@ -0,0 +1,69 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Tests for aes_encrypt and aes_decrypt (available since Spark 3.3). They lower to a +-- StaticInvoke of ExpressionImplUtils.aesEncrypt / aesDecrypt; Comet routes both methods +-- through the JVM codegen dispatcher (no native lowering). +-- try_aes_decrypt (Spark 3.5+) is covered in aes_try_decrypt.sql and AES-CBC (Spark 3.5+) in +-- aes_cbc.sql, both gated with MinSparkVersion. + +statement +CREATE TABLE test_aes(data STRING, key STRING) USING parquet + +statement +INSERT INTO test_aes VALUES + ('hello world', '1234567890abcdef'), + ('apache spark', '1234567890abcdef'), + ('', '1234567890abcdef'), + (NULL, '1234567890abcdef') + +-- GCM round-trip (default mode, nondeterministic IV, test via round-trip) +query +SELECT CAST(aes_decrypt(aes_encrypt(data, key), key) AS STRING) FROM test_aes + +-- GCM round-trip with explicit mode +query +SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'GCM'), key, 'GCM') AS STRING) FROM test_aes + +-- ECB round-trip (deterministic mode) +query +SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) FROM test_aes + +-- CBC mode is covered separately in aes_cbc.sql (Spark added AES-CBC in 3.5). + +-- ECB direct: output is deterministic so we can compare directly to Spark +query +SELECT aes_encrypt(data, key, 'ECB') FROM test_aes + +-- aes_decrypt on ECB-encrypted column +query +SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) FROM test_aes + +-- literal key and data (all literals, constant folding disabled in test suite) +query +SELECT CAST(aes_decrypt(aes_encrypt('hello', '1234567890abcdef', 'ECB'), '1234567890abcdef', 'ECB') AS STRING) + +query +SELECT CAST(aes_decrypt(aes_encrypt(NULL, '1234567890abcdef', 'ECB'), '1234567890abcdef', 'ECB') AS STRING) + +-- 24-byte key +query +SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef12345678', 'ECB'), '1234567890abcdef12345678', 'ECB') AS STRING) FROM test_aes + +-- 32-byte key +query +SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef1234567890abcdef', 'ECB'), '1234567890abcdef1234567890abcdef', 'ECB') AS STRING) FROM test_aes diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql b/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql new file mode 100644 index 0000000000..fb660b9a71 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql @@ -0,0 +1,35 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- AES-CBC round-trip for aes_encrypt / aes_decrypt. Comet routes the underlying StaticInvoke +-- through the JVM codegen dispatcher. AES-CBC was added to Spark in 3.5 (SPARK-43042) and +-- throws on 3.4, so this file is gated. +-- MinSparkVersion: 3.5 + +statement +CREATE TABLE test_aes_cbc(data STRING, key STRING) USING parquet + +statement +INSERT INTO test_aes_cbc VALUES + ('hello world', '1234567890abcdef'), + ('apache spark', '1234567890abcdef'), + ('', '1234567890abcdef'), + (NULL, '1234567890abcdef') + +-- CBC round-trip (nondeterministic IV, so compare via round-trip rather than raw ciphertext) +query +SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'CBC'), key, 'CBC') AS STRING) FROM test_aes_cbc diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql b/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql new file mode 100644 index 0000000000..20e1df6971 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql @@ -0,0 +1,48 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Tests for try_aes_decrypt, which returns NULL on invalid input instead of throwing. +-- try_aes_decrypt lowers to TryEval(StaticInvoke(aesDecrypt, ...)); Comet routes both the +-- TryEval wrapper and the inner StaticInvoke through the JVM codegen dispatcher. +-- try_aes_decrypt was added in Spark 3.5, so this file is gated. +-- MinSparkVersion: 3.5 + +statement +CREATE TABLE test_aes_try(data STRING, key STRING) USING parquet + +statement +INSERT INTO test_aes_try VALUES + ('hello world', '1234567890abcdef'), + ('apache spark', '1234567890abcdef'), + ('', '1234567890abcdef'), + (NULL, '1234567890abcdef') + +-- invalid ciphertext returns NULL instead of throwing +query +SELECT try_aes_decrypt(CAST('garbage' AS BINARY), key) FROM test_aes_try + +-- valid ciphertext decrypts correctly +query +SELECT CAST(try_aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) FROM test_aes_try + +-- literal invalid ciphertext +query +SELECT try_aes_decrypt(CAST('not_valid_ciphertext' AS BINARY), '1234567890abcdef') + +-- NULL data +query +SELECT try_aes_decrypt(NULL, '1234567890abcdef')