Skip to content
Open
6 changes: 3 additions & 3 deletions docs/source/user-guide/latest/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,8 @@ All higher-order functions are planned via [#4224](https://github.com/apache/dat

| Function | Status | Notes |
| --- | --- | --- |
| `aes_decrypt` | 🔜 | Falls back; `StaticInvoke` not allowlisted; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)) |
| `aes_encrypt` | 🔜 | Falls back; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)); nondeterministic IV by default |
| `aes_decrypt` | | Routed through the JVM codegen dispatcher |
| `aes_encrypt` | | Routed through the JVM codegen dispatcher; nondeterministic IV by default |
| `assert_true` | 🔜 | Lowers to `RaiseError`, which falls back |
| `current_catalog` | ✅ | Resolved to a literal by the analyzer (`ReplaceCurrentLike`) |
| `current_database` | ✅ | Resolved to a literal by the analyzer (`ReplaceCurrentLike`) |
Expand All @@ -485,7 +485,7 @@ All higher-order functions are planned via [#4224](https://github.com/apache/dat
| `session_user` | ✅ | Alias of `current_user`; resolved to a literal by the analyzer |
| `spark_partition_id` | ✅ | |
| `to_variant_object` | 🔜 | tracking [#4098](https://github.com/apache/datafusion-comet/issues/4098) |
| `try_aes_decrypt` | 🔜 | Falls back; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)) |
| `try_aes_decrypt` | | Routed through the JVM codegen dispatcher |
| `try_parse_json` | 🔜 | tracking [#4098](https://github.com/apache/datafusion-comet/issues/4098) |
| `try_variant_get` | 🔜 | tracking [#4098](https://github.com/apache/datafusion-comet/issues/4098) |
| `typeof` | ✅ | Foldable; resolved to a literal before Comet sees the plan |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
classOf[SparkPartitionID] -> CometSparkPartitionId,
classOf[SortOrder] -> CometSortOrder,
classOf[StaticInvoke] -> CometStaticInvoke,
classOf[TryEval] -> CometTryEval,
classOf[UnscaledValue] -> CometUnscaledValue)

/**
Expand Down
12 changes: 10 additions & 2 deletions spark/src/main/scala/org/apache/comet/serde/statics.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

package org.apache.comet.serde

import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionImplUtils, Literal, UrlCodec}
import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionImplUtils, Literal, TryEval, UrlCodec}
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils

Expand All @@ -38,7 +38,9 @@ object CometStaticInvoke extends CometExpressionSerde[StaticInvoke] {
"read_side_padding"),
("isLuhnNumber", classOf[ExpressionImplUtils]) -> CometScalarFunction("luhn_check"),
("encode", UrlCodec.getClass) -> CometUrlEncodeStaticInvoke,
("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke)
("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke,
("aesEncrypt", classOf[ExpressionImplUtils]) -> CometStaticInvokeCodegenDispatch,
("aesDecrypt", classOf[ExpressionImplUtils]) -> CometStaticInvokeCodegenDispatch)

override def convert(
expr: StaticInvoke,
Expand Down Expand Up @@ -83,3 +85,9 @@ object CometUrlDecodeStaticInvoke extends CometExpressionSerde[StaticInvoke] {
optExprWithFallbackReason(optExpr, expr, expr.children: _*)
}
}

/** Routes a [[StaticInvoke]] through the JVM codegen dispatcher; used for AES. */
object CometStaticInvokeCodegenDispatch extends CometCodegenDispatch[StaticInvoke]

/** Routes [[TryEval]] through the JVM codegen dispatcher; used for `try_aes_decrypt`. */
object CometTryEval extends CometCodegenDispatch[TryEval]
69 changes: 69 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/misc/aes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- Tests for aes_encrypt and aes_decrypt (available since Spark 3.3). They lower to a
-- StaticInvoke of ExpressionImplUtils.aesEncrypt / aesDecrypt; Comet routes both methods
-- through the JVM codegen dispatcher (no native lowering).
-- try_aes_decrypt (Spark 3.5+) is covered in aes_try_decrypt.sql and AES-CBC (Spark 3.5+) in
-- aes_cbc.sql, both gated with MinSparkVersion.

statement
CREATE TABLE test_aes(data STRING, key STRING) USING parquet

statement
INSERT INTO test_aes VALUES
('hello world', '1234567890abcdef'),
('apache spark', '1234567890abcdef'),
('', '1234567890abcdef'),
(NULL, '1234567890abcdef')

-- GCM round-trip (default mode, nondeterministic IV, test via round-trip)
query
SELECT CAST(aes_decrypt(aes_encrypt(data, key), key) AS STRING) FROM test_aes

-- GCM round-trip with explicit mode
query
SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'GCM'), key, 'GCM') AS STRING) FROM test_aes

-- ECB round-trip (deterministic mode)
query
SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) FROM test_aes

-- CBC mode is covered separately in aes_cbc.sql (Spark added AES-CBC in 3.5).

-- ECB direct: output is deterministic so we can compare directly to Spark
query
SELECT aes_encrypt(data, key, 'ECB') FROM test_aes

-- aes_decrypt on ECB-encrypted column
query
SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) FROM test_aes

-- literal key and data (all literals, constant folding disabled in test suite)
query
SELECT CAST(aes_decrypt(aes_encrypt('hello', '1234567890abcdef', 'ECB'), '1234567890abcdef', 'ECB') AS STRING)

query
SELECT CAST(aes_decrypt(aes_encrypt(NULL, '1234567890abcdef', 'ECB'), '1234567890abcdef', 'ECB') AS STRING)

-- 24-byte key
query
SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef12345678', 'ECB'), '1234567890abcdef12345678', 'ECB') AS STRING) FROM test_aes

-- 32-byte key
query
SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef1234567890abcdef', 'ECB'), '1234567890abcdef1234567890abcdef', 'ECB') AS STRING) FROM test_aes
35 changes: 35 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- AES-CBC round-trip for aes_encrypt / aes_decrypt. Comet routes the underlying StaticInvoke
-- through the JVM codegen dispatcher. AES-CBC was added to Spark in 3.5 (SPARK-43042) and
-- throws on 3.4, so this file is gated.
-- MinSparkVersion: 3.5

statement
CREATE TABLE test_aes_cbc(data STRING, key STRING) USING parquet

statement
INSERT INTO test_aes_cbc VALUES
('hello world', '1234567890abcdef'),
('apache spark', '1234567890abcdef'),
('', '1234567890abcdef'),
(NULL, '1234567890abcdef')

-- CBC round-trip (nondeterministic IV, so compare via round-trip rather than raw ciphertext)
query
SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'CBC'), key, 'CBC') AS STRING) FROM test_aes_cbc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- Tests for try_aes_decrypt, which returns NULL on invalid input instead of throwing.
-- try_aes_decrypt lowers to TryEval(StaticInvoke(aesDecrypt, ...)); Comet routes both the
-- TryEval wrapper and the inner StaticInvoke through the JVM codegen dispatcher.
-- try_aes_decrypt was added in Spark 3.5, so this file is gated.
-- MinSparkVersion: 3.5

statement
CREATE TABLE test_aes_try(data STRING, key STRING) USING parquet

statement
INSERT INTO test_aes_try VALUES
('hello world', '1234567890abcdef'),
('apache spark', '1234567890abcdef'),
('', '1234567890abcdef'),
(NULL, '1234567890abcdef')

-- invalid ciphertext returns NULL instead of throwing
query
SELECT try_aes_decrypt(CAST('garbage' AS BINARY), key) FROM test_aes_try

-- valid ciphertext decrypts correctly
query
SELECT CAST(try_aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) FROM test_aes_try

-- literal invalid ciphertext
query
SELECT try_aes_decrypt(CAST('not_valid_ciphertext' AS BINARY), '1234567890abcdef')

-- NULL data
query
SELECT try_aes_decrypt(NULL, '1234567890abcdef')
Loading