From 4000b95d4f8034485e3f8523b61ca4c1a61f62d4 Mon Sep 17 00:00:00 2001
From: Zeel
Date: Sun, 12 Apr 2026 08:18:50 -0400
Subject: [PATCH 1/2] Support None comparisons for null expressions

---
 python/datafusion/expr.py |  4 ++++
 python/tests/test_expr.py | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 7cd74ecd5..32004656f 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -483,6 +483,8 @@ def __eq__(self, rhs: object) -> Expr:
 
         Accepts either an expression or any valid PyArrow scalar literal value.
         """
+        if rhs is None:
+            return self.is_null()
         if not isinstance(rhs, Expr):
             rhs = Expr.literal(rhs)
         return Expr(self.expr.__eq__(rhs.expr))
@@ -492,6 +494,8 @@ def __ne__(self, rhs: object) -> Expr:
 
         Accepts either an expression or any valid PyArrow scalar literal value.
         """
+        if rhs is None:
+            return self.is_not_null()
         if not isinstance(rhs, Expr):
             rhs = Expr.literal(rhs)
         return Expr(self.expr.__ne__(rhs.expr))
diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py
index 1cf824a15..1548cb618 100644
--- a/python/tests/test_expr.py
+++ b/python/tests/test_expr.py
@@ -173,6 +173,24 @@ def test_relational_expr(test_ctx):
     assert df.filter(col("a") == "beta").count() == 0
 
 
+def test_relational_expr_none_uses_null_predicates():
+    ctx = SessionContext()
+
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 2, None]),
+            pa.array(["alpha", None, "gamma"], type=pa.string_view()),
+        ],
+        names=["a", "b"],
+    )
+    df = ctx.create_dataframe([[batch]], name="batch_with_nulls")
+
+    assert df.filter(col("a") == None).count() == 1  # noqa: E711
+    assert df.filter(col("a") != None).count() == 2  # noqa: E711
+    assert df.filter(col("b") == None).count() == 1  # noqa: E711
+    assert df.filter(col("b") != None).count() == 2  # noqa: E711
+
+
 def test_expr_to_variant():
     # Taken from https://github.com/apache/datafusion-python/issues/781
     from datafusion import SessionContext

From 85e710efc0d10b319840bc1cbb0f87fbc139ce94 Mon Sep 17 00:00:00 2001
From: Zeel
Date: Sun, 12 Apr 2026 19:30:33 -0400
Subject: [PATCH 2/2] Fold None comparison coverage into relational expr test

---
 python/tests/test_expr.py | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py
index 1548cb618..d046eb48c 100644
--- a/python/tests/test_expr.py
+++ b/python/tests/test_expr.py
@@ -153,8 +153,8 @@ def test_relational_expr(test_ctx):
 
     batch = pa.RecordBatch.from_arrays(
         [
-            pa.array([1, 2, 3]),
-            pa.array(["alpha", "beta", "gamma"], type=pa.string_view()),
+            pa.array([1, 2, 3, None]),
+            pa.array(["alpha", "beta", "gamma", None], type=pa.string_view()),
         ],
         names=["a", "b"],
     )
@@ -171,24 +171,10 @@ def test_relational_expr(test_ctx):
     assert df.filter(col("b") != "beta").count() == 2
 
     assert df.filter(col("a") == "beta").count() == 0
-
-
-def test_relational_expr_none_uses_null_predicates():
-    ctx = SessionContext()
-
-    batch = pa.RecordBatch.from_arrays(
-        [
-            pa.array([1, 2, None]),
-            pa.array(["alpha", None, "gamma"], type=pa.string_view()),
-        ],
-        names=["a", "b"],
-    )
-    df = ctx.create_dataframe([[batch]], name="batch_with_nulls")
-
     assert df.filter(col("a") == None).count() == 1  # noqa: E711
-    assert df.filter(col("a") != None).count() == 2  # noqa: E711
+    assert df.filter(col("a") != None).count() == 3  # noqa: E711
     assert df.filter(col("b") == None).count() == 1  # noqa: E711
-    assert df.filter(col("b") != None).count() == 2  # noqa: E711
+    assert df.filter(col("b") != None).count() == 3  # noqa: E711
 
 
 def test_expr_to_variant():