From cdec2025383ed2c6549ffaa37f1dee199e7d1f9b Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 8 Oct 2024 17:30:57 -0400 Subject: [PATCH 001/248] Ts/minor updates release process (#903) * Add instructions for updating submodule to test a release * Apply formatting to changelog script --- dev/release/README.md | 1 + dev/release/generate-changelog.py | 76 +++++++++++++++++++------------ 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/dev/release/README.md b/dev/release/README.md index 49fd9de2d..b2c015e1d 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -169,6 +169,7 @@ cd datafusion-python # checkout the release commit git fetch --tags git checkout 40.0.0-rc1 +git submodule update --init --recursive # create the env python3 -m venv venv diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 5645d2f74..0f07457d0 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -22,18 +22,22 @@ import re import subprocess + def print_pulls(repo_name, title, pulls): - if len(pulls) > 0: + if len(pulls) > 0: print("**{}:**".format(title)) print() - for (pull, commit) in pulls: + for pull, commit in pulls: url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) - print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, commit.author.login)) + print( + "- {} [#{}]({}) ({})".format( + pull.title, pull.number, url, commit.author.login + ) + ) print() def generate_changelog(repo, repo_name, tag1, tag2, version): - # get a list of commits between two tags print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) comparison = repo.compare(tag1, tag2) @@ -61,29 +65,27 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): # categorize the pull requests based on GitHub labels print("Categorizing pull requests", file=sys.stderr) - for (pull, commit) in all_pulls: - + for pull, commit in all_pulls: # see if PR title uses Conventional 
Commits - cc_type = '' - cc_scope = '' - cc_breaking = '' - parts = re.findall(r'^([a-z]+)(\([a-z]+\))?(!)?:', pull.title) + cc_type = "" + cc_breaking = "" + parts = re.findall(r"^([a-z]+)(\([a-z]+\))?(!)?:", pull.title) if len(parts) == 1: parts_tuple = parts[0] - cc_type = parts_tuple[0] # fix, feat, docs, chore - cc_scope = parts_tuple[1] # component within project - cc_breaking = parts_tuple[2] == '!' + cc_type = parts_tuple[0] # fix, feat, docs, chore + # cc_scope = parts_tuple[1] # component within project + cc_breaking = parts_tuple[2] == "!" labels = [label.name for label in pull.labels] - if 'api change' in labels or cc_breaking: + if "api change" in labels or cc_breaking: breaking.append((pull, commit)) - elif 'bug' in labels or cc_type == 'fix': + elif "bug" in labels or cc_type == "fix": bugs.append((pull, commit)) - elif 'performance' in labels or cc_type == 'perf': + elif "performance" in labels or cc_type == "perf": performance.append((pull, commit)) - elif 'enhancement' in labels or cc_type == 'feat': + elif "enhancement" in labels or cc_type == "feat": enhancements.append((pull, commit)) - elif 'documentation' in labels or cc_type == 'docs' or cc_type == 'doc': + elif "documentation" in labels or cc_type == "docs" or cc_type == "doc": docs.append((pull, commit)) else: other.append((pull, commit)) @@ -114,13 +116,19 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): print(f"# Apache DataFusion Python {version} Changelog\n") # get the number of commits - commit_count = subprocess.check_output(f"git log --pretty=oneline {tag1}..{tag2} | wc -l", shell=True, text=True).strip() + commit_count = subprocess.check_output( + f"git log --pretty=oneline {tag1}..{tag2} | wc -l", shell=True, text=True + ).strip() # get number of contributors - contributor_count = subprocess.check_output(f"git shortlog -sn {tag1}..{tag2} | wc -l", shell=True, text=True).strip() + contributor_count = subprocess.check_output( + f"git shortlog -sn {tag1}..{tag2} | wc 
-l", shell=True, text=True + ).strip() - print(f"This release consists of {commit_count} commits from {contributor_count} contributors. " - f"See credits at the end of this changelog for more information.\n") + print( + f"This release consists of {commit_count} commits from {contributor_count} contributors. " + f"See credits at the end of this changelog for more information.\n" + ) print_pulls(repo_name, "Breaking changes", breaking) print_pulls(repo_name, "Performance related", performance) @@ -130,17 +138,24 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): print_pulls(repo_name, "Other", other) # show code contributions - credits = subprocess.check_output(f"git shortlog -sn {tag1}..{tag2}", shell=True, text=True).rstrip() + credits = subprocess.check_output( + f"git shortlog -sn {tag1}..{tag2}", shell=True, text=True + ).rstrip() print("## Credits\n") - print("Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) " - "per contributor.\n") + print( + "Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) " + "per contributor.\n" + ) print("```") print(credits) print("```\n") - print("Thank you also to everyone who contributed in other ways such as filing issues, reviewing " - "PRs, and providing feedback on this release.\n") + print( + "Thank you also to everyone who contributed in other ways such as filing issues, reviewing " + "PRs, and providing feedback on this release.\n" + ) + def cli(args=None): """Process command line arguments.""" @@ -150,7 +165,9 @@ def cli(args=None): parser = argparse.ArgumentParser() parser.add_argument("tag1", help="The previous commit or tag (e.g. 0.1.0)") parser.add_argument("tag2", help="The current commit or tag (e.g. 
HEAD)") - parser.add_argument("version", help="The version number to include in the changelog") + parser.add_argument( + "version", help="The version number to include in the changelog" + ) args = parser.parse_args() token = os.getenv("GITHUB_TOKEN") @@ -160,5 +177,6 @@ def cli(args=None): repo = g.get_repo(project) generate_changelog(repo, project, args.tag1, args.tag2, args.version) + if __name__ == "__main__": - cli() \ No newline at end of file + cli() From 840b5de08dad7dd9cd0d8db19d9245dcace721ce Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Mon, 14 Oct 2024 09:55:46 -0500 Subject: [PATCH 002/248] fix: remove use of deprecated `make_scalar_function` (#906) * fix: remove use of deprecated `make_scalar_function` `make_scalar_function` has been deprecated since v36 [0]. It is being removed from the public api in v43 [1]. [0]: https://github.com/apache/datafusion/pull/8878 [1]: https://github.com/apache/datafusion/pull/12505 * remove use of `.unwrap()` from pyarrow_function_to_rust --- src/udf.rs | 65 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/src/udf.rs b/src/udf.rs index 7d5db2f96..ec8efb169 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -24,39 +24,56 @@ use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::FromPyArrow; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::error::DataFusionError; -use datafusion::logical_expr::create_udf; use datafusion::logical_expr::function::ScalarFunctionImplementation; use datafusion::logical_expr::ScalarUDF; +use datafusion::logical_expr::{create_udf, ColumnarValue}; use crate::expr::PyExpr; use crate::utils::parse_volatility; +/// Create a Rust callable function from a python function that expects pyarrow arrays +fn pyarrow_function_to_rust( + func: PyObject, +) -> impl Fn(&[ArrayRef]) -> Result { + move |args: &[ArrayRef]| -> Result { + Python::with_gil(|py| { + // 1. 
cast args to Pyarrow arrays + let py_args = args + .iter() + .map(|arg| { + arg.into_data() + .to_pyarrow(py) + .map_err(|e| DataFusionError::Execution(format!("{e:?}"))) + }) + .collect::, _>>()?; + let py_args = PyTuple::new_bound(py, py_args); + + // 2. call function + let value = func + .call_bound(py, py_args, None) + .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; + + // 3. cast to arrow::array::Array + let array_data = ArrayData::from_pyarrow_bound(value.bind(py)) + .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; + Ok(make_array(array_data)) + }) + } +} + /// Create a DataFusion's UDF implementation from a python function /// that expects pyarrow arrays. This is more efficient as it performs /// a zero-copy of the contents. -fn to_rust_function(func: PyObject) -> ScalarFunctionImplementation { - #[allow(deprecated)] - datafusion::physical_plan::functions::make_scalar_function( - move |args: &[ArrayRef]| -> Result { - Python::with_gil(|py| { - // 1. cast args to Pyarrow arrays - let py_args = args - .iter() - .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) - .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); - - // 2. call function - let value = func - .call_bound(py, py_args, None) - .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; +fn to_scalar_function_impl(func: PyObject) -> ScalarFunctionImplementation { + // Make the python function callable from rust + let pyarrow_func = pyarrow_function_to_rust(func); - // 3. 
cast to arrow::array::Array - let array_data = ArrayData::from_pyarrow_bound(value.bind(py)).unwrap(); - Ok(make_array(array_data)) - }) - }, - ) + // Convert input/output from datafusion ColumnarValue to arrow arrays + Arc::new(move |args: &[ColumnarValue]| { + let array_refs = ColumnarValue::values_to_arrays(args)?; + let array_result = pyarrow_func(&array_refs)?; + Ok(array_result.into()) + }) } /// Represents a PyScalarUDF @@ -82,7 +99,7 @@ impl PyScalarUDF { input_types.0, Arc::new(return_type.0), parse_volatility(volatility)?, - to_rust_function(func), + to_scalar_function_impl(func), ); Ok(Self { function }) } From 3d751728467a4210007bd9d266f22ae0e291d63f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:05:45 -0500 Subject: [PATCH 003/248] build(deps): bump pyo3 from 0.22.3 to 0.22.4 (#910) Bumps [pyo3](https://github.com/pyo3/pyo3) from 0.22.3 to 0.22.4. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/main/CHANGELOG.md) - [Commits](https://github.com/pyo3/pyo3/commits) --- updated-dependencies: - dependency-name: pyo3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 815323bf4..56c368f46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2342,9 +2342,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.3" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15ee168e30649f7f234c3d49ef5a7a6cbf5134289bc46c29ff3155fa3221c225" +checksum = "00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" dependencies = [ "cfg-if", "indoc", @@ -2360,9 +2360,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.3" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e61cef80755fe9e46bb8a0b8f20752ca7676dcc07a5277d8b7768c6172e529b3" +checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" dependencies = [ "once_cell", "target-lexicon", @@ -2370,9 +2370,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.3" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ce096073ec5405f5ee2b8b31f03a68e02aa10d5d4f565eca04acc41931fa1c" +checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" dependencies = [ "libc", "pyo3-build-config", @@ -2380,9 +2380,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.3" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2440c6d12bc8f3ae39f1e775266fa5122fd0c8891ce7520fa6048e683ad3de28" +checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2392,9 +2392,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.3" +version = "0.22.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1be962f0e06da8f8465729ea2cb71a416d2257dff56cbe40a70d3e62a93ae5d1" +checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" dependencies = [ "heck 0.5.0", "proc-macro2", From 72f274385792d6eee3d9053ed786966b4899b24d Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:20:22 +0200 Subject: [PATCH 004/248] feat: expose drop method (#913) --- python/datafusion/dataframe.py | 11 +++++++++++ python/tests/test_dataframe.py | 11 +++++++++++ src/dataframe.rs | 7 +++++++ 3 files changed, 29 insertions(+) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index c5ac0bb89..a9e4d4d10 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -129,6 +129,17 @@ def select(self, *exprs: Expr | str) -> DataFrame: ] return DataFrame(self.df.select(*exprs_internal)) + def drop(self, *columns: str) -> DataFrame: + """Drop arbitrary amount of columns. + + Args: + columns: Column names to drop from the dataframe. + + Returns: + DataFrame with those columns removed in the projection. + """ + return DataFrame(self.df.drop(*columns)) + def filter(self, *predicates: Expr) -> DataFrame: """Return a DataFrame for which ``predicate`` evaluates to ``True``. 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e89c57159..88c642a7d 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -169,6 +169,17 @@ def test_sort(df): assert table.to_pydict() == expected +def test_drop(df): + df = df.drop("c") + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert df.schema().names == ["a", "b"] + assert result.column(0) == pa.array([1, 2, 3]) + assert result.column(1) == pa.array([4, 5, 6]) + + def test_limit(df): df = df.limit(1) diff --git a/src/dataframe.rs b/src/dataframe.rs index e77ca8425..db243704a 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -170,6 +170,13 @@ impl PyDataFrame { Ok(Self::new(df)) } + #[pyo3(signature = (*args))] + fn drop(&self, args: Vec) -> PyResult { + let cols = args.iter().map(|s| s.as_ref()).collect::>(); + let df = self.df.as_ref().clone().drop_columns(&cols)?; + Ok(Self::new(df)) + } + fn filter(&self, predicate: PyExpr) -> PyResult { let df = self.df.as_ref().clone().filter(predicate.into())?; Ok(Self::new(df)) From b4b03fe10fab72cc5606a193b58e1c9ae5031318 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:20:39 +0200 Subject: [PATCH 005/248] feat: expose `join_on` (#914) * feat: expose join_on method * test: improve join_on case --- python/datafusion/dataframe.py | 25 ++++++++++++++++++++++- python/tests/test_dataframe.py | 36 ++++++++++++++++++++++++++++++++++ src/dataframe.rs | 25 +++++++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index a9e4d4d10..60203ffb4 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -21,7 +21,7 @@ from __future__ import annotations -from typing import Any, List, TYPE_CHECKING +from typing import Any, List, TYPE_CHECKING, Literal from datafusion.record_batch import 
RecordBatchStream from typing_extensions import deprecated from datafusion.plan import LogicalPlan, ExecutionPlan @@ -304,6 +304,29 @@ def join( """ return DataFrame(self.df.join(right.df, join_keys, how)) + def join_on( + self, + right: DataFrame, + *on_exprs: Expr, + how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", + ) -> DataFrame: + """Join two :py:class:`DataFrame` using the specified expressions. + + On expressions are used to support in-equality predicates. Equality + predicates are correctly optimized. + + Args: + right: Other DataFrame to join with. + on_exprs: single or multiple (in)-equality predicates. + how: Type of join to perform. Supported types are "inner", "left", + "right", "full", "semi", "anti". + + Returns: + DataFrame after join. + """ + exprs = [expr.expr for expr in on_exprs] + return DataFrame(self.df.join_on(right.df, exprs, how)) + def explain(self, verbose: bool = False, analyze: bool = False) -> DataFrame: """Return a DataFrame with the explanation of its plan so far. 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 88c642a7d..6330ede04 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -270,6 +270,42 @@ def test_join(): assert table.to_pydict() == expected +def test_join_on(): + ctx = SessionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]], "l") + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([-8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]], "r") + + df2 = df.join_on(df1, column("l.a").__eq__(column("r.a")), how="inner") + df2.show() + df2 = df2.sort(column("l.a")) + table = pa.Table.from_batches(df2.collect()) + + expected = {"a": [1, 2], "c": [-8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + df3 = df.join_on( + df1, + column("l.a").__eq__(column("r.a")), + column("l.a").__lt__(column("r.c")), + how="inner", + ) + df3.show() + df3 = df3.sort(column("l.a")) + table = pa.Table.from_batches(df3.collect()) + expected = {"a": [2], "c": [10], "b": [5]} + assert table.to_pydict() == expected + + def test_distinct(): ctx = SessionContext() diff --git a/src/dataframe.rs b/src/dataframe.rs index db243704a..fa6c1d44f 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -300,6 +300,31 @@ impl PyDataFrame { Ok(Self::new(df)) } + fn join_on(&self, right: PyDataFrame, on_exprs: Vec, how: &str) -> PyResult { + let join_type = match how { + "inner" => JoinType::Inner, + "left" => JoinType::Left, + "right" => JoinType::Right, + "full" => JoinType::Full, + "semi" => JoinType::LeftSemi, + "anti" => JoinType::LeftAnti, + how => { + return Err(DataFusionError::Common(format!( + "The join type {how} does not exist or is not implemented" + )) + .into()); + } + }; + let exprs: Vec = on_exprs.into_iter().map(|e| e.into()).collect(); + + let df = self + .df + .as_ref() + .clone() + 
.join_on(right.df.as_ref().clone(), join_type, exprs)?; + Ok(Self::new(df)) + } + /// Print the query plan #[pyo3(signature = (verbose=false, analyze=false))] fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { From 494b89a522541bbaf9c3cd5d7b6bd7ab7218a399 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:21:30 +0200 Subject: [PATCH 006/248] refactor: from_arrow (#917) --- python/datafusion/context.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 957d7e311..5221c866c 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -30,7 +30,7 @@ from datafusion.record_batch import RecordBatchStream from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF -from typing import Any, TYPE_CHECKING +from typing import Any, TYPE_CHECKING, Protocol from typing_extensions import deprecated if TYPE_CHECKING: @@ -41,6 +41,28 @@ from datafusion.plan import LogicalPlan, ExecutionPlan +class ArrowStreamExportable(Protocol): + """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface. + + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + """ + + def __arrow_c_stream__( # noqa: D105 + self, requested_schema: object | None = None + ) -> object: ... + + +class ArrowArrayExportable(Protocol): + """Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface. + + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + """ + + def __arrow_c_array__( # noqa: D105 + self, requested_schema: object | None = None + ) -> tuple[object, object]: ... 
+ + class SessionConfig: """Session configuration options.""" @@ -592,12 +614,18 @@ def from_pydict( """ return DataFrame(self.ctx.from_pydict(data, name)) - def from_arrow(self, data: Any, name: str | None = None) -> DataFrame: + def from_arrow( + self, + data: ArrowStreamExportable | ArrowArrayExportable, + name: str | None = None, + ) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source. The Arrow data source can be any object that implements either ``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return - a struct array. Common examples of sources from pyarrow include + a struct array. + + Arrow data can be Polars, Pandas, Pyarrow etc. Args: data: Arrow data source. From 0905f5fca4b763fc61e5e2093a85ad05e203d7fb Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:36:56 +0200 Subject: [PATCH 007/248] feat: add fill_null/nan (#919) --- python/datafusion/expr.py | 12 ++++++++++++ python/datafusion/functions.py | 6 ++++++ python/tests/test_expr.py | 33 ++++++++++++++++++++++++++++++--- src/functions.rs | 6 ++++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 8600627ae..c4e7713f3 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -406,6 +406,18 @@ def is_not_null(self) -> Expr: """Returns ``True`` if this expression is not null.""" return Expr(self.expr.is_not_null()) + def fill_nan(self, value: Any | Expr | None = None) -> Expr: + """Fill NaN values with a provided value.""" + if not isinstance(value, Expr): + value = Expr.literal(value) + return Expr(functions_internal.nanvl(self.expr, value.expr)) + + def fill_null(self, value: Any | Expr | None = None) -> Expr: + """Fill NULL values with a provided value.""" + if not isinstance(value, Expr): + value = Expr.literal(value) + return Expr(functions_internal.nvl(self.expr, value.expr)) + 
_to_pyarrow_types = { float: pa.float64(), int: pa.int64(), diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0401afbc4..727321979 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -186,6 +186,7 @@ "min", "named_struct", "nanvl", + "nvl", "now", "nth_value", "nullif", @@ -673,6 +674,11 @@ def nanvl(x: Expr, y: Expr) -> Expr: return Expr(f.nanvl(x.expr, y.expr)) +def nvl(x: Expr, y: Expr) -> Expr: + """Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``.""" + return Expr(f.nvl(x.expr, y.expr)) + + def octet_length(arg: Expr) -> Expr: """Returns the number of bytes of a string.""" return Expr(f.octet_length(arg.expr)) diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index b58177f16..1847edef2 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -import pyarrow +import pyarrow as pa import pytest from datafusion import SessionContext, col from datafusion.expr import ( @@ -125,8 +125,8 @@ def test_sort(test_ctx): def test_relational_expr(test_ctx): ctx = SessionContext() - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array(["alpha", "beta", "gamma"])], + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array(["alpha", "beta", "gamma"])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]], name="batch_array") @@ -216,3 +216,30 @@ def test_display_name_deprecation(): # returns appropriate result assert name == expr.schema_name() assert name == "foo" + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, None]), pa.array([4, None, 6]), pa.array([None, None, 8])], + names=["a", "b", "c"], + ) + + return ctx.from_arrow(batch) + + +def test_fill_null(df): + df = df.select( + 
col("a").fill_null(100).alias("a"), + col("b").fill_null(25).alias("b"), + col("c").fill_null(1234).alias("c"), + ) + df.show() + result = df.collect()[0] + + assert result.column(0) == pa.array([1, 2, 100]) + assert result.column(1) == pa.array([4, 25, 6]) + assert result.column(2) == pa.array([1234, 1234, 8]) diff --git a/src/functions.rs b/src/functions.rs index 6f8dd7ada..24d33af39 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -490,6 +490,11 @@ expr_fn!( x y, "Returns x if x is not NaN otherwise returns y." ); +expr_fn!( + nvl, + x y, + "Returns x if x is not NULL otherwise returns y." +); expr_fn!(nullif, arg_1 arg_2); expr_fn!(octet_length, args, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); expr_fn_vec!(overlay); @@ -913,6 +918,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(min))?; m.add_wrapped(wrap_pyfunction!(named_struct))?; m.add_wrapped(wrap_pyfunction!(nanvl))?; + m.add_wrapped(wrap_pyfunction!(nvl))?; m.add_wrapped(wrap_pyfunction!(now))?; m.add_wrapped(wrap_pyfunction!(nullif))?; m.add_wrapped(wrap_pyfunction!(octet_length))?; From fc7e3e546dbb03783e66d44837d75a1e6ad62827 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 19 Oct 2024 06:15:28 +0800 Subject: [PATCH 008/248] Change requires-python version (#924) --- pyproject.toml | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6e10333a0..d327c0ec1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,8 +23,8 @@ build-backend = "maturin" name = "datafusion" description = "Build and run queries against data" readme = "README.md" -license = {file = "LICENSE.txt"} -requires-python = ">=3.6" +license = { file = "LICENSE.txt" } +requires-python = ">=3.7" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifier = [ "Development Status :: 2 - Pre-Alpha", @@ 
-42,10 +42,7 @@ classifier = [ "Programming Language :: Python", "Programming Language :: Rust", ] -dependencies = [ - "pyarrow>=11.0.0", - "typing-extensions;python_version<'3.13'", -] +dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"] [project.urls] homepage = "https://datafusion.apache.org/python" @@ -58,9 +55,7 @@ profile = "black" [tool.maturin] python-source = "python" module-name = "datafusion._internal" -include = [ - { path = "Cargo.lock", format = "sdist" } -] +include = [{ path = "Cargo.lock", format = "sdist" }] exclude = [".github/**", "ci/**", ".asf.yaml"] # Require Cargo.lock is up to date locked = true From 7cca0283e7837426e6ccdf9d5c8cbfc0c8b239c9 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sat, 19 Oct 2024 00:57:57 +0200 Subject: [PATCH 009/248] feat: add `with_columns` (#909) * feat: add with_columns * chore: add doc * Format docstring to render in online documentation --------- Co-authored-by: Tim Saucer --- python/datafusion/dataframe.py | 47 +++++++++++++++++++++++++++++++++- python/tests/test_dataframe.py | 31 ++++++++++++++++++++++ src/dataframe.rs | 10 ++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 60203ffb4..9c0953c35 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -21,7 +21,7 @@ from __future__ import annotations -from typing import Any, List, TYPE_CHECKING, Literal +from typing import Any, Iterable, List, Literal, TYPE_CHECKING from datafusion.record_batch import RecordBatchStream from typing_extensions import deprecated from datafusion.plan import LogicalPlan, ExecutionPlan @@ -171,6 +171,51 @@ def with_column(self, name: str, expr: Expr) -> DataFrame: """ return DataFrame(self.df.with_column(name, expr.expr)) + def with_columns( + self, *exprs: Expr | Iterable[Expr], **named_exprs: Expr + ) -> DataFrame: + """Add columns to 
the DataFrame. + + By passing expressions, iterables of expressions, or named expressions. To + pass named expressions use the form name=Expr. + + Example usage: The following will add 4 columns labeled a, b, c, and d:: + + df = df.with_columns( + lit(0).alias('a'), + [lit(1).alias('b'), lit(2).alias('c')], + d=lit(3) + ) + + Args: + exprs: Either a single expression or an iterable of expressions to add. + named_exprs: Named expressions in the form of ``name=expr`` + + Returns: + DataFrame with the new columns added. + """ + + def _simplify_expression( + *exprs: Expr | Iterable[Expr], **named_exprs: Expr + ) -> list[Expr]: + expr_list = [] + for expr in exprs: + if isinstance(expr, Expr): + expr_list.append(expr.expr) + elif isinstance(expr, Iterable): + for inner_expr in expr: + expr_list.append(inner_expr.expr) + else: + raise NotImplementedError + if named_exprs: + for alias, expr in named_exprs.items(): + expr_list.append(expr.alias(alias).expr) + return expr_list + + expressions = _simplify_expression(*exprs, **named_exprs) + + return DataFrame(self.df.with_columns(expressions)) + def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: r"""Rename one column by applying a new projection. 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 6330ede04..0d4a7dcb0 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -216,6 +216,37 @@ def test_with_column(df): assert result.column(2) == pa.array([5, 7, 9]) +def test_with_columns(df): + df = df.with_columns( + (column("a") + column("b")).alias("c"), + (column("a") + column("b")).alias("d"), + [ + (column("a") + column("b")).alias("e"), + (column("a") + column("b")).alias("f"), + ], + g=(column("a") + column("b")), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.schema.field(0).name == "a" + assert result.schema.field(1).name == "b" + assert result.schema.field(2).name == "c" + assert result.schema.field(3).name == "d" + assert result.schema.field(4).name == "e" + assert result.schema.field(5).name == "f" + assert result.schema.field(6).name == "g" + + assert result.column(0) == pa.array([1, 2, 3]) + assert result.column(1) == pa.array([4, 5, 6]) + assert result.column(2) == pa.array([5, 7, 9]) + assert result.column(3) == pa.array([5, 7, 9]) + assert result.column(4) == pa.array([5, 7, 9]) + assert result.column(5) == pa.array([5, 7, 9]) + assert result.column(6) == pa.array([5, 7, 9]) + + def test_with_column_renamed(df): df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum") diff --git a/src/dataframe.rs b/src/dataframe.rs index fa6c1d44f..dd5d89ce9 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -187,6 +187,16 @@ impl PyDataFrame { Ok(Self::new(df)) } + fn with_columns(&self, exprs: Vec) -> PyResult { + let mut df = self.df.as_ref().clone(); + for expr in exprs { + let expr: Expr = expr.into(); + let name = format!("{}", expr.schema_name()); + df = df.with_column(name.as_str(), expr)? + } + Ok(Self::new(df)) + } + /// Rename one column by applying a new projection. This is a no-op if the column to be /// renamed does not exist. 
fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyResult { From 70c099aad8ec337ef88e27c125a8eeba328d62de Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:21:25 +0200 Subject: [PATCH 010/248] feat: add `cast` to DataFrame (#916) * feat: add with_columns * feat: add top level cast * chore: improve docstring --------- Co-authored-by: Tim Saucer --- python/datafusion/dataframe.py | 13 +++++++++++++ python/tests/test_dataframe.py | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 9c0953c35..3ed6d40fe 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -21,6 +21,7 @@ from __future__ import annotations + from typing import Any, Iterable, List, Literal, TYPE_CHECKING from datafusion.record_batch import RecordBatchStream from typing_extensions import deprecated @@ -267,6 +268,18 @@ def sort(self, *exprs: Expr | SortExpr) -> DataFrame: exprs_raw = [sort_or_default(expr) for expr in exprs] return DataFrame(self.df.sort(*exprs_raw)) + def cast(self, mapping: dict[str, pa.DataType[Any]]) -> DataFrame: + """Cast one or more columns to a different data type. + + Args: + mapping: Mapped with column as key and column dtype as value. + + Returns: + DataFrame after casting columns + """ + exprs = [Expr.column(col).cast(dtype) for col, dtype in mapping.items()] + return self.with_columns(exprs) + def limit(self, count: int, offset: int = 0) -> DataFrame: """Return a new :py:class:`DataFrame` with a limited number of rows. 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 0d4a7dcb0..bb408c9c9 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -247,6 +247,15 @@ def test_with_columns(df): assert result.column(6) == pa.array([5, 7, 9]) +def test_cast(df): + df = df.cast({"a": pa.float16(), "b": pa.list_(pa.uint32())}) + expected = pa.schema( + [("a", pa.float16()), ("b", pa.list_(pa.uint32())), ("c", pa.int64())] + ) + + assert df.schema() == expected + + def test_with_column_renamed(df): df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum") From f59dd08bfbc0f01cc16b858465d03c3a01ba647c Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Mon, 21 Oct 2024 23:02:49 +0200 Subject: [PATCH 011/248] feat: add `head`, `tail` methods (#915) * feat: add head, tail methods * chore: add default head/tail --- python/datafusion/dataframe.py | 25 +++++++++++++++++++++++++ python/tests/test_dataframe.py | 22 ++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 3ed6d40fe..e4f8073d3 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -292,6 +292,31 @@ def limit(self, count: int, offset: int = 0) -> DataFrame: """ return DataFrame(self.df.limit(count, offset)) + def head(self, n: int = 5) -> DataFrame: + """Return a new :py:class:`DataFrame` with a limited number of rows. + + Args: + n: Number of rows to take from the head of the DataFrame. + + Returns: + DataFrame after limiting. + """ + return DataFrame(self.df.limit(n, 0)) + + def tail(self, n: int = 5) -> DataFrame: + """Return a new :py:class:`DataFrame` with a limited number of rows. + + Be aware this could be potentially expensive since the row size needs to be + determined of the dataframe. This is done by collecting it. 
+ + Args: + n: Number of rows to take from the tail of the DataFrame. + + Returns: + DataFrame after limiting. + """ + return DataFrame(self.df.limit(n, max(0, self.count() - n))) + def collect(self) -> list[pa.RecordBatch]: """Execute this :py:class:`DataFrame` and collect results into memory. diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index bb408c9c9..d73f5ebde 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -201,6 +201,28 @@ def test_limit_with_offset(df): assert len(result.column(1)) == 1 +def test_head(df): + df = df.head(1) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([1]) + assert result.column(1) == pa.array([4]) + assert result.column(2) == pa.array([8]) + + +def test_tail(df): + df = df.tail(1) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([3]) + assert result.column(1) == pa.array([6]) + assert result.column(2) == pa.array([8]) + + def test_with_column(df): df = df.with_column("c", column("a") + column("b")) From 56b72438004965f36dd4ce7e14d62a533ab2026f Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:23:36 +0200 Subject: [PATCH 012/248] chore: deprecate `select_columns` (#911) * chore: deprecate select_columns * chore: lint * Update user document to use select instead of select_columns * Update all tpch examples to use select instead of select_columns --------- Co-authored-by: Tim Saucer --- .../common-operations/select-and-filter.rst | 4 ++-- examples/import.py | 10 +++++----- examples/tpch/convert_data_to_parquet.py | 2 +- examples/tpch/q02_minimum_cost_supplier.py | 12 ++++++------ examples/tpch/q03_shipping_priority.py | 8 ++++---- examples/tpch/q04_order_priority_checking.py | 6 +++--- examples/tpch/q05_local_supplier_volume.py | 12 ++++++------ 
.../tpch/q06_forecasting_revenue_change.py | 2 +- examples/tpch/q07_volume_shipping.py | 10 +++++----- examples/tpch/q08_market_share.py | 18 ++++++++---------- .../tpch/q09_product_type_profit_measure.py | 14 ++++++-------- examples/tpch/q10_returned_item_reporting.py | 10 +++++----- .../tpch/q11_important_stock_identification.py | 8 ++++---- examples/tpch/q12_ship_mode_order_priority.py | 4 ++-- examples/tpch/q13_customer_distribution.py | 6 ++---- examples/tpch/q14_promotion_effect.py | 6 ++---- examples/tpch/q15_top_supplier.py | 6 +++--- .../tpch/q16_part_supplier_relationship.py | 8 ++++---- examples/tpch/q17_small_quantity_order.py | 4 ++-- examples/tpch/q18_large_volume_customer.py | 8 ++++---- examples/tpch/q19_discounted_revenue.py | 4 ++-- examples/tpch/q20_potential_part_promotion.py | 14 ++++++-------- .../tpch/q21_suppliers_kept_orders_waiting.py | 8 ++++---- examples/tpch/q22_global_sales_opportunity.py | 6 ++---- python/datafusion/dataframe.py | 3 +++ python/tests/test_dataframe.py | 14 ++++++-------- 26 files changed, 98 insertions(+), 109 deletions(-) diff --git a/docs/source/user-guide/common-operations/select-and-filter.rst b/docs/source/user-guide/common-operations/select-and-filter.rst index 92b4841b2..075909129 100644 --- a/docs/source/user-guide/common-operations/select-and-filter.rst +++ b/docs/source/user-guide/common-operations/select-and-filter.rst @@ -33,7 +33,7 @@ DataFusion can work with several file types, to start simple we can use a subset ctx = SessionContext() df = ctx.read_parquet("yellow_trip_data.parquet") - df.select_columns("trip_distance", "passenger_count") + df.select("trip_distance", "passenger_count") For mathematical or logical operations use :py:func:`~datafusion.col` to select columns, and give meaningful names to the resulting operations using :py:func:`~datafusion.expr.Expr.alias` @@ -48,7 +48,7 @@ operations using :py:func:`~datafusion.expr.Expr.alias` Please be aware that all identifiers are effectively made 
lower-case in SQL, so if your file has capital letters (ex: Name) you must put your column name in double quotes or the selection won’t work. As an alternative for simple - column selection use :py:func:`~datafusion.dataframe.DataFrame.select_columns` without double quotes + column selection use :py:func:`~datafusion.dataframe.DataFrame.select` without double quotes For selecting columns with capital letters use ``'"VendorID"'`` diff --git a/examples/import.py b/examples/import.py index cd965cb46..c9d2e8cb6 100644 --- a/examples/import.py +++ b/examples/import.py @@ -28,7 +28,7 @@ # The dictionary keys represent column names and the dictionary values # represent column values df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) -assert type(df) == datafusion.DataFrame +assert type(df) is datafusion.DataFrame # Dataframe: # +---+---+ # | a | b | @@ -40,19 +40,19 @@ # Create a datafusion DataFrame from a Python list of rows df = ctx.from_pylist([{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}]) -assert type(df) == datafusion.DataFrame +assert type(df) is datafusion.DataFrame # Convert pandas DataFrame to datafusion DataFrame pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df = ctx.from_pandas(pandas_df) -assert type(df) == datafusion.DataFrame +assert type(df) is datafusion.DataFrame # Convert polars DataFrame to datafusion DataFrame polars_df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df = ctx.from_polars(polars_df) -assert type(df) == datafusion.DataFrame +assert type(df) is datafusion.DataFrame # Convert Arrow Table to datafusion DataFrame arrow_table = pa.Table.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]}) df = ctx.from_arrow(arrow_table) -assert type(df) == datafusion.DataFrame +assert type(df) is datafusion.DataFrame diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index a8091a708..cb0b2f0bd 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py 
@@ -138,6 +138,6 @@ df = ctx.read_csv(source_file, schema=schema, has_header=False, delimiter="|") - df = df.select_columns(*output_cols) + df = df.select(*output_cols) df.write_parquet(dest_file, compression="snappy") diff --git a/examples/tpch/q02_minimum_cost_supplier.py b/examples/tpch/q02_minimum_cost_supplier.py index 2171a2083..2440fdad6 100644 --- a/examples/tpch/q02_minimum_cost_supplier.py +++ b/examples/tpch/q02_minimum_cost_supplier.py @@ -43,10 +43,10 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select( "p_partkey", "p_mfgr", "p_type", "p_size" ) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_acctbal", "s_name", "s_address", @@ -55,13 +55,13 @@ "s_nationkey", "s_suppkey", ) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns( +df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( "ps_partkey", "ps_suppkey", "ps_supplycost" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_regionkey", "n_name" ) -df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns( +df_region = ctx.read_parquet(get_data_path("region.parquet")).select( "r_regionkey", "r_name" ) @@ -115,7 +115,7 @@ # From the problem statement, these are the values we wish to output -df = df.select_columns( +df = df.select( "s_acctbal", "s_name", "n_name", diff --git a/examples/tpch/q03_shipping_priority.py b/examples/tpch/q03_shipping_priority.py index 6a4886d83..c4e8f461a 100644 --- a/examples/tpch/q03_shipping_priority.py +++ b/examples/tpch/q03_shipping_priority.py @@ -37,13 +37,13 @@ ctx = SessionContext() -df_customer = 
ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_mktsegment", "c_custkey" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderdate", "o_shippriority", "o_custkey", "o_orderkey" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_orderkey", "l_extendedprice", "l_discount", "l_shipdate" ) @@ -80,7 +80,7 @@ # Change the order that the columns are reported in just to match the spec -df = df.select_columns("l_orderkey", "revenue", "o_orderdate", "o_shippriority") +df = df.select("l_orderkey", "revenue", "o_orderdate", "o_shippriority") # Show result diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index 77c3bd43e..f10b74d91 100644 --- a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -39,10 +39,10 @@ ctx = SessionContext() -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderdate", "o_orderpriority", "o_orderkey" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_orderkey", "l_commitdate", "l_receiptdate" ) @@ -54,7 +54,7 @@ # Limit results to cases where commitment date before receipt date # Aggregate the results so we only get one row to join with the order table. # Alternately, and likely more idiomatic is instead of `.aggregate` you could -# do `.select_columns("l_orderkey").distinct()`. The goal here is to show +# do `.select("l_orderkey").distinct()`. The goal here is to show # multiple examples of how to use Data Fusion. 
df_lineitem = df_lineitem.filter(col("l_commitdate") < col("l_receiptdate")).aggregate( [col("l_orderkey")], [] diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index f17f600a4..2a83d2d1a 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -47,22 +47,22 @@ ctx = SessionContext() -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_custkey", "c_nationkey" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_custkey", "o_orderkey", "o_orderdate" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_orderkey", "l_suppkey", "l_extendedprice", "l_discount" ) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_nationkey" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_regionkey", "n_name" ) -df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns( +df_region = ctx.read_parquet(get_data_path("region.parquet")).select( "r_regionkey", "r_name" ) diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py index 3beb9eb1f..eaf9b0c29 100644 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ b/examples/tpch/q06_forecasting_revenue_change.py @@ -51,7 +51,7 @@ ctx = SessionContext() -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( 
"l_shipdate", "l_quantity", "l_extendedprice", "l_discount" ) diff --git a/examples/tpch/q07_volume_shipping.py b/examples/tpch/q07_volume_shipping.py index 44c605a9b..a1d7d81ad 100644 --- a/examples/tpch/q07_volume_shipping.py +++ b/examples/tpch/q07_volume_shipping.py @@ -49,19 +49,19 @@ ctx = SessionContext() -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_nationkey" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_shipdate", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_custkey" ) -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_custkey", "c_nationkey" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name" ) diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index cd6bc1fa9..95fc0a871 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -47,25 +47,23 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( - "p_partkey", "p_type" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type") +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_nationkey" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = 
ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_partkey", "l_extendedprice", "l_discount", "l_suppkey", "l_orderkey" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_custkey", "o_orderdate" ) -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_custkey", "c_nationkey" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name", "n_regionkey" ) -df_region = ctx.read_parquet(get_data_path("region.parquet")).select_columns( +df_region = ctx.read_parquet(get_data_path("region.parquet")).select( "r_regionkey", "r_name" ) @@ -133,7 +131,7 @@ # When we join to the customer dataframe, we don't want to confuse other columns, so only # select the supplier key that we need -df_national_suppliers = df_national_suppliers.select_columns("s_suppkey") +df_national_suppliers = df_national_suppliers.select("s_suppkey") # Part 3: Combine suppliers and customers and compute the market share diff --git a/examples/tpch/q09_product_type_profit_measure.py b/examples/tpch/q09_product_type_profit_measure.py index b4a7369f8..0295d3025 100644 --- a/examples/tpch/q09_product_type_profit_measure.py +++ b/examples/tpch/q09_product_type_profit_measure.py @@ -39,16 +39,14 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( - "p_partkey", "p_name" -) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_name") +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_nationkey" ) -df_partsupp = 
ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns( +df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( "ps_suppkey", "ps_partkey", "ps_supplycost" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_partkey", "l_extendedprice", "l_discount", @@ -56,10 +54,10 @@ "l_orderkey", "l_quantity", ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_custkey", "o_orderdate" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name", "n_regionkey" ) diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index 78327c3ad..25f81b2ff 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -44,7 +44,7 @@ ctx = SessionContext() -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_custkey", "c_nationkey", "c_name", @@ -53,13 +53,13 @@ "c_phone", "c_comment", ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_extendedprice", "l_discount", "l_orderkey", "l_returnflag" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_custkey", "o_orderdate" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name", "n_regionkey" ) @@ -87,7 +87,7 @@ df = df.join(df_nation, 
(["c_nationkey"], ["n_nationkey"]), how="inner") # These are the columns the problem statement requires -df = df.select_columns( +df = df.select( "c_custkey", "c_name", "revenue", diff --git a/examples/tpch/q11_important_stock_identification.py b/examples/tpch/q11_important_stock_identification.py index 391eb45b1..86ff2296b 100644 --- a/examples/tpch/q11_important_stock_identification.py +++ b/examples/tpch/q11_important_stock_identification.py @@ -37,13 +37,13 @@ ctx = SessionContext() -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_nationkey" ) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns( +df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( "ps_supplycost", "ps_availqty", "ps_suppkey", "ps_partkey" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name" ) @@ -75,7 +75,7 @@ df = df.filter(col("value") / col("total_value") >= lit(FRACTION)) # We only need to report on these two columns -df = df.select_columns("ps_partkey", "value") +df = df.select("ps_partkey", "value") # Sort in descending order of value df = df.sort(col("value").sort(ascending=False)) diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index 150870c64..c3fc0d2e9 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -42,10 +42,10 @@ ctx = SessionContext() -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_orderpriority" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = 
ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_orderkey", "l_shipmode", "l_commitdate", "l_shipdate", "l_receiptdate" ) diff --git a/examples/tpch/q13_customer_distribution.py b/examples/tpch/q13_customer_distribution.py index bc0a5bd1f..f8b6c139d 100644 --- a/examples/tpch/q13_customer_distribution.py +++ b/examples/tpch/q13_customer_distribution.py @@ -38,12 +38,10 @@ ctx = SessionContext() -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_custkey", "o_comment" ) -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( - "c_custkey" -) +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select("c_custkey") # Use a regex to remove special cases df_orders = df_orders.filter( diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index 8cb1e4c5a..8224136ad 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -41,12 +41,10 @@ ctx = SessionContext() -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_partkey", "l_shipdate", "l_extendedprice", "l_discount" ) -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( - "p_partkey", "p_type" -) +df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_type") # Check part type begins with PROMO diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index aa76093ec..44d5dd997 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -41,10 +41,10 @@ ctx = SessionContext() -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_suppkey", "l_shipdate", "l_extendedprice", 
"l_discount" ) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_name", "s_address", @@ -79,7 +79,7 @@ df = df.join(df_supplier, (["l_suppkey"], ["s_suppkey"]), "inner") # Return only the columns requested -df = df.select_columns("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue") +df = df.select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue") # If we have more than one, sort by supplier number (suppkey) df = df.sort(col("s_suppkey").sort()) diff --git a/examples/tpch/q16_part_supplier_relationship.py b/examples/tpch/q16_part_supplier_relationship.py index fdcb5b4db..cbdd9989a 100644 --- a/examples/tpch/q16_part_supplier_relationship.py +++ b/examples/tpch/q16_part_supplier_relationship.py @@ -40,13 +40,13 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select( "p_partkey", "p_brand", "p_type", "p_size" ) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns( +df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( "ps_suppkey", "ps_partkey" ) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_comment" ) @@ -75,7 +75,7 @@ df = df_part.join(df_partsupp, (["p_partkey"], ["ps_partkey"]), "inner") -df = df.select_columns("p_brand", "p_type", "p_size", "ps_suppkey").distinct() +df = df.select("p_brand", "p_type", "p_size", "ps_suppkey").distinct() df = df.aggregate( [col("p_brand"), col("p_type"), col("p_size")], diff --git a/examples/tpch/q17_small_quantity_order.py b/examples/tpch/q17_small_quantity_order.py index e0ee8bb90..ff494279b 100644 --- a/examples/tpch/q17_small_quantity_order.py +++ b/examples/tpch/q17_small_quantity_order.py @@ 
-38,10 +38,10 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select( "p_partkey", "p_brand", "p_container" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_partkey", "l_quantity", "l_extendedprice" ) diff --git a/examples/tpch/q18_large_volume_customer.py b/examples/tpch/q18_large_volume_customer.py index 10c5f6e6a..497615499 100644 --- a/examples/tpch/q18_large_volume_customer.py +++ b/examples/tpch/q18_large_volume_customer.py @@ -35,13 +35,13 @@ ctx = SessionContext() -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_custkey", "c_name" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_custkey", "o_orderdate", "o_totalprice" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_orderkey", "l_quantity", "l_extendedprice" ) @@ -57,7 +57,7 @@ df = df.join(df_orders, (["l_orderkey"], ["o_orderkey"]), "inner") df = df.join(df_customer, (["o_custkey"], ["c_custkey"]), "inner") -df = df.select_columns( +df = df.select( "c_name", "c_custkey", "o_orderkey", "o_orderdate", "o_totalprice", "total_quantity" ) diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index b15cd98bf..c2fe2570d 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -52,10 +52,10 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select( "p_partkey", 
"p_brand", "p_container", "p_size" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_partkey", "l_quantity", "l_shipmode", diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index 4ced7aaa1..3a0edb1ec 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -40,19 +40,17 @@ ctx = SessionContext() -df_part = ctx.read_parquet(get_data_path("part.parquet")).select_columns( - "p_partkey", "p_name" -) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_part = ctx.read_parquet(get_data_path("part.parquet")).select("p_partkey", "p_name") +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_shipdate", "l_partkey", "l_suppkey", "l_quantity" ) -df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select_columns( +df_partsupp = ctx.read_parquet(get_data_path("partsupp.parquet")).select( "ps_partkey", "ps_suppkey", "ps_availqty" ) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_address", "s_name", "s_nationkey" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name" ) @@ -91,7 +89,7 @@ df = df.join(df_nation, (["s_nationkey"], ["n_nationkey"]), "inner") # Restrict to the requested data per the problem statement -df = df.select_columns("s_name", "s_address").distinct() +df = df.select("s_name", "s_address").distinct() df = df.sort(col("s_name").sort()) diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 6b1679e7d..d3d57acee 100644 --- 
a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -35,16 +35,16 @@ ctx = SessionContext() -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select( "o_orderkey", "o_orderstatus" ) -df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select_columns( +df_lineitem = ctx.read_parquet(get_data_path("lineitem.parquet")).select( "l_orderkey", "l_receiptdate", "l_commitdate", "l_suppkey" ) -df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select_columns( +df_supplier = ctx.read_parquet(get_data_path("supplier.parquet")).select( "s_suppkey", "s_name", "s_nationkey" ) -df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select_columns( +df_nation = ctx.read_parquet(get_data_path("nation.parquet")).select( "n_nationkey", "n_name" ) diff --git a/examples/tpch/q22_global_sales_opportunity.py b/examples/tpch/q22_global_sales_opportunity.py index 41fd5de9e..e6660e60c 100644 --- a/examples/tpch/q22_global_sales_opportunity.py +++ b/examples/tpch/q22_global_sales_opportunity.py @@ -35,12 +35,10 @@ ctx = SessionContext() -df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select_columns( +df_customer = ctx.read_parquet(get_data_path("customer.parquet")).select( "c_phone", "c_acctbal", "c_custkey" ) -df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select_columns( - "o_custkey" -) +df_orders = ctx.read_parquet(get_data_path("orders.parquet")).select("o_custkey") # The nation code is a two digit number, but we need to convert it to a string literal nation_codes = F.make_array(*[lit(str(n)) for n in NATION_CODES]) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index e4f8073d3..e59f00d9f 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -98,6 +98,9 @@ def schema(self) -> pa.Schema: """ return 
self.df.schema() + @deprecated( + "select_columns() is deprecated. Use :py:meth:`~DataFrame.select` instead" + ) def select_columns(self, *args: str) -> DataFrame: """Filter the DataFrame by columns. diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index d73f5ebde..7b20e9e39 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -103,30 +103,28 @@ def partitioned_df(): def test_select(df): - df = df.select( + df_1 = df.select( column("a") + column("b"), column("a") - column("b"), ) # execute and collect the first (and only) batch - result = df.collect()[0] + result = df_1.collect()[0] assert result.column(0) == pa.array([5, 7, 9]) assert result.column(1) == pa.array([-3, -3, -3]) - -def test_select_mixed_expr_string(df): - df = df.select_columns(column("b"), "a") + df_2 = df.select("b", "a") # execute and collect the first (and only) batch - result = df.collect()[0] + result = df_2.collect()[0] assert result.column(0) == pa.array([4, 5, 6]) assert result.column(1) == pa.array([1, 2, 3]) -def test_select_columns(df): - df = df.select_columns("b", "a") +def test_select_mixed_expr_string(df): + df = df.select(column("b"), "a") # execute and collect the first (and only) batch result = df.collect()[0] From 7007e0239f0a0dbe863936b3937cb8a787971a59 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 12:47:48 -0500 Subject: [PATCH 013/248] build(deps): bump uuid from 1.10.0 to 1.11.0 (#927) Bumps [uuid](https://github.com/uuid-rs/uuid) from 1.10.0 to 1.11.0. - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.10.0...1.11.0) --- updated-dependencies: - dependency-name: uuid dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 56c368f46..0835f219e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3406,9 +3406,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", "serde", diff --git a/Cargo.toml b/Cargo.toml index df72cd40a..073f82cf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ datafusion = { version = "42.0.0", features = ["pyarrow", "avro", "unicode_expre datafusion-substrait = { version = "42.0.0", optional = true } datafusion-proto = { version = "42.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` -uuid = { version = "1.9", features = ["v4"] } +uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" From 0bc2f31d8d9ac46e1732d1267cb8d83c38452f45 Mon Sep 17 00:00:00 2001 From: kosiew Date: Tue, 29 Oct 2024 00:03:00 +0800 Subject: [PATCH 014/248] Add array_empty (#931) --- .../user-guide/common-operations/expressions.rst | 15 ++++++++++++++- python/datafusion/functions.py | 6 ++++++ python/tests/test_functions.py | 4 ++++ src/functions.rs | 2 ++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/common-operations/expressions.rst b/docs/source/user-guide/common-operations/expressions.rst index 6014c9d2e..77f3359f5 100644 --- a/docs/source/user-guide/common-operations/expressions.rst +++ b/docs/source/user-guide/common-operations/expressions.rst @@ -77,12 +77,25 @@ 
approaches. df = ctx.from_pydict({"a": [[1, 2, 3], [4, 5, 6]]}) df.select(col("a")[0].alias("a0")) - .. warning:: Indexing an element of an array via ``[]`` starts at index 0 whereas :py:func:`~datafusion.functions.array_element` starts at index 1. +To check if an array is empty, you can use the function :py:func:`datafusion.functions.array_empty`. +This function returns a boolean indicating whether the array is empty. + +.. ipython:: python + + from datafusion import SessionContext, col + from datafusion.functions import array_empty + + ctx = SessionContext() + df = ctx.from_pydict({"a": [[], [1, 2, 3]]}) + df.select(array_empty(col("a")).alias("is_empty")) + +In this example, the `is_empty` column will contain `True` for the first row and `False` for the second row. + Structs ------- diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 727321979..570a6ce5e 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -51,6 +51,7 @@ "array_dims", "array_distinct", "array_element", + "array_empty", "array_except", "array_extract", "array_has", @@ -1160,6 +1161,11 @@ def array_element(array: Expr, n: Expr) -> Expr: return Expr(f.array_element(array.expr, n.expr)) +def array_empty(array: Expr) -> Expr: + """Returns a boolean indicating whether the array is empty.""" + return Expr(f.array_empty(array.expr)) + + def array_extract(array: Expr, n: Expr) -> Expr: """Extracts the element with the index n from the array. 
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 9353f872d..e6fd41d8b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -309,6 +309,10 @@ def py_flatten(arr): lambda col: f.array_element(col, literal(1)), lambda data: [r[0] for r in data], ], + [ + lambda col: f.array_empty(col), + lambda data: [len(r) == 0 for r in data], + ], [ lambda col: f.array_extract(col, literal(1)), lambda data: [r[0] for r in data], diff --git a/src/functions.rs b/src/functions.rs index 24d33af39..4facb6cf7 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -572,6 +572,7 @@ array_fn!(array_to_string, array delimiter); array_fn!(array_dims, array); array_fn!(array_distinct, array); array_fn!(array_element, array element); +array_fn!(array_empty, array); array_fn!(array_length, array); array_fn!(array_has, first_array second_array); array_fn!(array_has_all, first_array second_array); @@ -1003,6 +1004,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_dims))?; m.add_wrapped(wrap_pyfunction!(array_distinct))?; m.add_wrapped(wrap_pyfunction!(array_element))?; + m.add_wrapped(wrap_pyfunction!(array_empty))?; m.add_wrapped(wrap_pyfunction!(array_length))?; m.add_wrapped(wrap_pyfunction!(array_has))?; m.add_wrapped(wrap_pyfunction!(array_has_all))?; From e015482750e9e08bd426bfcf649445d53705c51a Mon Sep 17 00:00:00 2001 From: kosiew Date: Tue, 29 Oct 2024 18:16:50 +0800 Subject: [PATCH 015/248] feat: add `cardinality` function to calculate total elements in an array (#937) --- .../common-operations/expressions.rst | 14 ++++++++++++++ python/datafusion/functions.py | 6 ++++++ python/tests/test_functions.py | 18 ++++++++++++++++++ src/functions.rs | 2 ++ 4 files changed, 40 insertions(+) diff --git a/docs/source/user-guide/common-operations/expressions.rst b/docs/source/user-guide/common-operations/expressions.rst index 77f3359f5..23430d359 100644 --- 
a/docs/source/user-guide/common-operations/expressions.rst +++ b/docs/source/user-guide/common-operations/expressions.rst @@ -96,6 +96,20 @@ This function returns a boolean indicating whether the array is empty. In this example, the `is_empty` column will contain `True` for the first row and `False` for the second row. +To get the total number of elements in an array, you can use the function :py:func:`datafusion.functions.cardinality`. +This function returns an integer indicating the total number of elements in the array. + +.. ipython:: python + + from datafusion import SessionContext, col + from datafusion.functions import cardinality + + ctx = SessionContext() + df = ctx.from_pydict({"a": [[1, 2, 3], [4, 5, 6]]}) + df.select(cardinality(col("a")).alias("num_elements")) + +In this example, the `num_elements` column will contain `3` for both rows. + Structs ------- diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 570a6ce5e..e67ba4ae4 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -132,6 +132,7 @@ "find_in_set", "first_value", "flatten", + "cardinality", "floor", "from_unixtime", "gcd", @@ -1516,6 +1517,11 @@ def flatten(array: Expr) -> Expr: return Expr(f.flatten(array.expr)) +def cardinality(array: Expr) -> Expr: + """Returns the total number of elements in the array.""" + return Expr(f.cardinality(array.expr)) + + # aggregate functions def approx_distinct( expression: Expr, diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index e6fd41d8b..37943e57c 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -540,6 +540,24 @@ def test_array_function_flatten(): ) +def test_array_function_cardinality(): + data = [[1, 2, 3], [4, 4, 5, 6]] + ctx = SessionContext() + batch = pa.RecordBatch.from_arrays([np.array(data, dtype=object)], names=["arr"]) + df = ctx.create_dataframe([[batch]]) + + stmt = f.cardinality(column("arr")) + py_expr = [len(arr) 
for arr in data] # Expected lengths: [3, 4] + # assert py_expr lengths + + query_result = df.select(stmt).collect()[0].column(0) + + for a, b in zip(query_result, py_expr): + np.testing.assert_array_equal( + np.array([a.as_py()], dtype=int), np.array([b], dtype=int) + ) + + @pytest.mark.parametrize( ("stmt", "py_expr"), [ diff --git a/src/functions.rs b/src/functions.rs index 4facb6cf7..fe3531ba9 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -594,6 +594,7 @@ array_fn!(array_intersect, first_array second_array); array_fn!(array_union, array1 array2); array_fn!(array_except, first_array second_array); array_fn!(array_resize, array size value); +array_fn!(cardinality, array); array_fn!(flatten, array); array_fn!(range, start stop step); @@ -1030,6 +1031,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(array_sort))?; m.add_wrapped(wrap_pyfunction!(array_slice))?; m.add_wrapped(wrap_pyfunction!(flatten))?; + m.add_wrapped(wrap_pyfunction!(cardinality))?; // Window Functions m.add_wrapped(wrap_pyfunction!(lead))?; From aedffe0d8a0522fa21a7b545aa885750f32fc218 Mon Sep 17 00:00:00 2001 From: kosiew Date: Fri, 1 Nov 2024 19:42:57 +0800 Subject: [PATCH 016/248] Add empty scalar function (alias of array_empty), fix a small typo (#938) * feat: add `empty` function as alias of array_empty * fix: correct typo in null_treatment parameter documentation --- .../user-guide/common-operations/expressions.rst | 2 +- python/datafusion/functions.py | 12 +++++++++--- python/tests/test_functions.py | 4 ++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/source/user-guide/common-operations/expressions.rst b/docs/source/user-guide/common-operations/expressions.rst index 23430d359..b2a83c89f 100644 --- a/docs/source/user-guide/common-operations/expressions.rst +++ b/docs/source/user-guide/common-operations/expressions.rst @@ -82,7 +82,7 @@ approaches.
Indexing an element of an array via ``[]`` starts at index 0 whereas :py:func:`~datafusion.functions.array_element` starts at index 1. -To check if an array is empty, you can use the function :py:func:`datafusion.functions.array_empty`. +To check if an array is empty, you can use the function :py:func:`datafusion.functions.array_empty` or `datafusion.functions.empty`. This function returns a boolean indicating whether the array is empty. .. ipython:: python diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index e67ba4ae4..907f801af 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -125,6 +125,7 @@ "decode", "degrees", "digest", + "empty", "encode", "ends_with", "exp", @@ -1522,6 +1523,11 @@ def cardinality(array: Expr) -> Expr: return Expr(f.cardinality(array.expr)) +def empty(array: Expr) -> Expr: + """This is an alias for :py:func:`array_empty`.""" + return array_empty(array) + + # aggregate functions def approx_distinct( expression: Expr, @@ -2140,7 +2146,7 @@ def first_value( expression: Argument to perform bitwise calculation on filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate - null_treatment: Assign whether to respect or ignull null values. + null_treatment: Assign whether to respect or ignore null values. """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2172,7 +2178,7 @@ def last_value( expression: Argument to perform bitwise calculation on filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate - null_treatment: Assign whether to respect or ignull null values. + null_treatment: Assign whether to respect or ignore null values. 
""" order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None @@ -2206,7 +2212,7 @@ def nth_value( n: Index of value to return. Starts at 1. filter: If provided, only compute against rows for which the filter is True order_by: Set the ordering of the expression to evaluate - null_treatment: Assign whether to respect or ignull null values. + null_treatment: Assign whether to respect or ignore null values. """ order_by_raw = sort_list_to_raw_sort_list(order_by) filter_raw = filter.expr if filter is not None else None diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 37943e57c..c65c633a4 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -313,6 +313,10 @@ def py_flatten(arr): lambda col: f.array_empty(col), lambda data: [len(r) == 0 for r in data], ], + [ + lambda col: f.empty(col), + lambda data: [len(r) == 0 for r in data], + ], [ lambda col: f.array_extract(col, literal(1)), lambda data: [r[0] for r in data], From cbe28cb4bb53c26940f5c020981592141030a324 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Tue, 5 Nov 2024 08:35:45 -0800 Subject: [PATCH 017/248] README How to develop section now also works on Apple M1 (#940) --- README.md | 2 + conda/environments/datafusion-cuda-dev.yaml | 44 +++++++++++++++++++++ conda/environments/datafusion-dev.yaml | 3 -- 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 conda/environments/datafusion-cuda-dev.yaml diff --git a/README.md b/README.md index b1d5397ef..83b307e7a 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,8 @@ conda env create -f ./conda/environments/datafusion-dev.yaml -n datafusion-dev conda activate datafusion-dev ``` +Or alternatively, if you are on an OS that supports CUDA Toolkit, you can use `-f ./conda/environments/datafusion-cuda-dev.yaml`. 
+ Bootstrap (Pip): ```bash diff --git a/conda/environments/datafusion-cuda-dev.yaml b/conda/environments/datafusion-cuda-dev.yaml new file mode 100644 index 000000000..1f6f23942 --- /dev/null +++ b/conda/environments/datafusion-cuda-dev.yaml @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +channels: + - conda-forge +dependencies: + - black + - flake8 + - isort + - maturin>=1.5.1 + - mypy + - numpy + - pyarrow>=11.0.0 + - pytest + - toml + - importlib_metadata + - python>=3.10 + # Packages useful for building distributions and releasing + - mamba + - conda-build + - anaconda-client + # Packages for documentation building + - sphinx + - pydata-sphinx-theme==0.8.0 + - myst-parser + - jinja2 + # GPU packages + - cudf + - cudatoolkit=11.8 +name: datafusion-dev diff --git a/conda/environments/datafusion-dev.yaml b/conda/environments/datafusion-dev.yaml index 1f6f23942..b4b503dc6 100644 --- a/conda/environments/datafusion-dev.yaml +++ b/conda/environments/datafusion-dev.yaml @@ -38,7 +38,4 @@ dependencies: - pydata-sphinx-theme==0.8.0 - myst-parser - jinja2 - # GPU packages - - cudf - - cudatoolkit=11.8 name: datafusion-dev From 4a6c4d129af3e1eb207f64ee84285419afb26876 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:06:27 +0100 Subject: [PATCH 018/248] refactor: dataframe `join` params (#912) * refactor: dataframe join params * chore: add description for on params * fix type * chore: change join param * chore: update join params in tpch * oops * chore: final change * Add support for join_keys as a positional argument --------- Co-authored-by: Tim Saucer --- .../user-guide/common-operations/joins.rst | 10 +- examples/tpch/_tests.py | 6 +- examples/tpch/q02_minimum_cost_supplier.py | 12 ++- examples/tpch/q03_shipping_priority.py | 6 +- examples/tpch/q04_order_priority_checking.py | 4 +- examples/tpch/q05_local_supplier_volume.py | 13 ++- examples/tpch/q07_volume_shipping.py | 12 ++- examples/tpch/q08_market_share.py | 14 +-- .../tpch/q09_product_type_profit_measure.py | 13 ++- examples/tpch/q10_returned_item_reporting.py | 6 +- .../q11_important_stock_identification.py | 6 +- examples/tpch/q12_ship_mode_order_priority.py | 2 +- examples/tpch/q13_customer_distribution.py | 4 +- 
examples/tpch/q14_promotion_effect.py | 4 +- examples/tpch/q15_top_supplier.py | 2 +- .../tpch/q16_part_supplier_relationship.py | 6 +- examples/tpch/q17_small_quantity_order.py | 2 +- examples/tpch/q18_large_volume_customer.py | 4 +- examples/tpch/q19_discounted_revenue.py | 2 +- examples/tpch/q20_potential_part_promotion.py | 11 ++- .../tpch/q21_suppliers_kept_orders_waiting.py | 8 +- examples/tpch/q22_global_sales_opportunity.py | 2 +- python/datafusion/dataframe.py | 95 +++++++++++++++++-- python/tests/test_dataframe.py | 66 ++++++++++++- src/dataframe.rs | 15 +-- 25 files changed, 240 insertions(+), 85 deletions(-) diff --git a/docs/source/user-guide/common-operations/joins.rst b/docs/source/user-guide/common-operations/joins.rst index 09fa145a7..40d922150 100644 --- a/docs/source/user-guide/common-operations/joins.rst +++ b/docs/source/user-guide/common-operations/joins.rst @@ -56,7 +56,7 @@ will be included in the resulting DataFrame. .. ipython:: python - left.join(right, join_keys=(["customer_id"], ["id"]), how="inner") + left.join(right, left_on="customer_id", right_on="id", how="inner") The parameter ``join_keys`` specifies the columns from the left DataFrame and right DataFrame that contains the values that should match. @@ -70,7 +70,7 @@ values for the corresponding columns. .. ipython:: python - left.join(right, join_keys=(["customer_id"], ["id"]), how="left") + left.join(right, left_on="customer_id", right_on="id", how="left") Full Join --------- @@ -80,7 +80,7 @@ is no match. Unmatched rows will have null values. .. ipython:: python - left.join(right, join_keys=(["customer_id"], ["id"]), how="full") + left.join(right, left_on="customer_id", right_on="id", how="full") Left Semi Join -------------- @@ -90,7 +90,7 @@ omitting duplicates with multiple matches in the right table. .. 
ipython:: python - left.join(right, join_keys=(["customer_id"], ["id"]), how="semi") + left.join(right, left_on="customer_id", right_on="id", how="semi") Left Anti Join -------------- @@ -101,4 +101,4 @@ the right table. .. ipython:: python - left.join(right, join_keys=(["customer_id"], ["id"]), how="anti") \ No newline at end of file + left.join(right, left_on="customer_id", right_on="id", how="anti") \ No newline at end of file diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 903b53548..13144ae9d 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -18,7 +18,7 @@ import pytest from importlib import import_module import pyarrow as pa -from datafusion import col, lit, functions as F +from datafusion import DataFrame, col, lit, functions as F from util import get_answer_file @@ -94,7 +94,7 @@ def check_q17(df): ) def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): module = import_module(query_code) - df = module.df + df: DataFrame = module.df # Treat q17 as a special case. The answer file does not match the spec. # Running at scale factor 1, we have manually verified this result does @@ -121,5 +121,5 @@ def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): cols = list(read_schema.names) - assert df.join(df_expected, (cols, cols), "anti").count() == 0 + assert df.join(df_expected, on=cols, how="anti").count() == 0 assert df.count() == df_expected.count() diff --git a/examples/tpch/q02_minimum_cost_supplier.py b/examples/tpch/q02_minimum_cost_supplier.py index 2440fdad6..c4ccf8ad3 100644 --- a/examples/tpch/q02_minimum_cost_supplier.py +++ b/examples/tpch/q02_minimum_cost_supplier.py @@ -80,16 +80,20 @@ # Now that we have the region, find suppliers in that region. Suppliers are tied to their nation # and nations are tied to the region. 
-df_nation = df_nation.join(df_region, (["n_regionkey"], ["r_regionkey"]), how="inner") +df_nation = df_nation.join( + df_region, left_on=["n_regionkey"], right_on=["r_regionkey"], how="inner" +) df_supplier = df_supplier.join( - df_nation, (["s_nationkey"], ["n_nationkey"]), how="inner" + df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner" ) # Now that we know who the potential suppliers are for the part, we can limit out part # supplies table down. We can further join down to the specific parts we've identified # as matching the request -df = df_partsupp.join(df_supplier, (["ps_suppkey"], ["s_suppkey"]), how="inner") +df = df_partsupp.join( + df_supplier, left_on=["ps_suppkey"], right_on=["s_suppkey"], how="inner" +) # Locate the minimum cost across all suppliers. There are multiple ways you could do this, # but one way is to create a window function across all suppliers, find the minimum, and @@ -111,7 +115,7 @@ df = df.filter(col("min_cost") == col("ps_supplycost")) -df = df.join(df_part, (["ps_partkey"], ["p_partkey"]), how="inner") +df = df.join(df_part, left_on=["ps_partkey"], right_on=["p_partkey"], how="inner") # From the problem statement, these are the values we wish to output diff --git a/examples/tpch/q03_shipping_priority.py b/examples/tpch/q03_shipping_priority.py index c4e8f461a..5ebab13c0 100644 --- a/examples/tpch/q03_shipping_priority.py +++ b/examples/tpch/q03_shipping_priority.py @@ -55,9 +55,9 @@ # Join all 3 dataframes -df = df_customer.join(df_orders, (["c_custkey"], ["o_custkey"]), how="inner").join( - df_lineitem, (["o_orderkey"], ["l_orderkey"]), how="inner" -) +df = df_customer.join( + df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner" +).join(df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner") # Compute the revenue diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index f10b74d91..8bf02cb83 100644 --- 
a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -66,7 +66,9 @@ ) # Perform the join to find only orders for which there are lineitems outside of expected range -df = df_orders.join(df_lineitem, (["o_orderkey"], ["l_orderkey"]), how="inner") +df = df_orders.join( + df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner" +) # Based on priority, find the number of entries df = df.aggregate( diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index 2a83d2d1a..413a4acb9 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -76,15 +76,18 @@ # Join all the dataframes df = ( - df_customer.join(df_orders, (["c_custkey"], ["o_custkey"]), how="inner") - .join(df_lineitem, (["o_orderkey"], ["l_orderkey"]), how="inner") + df_customer.join( + df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner" + ) + .join(df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner") .join( df_supplier, - (["l_suppkey", "c_nationkey"], ["s_suppkey", "s_nationkey"]), + left_on=["l_suppkey", "c_nationkey"], + right_on=["s_suppkey", "s_nationkey"], how="inner", ) - .join(df_nation, (["s_nationkey"], ["n_nationkey"]), how="inner") - .join(df_region, (["n_regionkey"], ["r_regionkey"]), how="inner") + .join(df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner") + .join(df_region, left_on=["n_regionkey"], right_on=["r_regionkey"], how="inner") ) # Compute the final result diff --git a/examples/tpch/q07_volume_shipping.py b/examples/tpch/q07_volume_shipping.py index a1d7d81ad..18c290d9c 100644 --- a/examples/tpch/q07_volume_shipping.py +++ b/examples/tpch/q07_volume_shipping.py @@ -90,20 +90,22 @@ # Limit suppliers to either nation df_supplier = df_supplier.join( - df_nation, (["s_nationkey"], ["n_nationkey"]), how="inner" + df_nation, left_on=["s_nationkey"], 
right_on=["n_nationkey"], how="inner" ).select(col("s_suppkey"), col("n_name").alias("supp_nation")) # Limit customers to either nation df_customer = df_customer.join( - df_nation, (["c_nationkey"], ["n_nationkey"]), how="inner" + df_nation, left_on=["c_nationkey"], right_on=["n_nationkey"], how="inner" ).select(col("c_custkey"), col("n_name").alias("cust_nation")) # Join up all the data frames from line items, and make sure the supplier and customer are in # different nations. df = ( - df_lineitem.join(df_orders, (["l_orderkey"], ["o_orderkey"]), how="inner") - .join(df_customer, (["o_custkey"], ["c_custkey"]), how="inner") - .join(df_supplier, (["l_suppkey"], ["s_suppkey"]), how="inner") + df_lineitem.join( + df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner" + ) + .join(df_customer, left_on=["o_custkey"], right_on=["c_custkey"], how="inner") + .join(df_supplier, left_on=["l_suppkey"], right_on=["s_suppkey"], how="inner") .filter(col("cust_nation") != col("supp_nation")) ) diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index 95fc0a871..7138ab65a 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -89,27 +89,27 @@ # After this join we have all of the possible sales nations df_regional_customers = df_regional_customers.join( - df_nation, (["r_regionkey"], ["n_regionkey"]), how="inner" + df_nation, left_on=["r_regionkey"], right_on=["n_regionkey"], how="inner" ) # Now find the possible customers df_regional_customers = df_regional_customers.join( - df_customer, (["n_nationkey"], ["c_nationkey"]), how="inner" + df_customer, left_on=["n_nationkey"], right_on=["c_nationkey"], how="inner" ) # Next find orders for these customers df_regional_customers = df_regional_customers.join( - df_orders, (["c_custkey"], ["o_custkey"]), how="inner" + df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="inner" ) # Find all line items from these orders df_regional_customers = 
df_regional_customers.join( - df_lineitem, (["o_orderkey"], ["l_orderkey"]), how="inner" + df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner" ) # Limit to the part of interest df_regional_customers = df_regional_customers.join( - df_part, (["l_partkey"], ["p_partkey"]), how="inner" + df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="inner" ) # Compute the volume for each line item @@ -126,7 +126,7 @@ # Determine the suppliers by the limited nation key we have in our single row df above df_national_suppliers = df_national_suppliers.join( - df_supplier, (["n_nationkey"], ["s_nationkey"]), how="inner" + df_supplier, left_on=["n_nationkey"], right_on=["s_nationkey"], how="inner" ) # When we join to the customer dataframe, we don't want to confuse other columns, so only @@ -141,7 +141,7 @@ # column only from suppliers in the nation we are evaluating. df = df_regional_customers.join( - df_national_suppliers, (["l_suppkey"], ["s_suppkey"]), how="left" + df_national_suppliers, left_on=["l_suppkey"], right_on=["s_suppkey"], how="left" ) # Use a case statement to compute the volume sold by suppliers in the nation of interest diff --git a/examples/tpch/q09_product_type_profit_measure.py b/examples/tpch/q09_product_type_profit_measure.py index 0295d3025..aa47d76c0 100644 --- a/examples/tpch/q09_product_type_profit_measure.py +++ b/examples/tpch/q09_product_type_profit_measure.py @@ -65,13 +65,16 @@ df = df_part.filter(F.strpos(col("p_name"), part_color) > lit(0)) # We have a series of joins that get us to limit down to the line items we need -df = df.join(df_lineitem, (["p_partkey"], ["l_partkey"]), how="inner") -df = df.join(df_supplier, (["l_suppkey"], ["s_suppkey"]), how="inner") -df = df.join(df_orders, (["l_orderkey"], ["o_orderkey"]), how="inner") +df = df.join(df_lineitem, left_on=["p_partkey"], right_on=["l_partkey"], how="inner") +df = df.join(df_supplier, left_on=["l_suppkey"], right_on=["s_suppkey"], how="inner") +df = 
df.join(df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner") df = df.join( - df_partsupp, (["l_suppkey", "l_partkey"], ["ps_suppkey", "ps_partkey"]), how="inner" + df_partsupp, + left_on=["l_suppkey", "l_partkey"], + right_on=["ps_suppkey", "ps_partkey"], + how="inner", ) -df = df.join(df_nation, (["s_nationkey"], ["n_nationkey"]), how="inner") +df = df.join(df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner") # Compute the intermediate values and limit down to the expressions we need df = df.select( diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index 25f81b2ff..94b398c1d 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -74,7 +74,7 @@ col("o_orderdate") < date_start_of_quarter + interval_one_quarter ) -df = df.join(df_lineitem, (["o_orderkey"], ["l_orderkey"]), how="inner") +df = df.join(df_lineitem, left_on=["o_orderkey"], right_on=["l_orderkey"], how="inner") # Compute the revenue df = df.aggregate( @@ -83,8 +83,8 @@ ) # Now join in the customer data -df = df.join(df_customer, (["o_custkey"], ["c_custkey"]), how="inner") -df = df.join(df_nation, (["c_nationkey"], ["n_nationkey"]), how="inner") +df = df.join(df_customer, left_on=["o_custkey"], right_on=["c_custkey"], how="inner") +df = df.join(df_nation, left_on=["c_nationkey"], right_on=["n_nationkey"], how="inner") # These are the columns the problem statement requires df = df.select( diff --git a/examples/tpch/q11_important_stock_identification.py b/examples/tpch/q11_important_stock_identification.py index 86ff2296b..707265e16 100644 --- a/examples/tpch/q11_important_stock_identification.py +++ b/examples/tpch/q11_important_stock_identification.py @@ -52,9 +52,11 @@ # Find part supplies of within this target nation -df = df_nation.join(df_supplier, (["n_nationkey"], ["s_nationkey"]), how="inner") +df = df_nation.join( + df_supplier, 
left_on=["n_nationkey"], right_on=["s_nationkey"], how="inner" +) -df = df.join(df_partsupp, (["s_suppkey"], ["ps_suppkey"]), how="inner") +df = df.join(df_partsupp, left_on=["s_suppkey"], right_on=["ps_suppkey"], how="inner") # Compute the value of individual parts diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index c3fc0d2e9..def2a6c30 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -75,7 +75,7 @@ # We need order priority, so join order df to line item -df = df.join(df_orders, (["l_orderkey"], ["o_orderkey"]), how="inner") +df = df.join(df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner") # Restrict to line items we care about based on the problem statement. df = df.filter(col("l_commitdate") < col("l_receiptdate")) diff --git a/examples/tpch/q13_customer_distribution.py b/examples/tpch/q13_customer_distribution.py index f8b6c139d..67365a96a 100644 --- a/examples/tpch/q13_customer_distribution.py +++ b/examples/tpch/q13_customer_distribution.py @@ -49,7 +49,9 @@ ) # Since we may have customers with no orders we must do a left join -df = df_customer.join(df_orders, (["c_custkey"], ["o_custkey"]), how="left") +df = df_customer.join( + df_orders, left_on=["c_custkey"], right_on=["o_custkey"], how="left" +) # Find the number of orders for each customer df = df.aggregate([col("c_custkey")], [F.count(col("o_custkey")).alias("c_count")]) diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index 8224136ad..cd26ee2bd 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -57,7 +57,9 @@ ) # Left join so we can sum up the promo parts different from other parts -df = df_lineitem.join(df_part, (["l_partkey"], ["p_partkey"]), "left") +df = df_lineitem.join( + df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="left" +) # Make a factor of 1.0 
if it is a promotion, 0.0 otherwise df = df.with_column("promo_factor", F.coalesce(col("promo_factor"), lit(0.0))) diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index 44d5dd997..0bc316f7a 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -76,7 +76,7 @@ # Now that we know the supplier(s) with maximum revenue, get the rest of their information # from the supplier table -df = df.join(df_supplier, (["l_suppkey"], ["s_suppkey"]), "inner") +df = df.join(df_supplier, left_on=["l_suppkey"], right_on=["s_suppkey"], how="inner") # Return only the columns requested df = df.select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue") diff --git a/examples/tpch/q16_part_supplier_relationship.py b/examples/tpch/q16_part_supplier_relationship.py index cbdd9989a..a6a0c43eb 100644 --- a/examples/tpch/q16_part_supplier_relationship.py +++ b/examples/tpch/q16_part_supplier_relationship.py @@ -56,7 +56,7 @@ # Remove unwanted suppliers df_partsupp = df_partsupp.join( - df_unwanted_suppliers, (["ps_suppkey"], ["s_suppkey"]), "anti" + df_unwanted_suppliers, left_on=["ps_suppkey"], right_on=["s_suppkey"], how="anti" ) # Select the parts we are interested in @@ -73,7 +73,9 @@ p_sizes = F.make_array(*[lit(s).cast(pa.int32()) for s in SIZES_OF_INTEREST]) df_part = df_part.filter(~F.array_position(p_sizes, col("p_size")).is_null()) -df = df_part.join(df_partsupp, (["p_partkey"], ["ps_partkey"]), "inner") +df = df_part.join( + df_partsupp, left_on=["p_partkey"], right_on=["ps_partkey"], how="inner" +) df = df.select("p_brand", "p_type", "p_size", "ps_suppkey").distinct() diff --git a/examples/tpch/q17_small_quantity_order.py b/examples/tpch/q17_small_quantity_order.py index ff494279b..d7b43d498 100644 --- a/examples/tpch/q17_small_quantity_order.py +++ b/examples/tpch/q17_small_quantity_order.py @@ -51,7 +51,7 @@ ) # Combine data -df = df.join(df_lineitem, (["p_partkey"], ["l_partkey"]), "inner") +df = 
df.join(df_lineitem, left_on=["p_partkey"], right_on=["l_partkey"], how="inner") # Find the average quantity window_frame = WindowFrame("rows", None, None) diff --git a/examples/tpch/q18_large_volume_customer.py b/examples/tpch/q18_large_volume_customer.py index 497615499..165fce033 100644 --- a/examples/tpch/q18_large_volume_customer.py +++ b/examples/tpch/q18_large_volume_customer.py @@ -54,8 +54,8 @@ # We've identified the orders of interest, now join the additional data # we are required to report on -df = df.join(df_orders, (["l_orderkey"], ["o_orderkey"]), "inner") -df = df.join(df_customer, (["o_custkey"], ["c_custkey"]), "inner") +df = df.join(df_orders, left_on=["l_orderkey"], right_on=["o_orderkey"], how="inner") +df = df.join(df_customer, left_on=["o_custkey"], right_on=["c_custkey"], how="inner") df = df.select( "c_name", "c_custkey", "o_orderkey", "o_orderdate", "o_totalprice", "total_quantity" diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index c2fe2570d..4aed0cbae 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -72,7 +72,7 @@ (col("l_shipmode") == lit("AIR")) | (col("l_shipmode") == lit("AIR REG")) ) -df = df.join(df_part, (["l_partkey"], ["p_partkey"]), "inner") +df = df.join(df_part, left_on=["l_partkey"], right_on=["p_partkey"], how="inner") # Create the user defined function (UDF) definition that does the work diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index 3a0edb1ec..d720cdce6 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -70,7 +70,7 @@ ) # This will filter down the line items to the parts of interest -df = df.join(df_part, (["l_partkey"], ["p_partkey"]), "inner") +df = df.join(df_part, left_on="l_partkey", right_on="p_partkey", how="inner") # Compute the total sold and limit ourselves to individual supplier/part 
combinations df = df.aggregate( @@ -78,15 +78,18 @@ ) df = df.join( - df_partsupp, (["l_partkey", "l_suppkey"], ["ps_partkey", "ps_suppkey"]), "inner" + df_partsupp, + left_on=["l_partkey", "l_suppkey"], + right_on=["ps_partkey", "ps_suppkey"], + how="inner", ) # Find cases of excess quantity df.filter(col("ps_availqty") > lit(0.5) * col("total_sold")) # We could do these joins earlier, but now limit to the nation of interest suppliers -df = df.join(df_supplier, (["ps_suppkey"], ["s_suppkey"]), "inner") -df = df.join(df_nation, (["s_nationkey"], ["n_nationkey"]), "inner") +df = df.join(df_supplier, left_on=["ps_suppkey"], right_on=["s_suppkey"], how="inner") +df = df.join(df_nation, left_on=["s_nationkey"], right_on=["n_nationkey"], how="inner") # Restrict to the requested data per the problem statement df = df.select("s_name", "s_address").distinct() diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index d3d57acee..27cf816fa 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -52,13 +52,13 @@ df_suppliers_of_interest = df_nation.filter(col("n_name") == lit(NATION_OF_INTEREST)) df_suppliers_of_interest = df_suppliers_of_interest.join( - df_supplier, (["n_nationkey"], ["s_nationkey"]), "inner" + df_supplier, left_on="n_nationkey", right_on="s_nationkey", how="inner" ) # Find the failed orders and all their line items df = df_orders.filter(col("o_orderstatus") == lit("F")) -df = df_lineitem.join(df, (["l_orderkey"], ["o_orderkey"]), "inner") +df = df_lineitem.join(df, left_on="l_orderkey", right_on="o_orderkey", how="inner") # Identify the line items for which the order is failed due to. 
df = df.with_column( @@ -102,7 +102,9 @@ ) # Join to the supplier of interest list for the nation of interest -df = df.join(df_suppliers_of_interest, (["suppkey"], ["s_suppkey"]), "inner") +df = df.join( + df_suppliers_of_interest, left_on=["suppkey"], right_on=["s_suppkey"], how="inner" +) # Count how many orders that supplier is the only failed supplier for df = df.aggregate([col("s_name")], [F.count(col("o_orderkey")).alias("numwait")]) diff --git a/examples/tpch/q22_global_sales_opportunity.py b/examples/tpch/q22_global_sales_opportunity.py index e6660e60c..72dce5289 100644 --- a/examples/tpch/q22_global_sales_opportunity.py +++ b/examples/tpch/q22_global_sales_opportunity.py @@ -62,7 +62,7 @@ df = df.filter(col("c_acctbal") > col("avg_balance")) # Limit results to customers with no orders -df = df.join(df_orders, (["c_custkey"], ["o_custkey"]), "anti") +df = df.join(df_orders, left_on="c_custkey", right_on="o_custkey", how="anti") # Count up the customers and the balances df = df.aggregate( diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index e59f00d9f..efd4038ae 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -20,9 +20,8 @@ """ from __future__ import annotations - - -from typing import Any, Iterable, List, Literal, TYPE_CHECKING +import warnings +from typing import Any, Iterable, List, TYPE_CHECKING, Literal, overload from datafusion.record_batch import RecordBatchStream from typing_extensions import deprecated from datafusion.plan import LogicalPlan, ExecutionPlan @@ -32,7 +31,7 @@ import pandas as pd import polars as pl import pathlib - from typing import Callable + from typing import Callable, Sequence from datafusion._internal import DataFrame as DataFrameInternal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -368,27 +367,105 @@ def distinct(self) -> DataFrame: """ return DataFrame(self.df.distinct()) + @overload + def join( + self, + right: DataFrame, + on: str | 
Sequence[str], + how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", + *, + left_on: None = None, + right_on: None = None, + join_keys: None = None, + ) -> DataFrame: ... + + @overload def join( self, right: DataFrame, + on: None = None, + how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", + *, + left_on: str | Sequence[str], + right_on: str | Sequence[str], + join_keys: tuple[list[str], list[str]] | None = None, + ) -> DataFrame: ... + + @overload + def join( + self, + right: DataFrame, + on: None = None, + how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", + *, join_keys: tuple[list[str], list[str]], - how: str, + left_on: None = None, + right_on: None = None, + ) -> DataFrame: ... + + def join( + self, + right: DataFrame, + on: str | Sequence[str] | tuple[list[str], list[str]] | None = None, + how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", + *, + left_on: str | Sequence[str] | None = None, + right_on: str | Sequence[str] | None = None, + join_keys: tuple[list[str], list[str]] | None = None, ) -> DataFrame: """Join this :py:class:`DataFrame` with another :py:class:`DataFrame`. - Join keys are a pair of lists of column names in the left and right - dataframes, respectively. These lists must have the same length. + `on` has to be provided or both `left_on` and `right_on` in conjunction. Args: right: Other DataFrame to join with. - join_keys: Tuple of two lists of column names to join on. + on: Column names to join on in both dataframes. how: Type of join to perform. Supported types are "inner", "left", "right", "full", "semi", "anti". + left_on: Join column of the left dataframe. + right_on: Join column of the right dataframe. + join_keys: Tuple of two lists of column names to join on. [Deprecated] Returns: DataFrame after join. 
""" - return DataFrame(self.df.join(right.df, join_keys, how)) + # This check is to prevent breaking API changes where users prior to + # DF 43.0.0 would pass the join_keys as a positional argument instead + # of a keyword argument. + if isinstance(on, tuple) and len(on) == 2: + if isinstance(on[0], list) and isinstance(on[1], list): + join_keys = on # type: ignore + on = None + + if join_keys is not None: + warnings.warn( + "`join_keys` is deprecated, use `on` or `left_on` with `right_on`", + category=DeprecationWarning, + stacklevel=2, + ) + left_on = join_keys[0] + right_on = join_keys[1] + + if on: + if left_on or right_on: + raise ValueError( + "`left_on` or `right_on` should not provided with `on`" + ) + left_on = on + right_on = on + elif left_on or right_on: + if left_on is None or right_on is None: + raise ValueError("`left_on` and `right_on` should both be provided.") + else: + raise ValueError( + "either `on` or `left_on` and `right_on` should be provided." + ) + if isinstance(left_on, str): + left_on = [left_on] + if isinstance(right_on, str): + right_on = [right_on] + + return DataFrame(self.df.join(right.df, how, left_on, right_on)) def join_on( self, diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 7b20e9e39..330475302 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -321,14 +321,72 @@ def test_join(): ) df1 = ctx.create_dataframe([[batch]], "r") - df = df.join(df1, join_keys=(["a"], ["a"]), how="inner") - df.show() - df = df.sort(column("l.a")) - table = pa.Table.from_batches(df.collect()) + df2 = df.join(df1, on="a", how="inner") + df2.show() + df2 = df2.sort(column("l.a")) + table = pa.Table.from_batches(df2.collect()) + + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + df2 = df.join(df1, left_on="a", right_on="a", how="inner") + df2.show() + df2 = df2.sort(column("l.a")) + table = pa.Table.from_batches(df2.collect()) expected = 
{"a": [1, 2], "c": [8, 10], "b": [4, 5]} assert table.to_pydict() == expected + # Verify we don't make a breaking change to pre-43.0.0 + # where users would pass join_keys as a positional argument + df2 = df.join(df1, (["a"], ["a"]), how="inner") # type: ignore + df2.show() + df2 = df2.sort(column("l.a")) + table = pa.Table.from_batches(df2.collect()) + + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + +def test_join_invalid_params(): + ctx = SessionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]], "l") + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]], "r") + + with pytest.deprecated_call(): + df2 = df.join(df1, join_keys=(["a"], ["a"]), how="inner") + df2.show() + df2 = df2.sort(column("l.a")) + table = pa.Table.from_batches(df2.collect()) + + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + with pytest.raises( + ValueError, match=r"`left_on` or `right_on` should not provided with `on`" + ): + df2 = df.join(df1, on="a", how="inner", right_on="test") # type: ignore + + with pytest.raises( + ValueError, match=r"`left_on` and `right_on` should both be provided." + ): + df2 = df.join(df1, left_on="a", how="inner") # type: ignore + + with pytest.raises( + ValueError, match=r"either `on` or `left_on` and `right_on` should be provided." 
+ ): + df2 = df.join(df1, how="inner") # type: ignore + def test_join_on(): ctx = SessionContext() diff --git a/src/dataframe.rs b/src/dataframe.rs index dd5d89ce9..ee8fbbf9d 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -271,8 +271,9 @@ impl PyDataFrame { fn join( &self, right: PyDataFrame, - join_keys: (Vec, Vec), how: &str, + left_on: Vec, + right_on: Vec, ) -> PyResult { let join_type = match how { "inner" => JoinType::Inner, @@ -289,16 +290,8 @@ impl PyDataFrame { } }; - let left_keys = join_keys - .0 - .iter() - .map(|s| s.as_ref()) - .collect::>(); - let right_keys = join_keys - .1 - .iter() - .map(|s| s.as_ref()) - .collect::>(); + let left_keys = left_on.iter().map(|s| s.as_ref()).collect::>(); + let right_keys = right_on.iter().map(|s| s.as_ref()).collect::>(); let df = self.df.as_ref().clone().join( right.df.as_ref().clone(), From 3c662010f9d133cda65749b119a1b1731edbe4e5 Mon Sep 17 00:00:00 2001 From: Michael J Ward Date: Sun, 10 Nov 2024 11:02:01 -0600 Subject: [PATCH 019/248] Upgrade to Datafusion 43 (#905) * patch datafusion deps * migrate from deprecated RuntimeEnv::new to RuntimeEnv::try_new Ref: https://github.com/apache/datafusion/pull/12566 * remove Arc from create_udf call Ref: https://github.com/apache/datafusion/pull/12489 * doc typo * migrate to new UnnestOptions API Ref: https://github.com/apache/datafusion/pull/12836/files * update API for logical expr Limit Ref: https://github.com/apache/datafusion/pull/12836 * remove logical expr CrossJoin It was removed upstream. 
Ref: https://github.com/apache/datafusion/pull/13076 * update PyWindowUDF Ref: https://github.com/apache/datafusion/issues/12803 * migrate window functions lead and lag to udwf Ref: https://github.com/apache/datafusion/issues/12802 * migrate window functions rank, dense_rank, and percent_rank to udwf Ref: https://github.com/apache/datafusion/issues/12648 * convert window function cume_dist to udwf Ref: https://github.com/apache/datafusion/issues/12695 * convert window function ntile to udwf Ref: https://github.com/apache/datafusion/issues/12694 * clean up functions_window invocation * Only one column was being passed to udwf * Update to DF 43.0.0 * Update tests to look for string_view type * String view is now the default type for strings * Making a variety of adjustments in wrappers and unit tests to account for the switch from string to string_view as default * Resolve errors in doc building --------- Co-authored-by: Tim Saucer --- Cargo.lock | 373 ++++++++++++++++++--------------- Cargo.toml | 9 +- examples/tpch/_tests.py | 4 +- python/datafusion/expr.py | 4 +- python/datafusion/functions.py | 11 +- python/datafusion/udf.py | 1 + python/tests/test_expr.py | 16 +- python/tests/test_functions.py | 67 ++++-- python/tests/test_imports.py | 2 - python/tests/test_sql.py | 7 + src/context.rs | 2 +- src/dataframe.rs | 8 +- src/expr.rs | 2 - src/expr/cross_join.rs | 94 --------- src/expr/limit.rs | 22 +- src/functions.rs | 18 +- src/sql/logical.rs | 3 +- src/udf.rs | 4 +- src/udwf.rs | 29 ++- 19 files changed, 338 insertions(+), 338 deletions(-) delete mode 100644 src/expr/cross_join.rs diff --git a/Cargo.lock b/Cargo.lock index 0835f219e..497c5b850 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,9 +84,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = 
"c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" [[package]] name = "apache-avro" @@ -130,9 +130,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9ba0d7248932f4e2a12fb37f0a2e3ec82b3bdedbac2a1dce186e036843b8f8c" +checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" dependencies = [ "arrow-arith", "arrow-array", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60afcdc004841a5c8d8da4f4fa22d64eb19c0c01ef4bcedd77f175a7cf6e38f" +checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f16835e8599dbbb1659fd869d865254c4cf32c6c2bb60b6942ac9fc36bfa5da" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" dependencies = [ "ahash", "arrow-buffer", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a1f34f0faae77da6b142db61deba2cb6d60167592b178be317b341440acba80" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" dependencies = [ "bytes", "half", @@ -195,9 +195,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "450e4abb5775bca0740bec0bcf1b1a5ae07eff43bd625661c4436d8e8e4540c4" +checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" 
dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3a4e4d63830a341713e35d9a42452fbc6241d5f42fa5cf6a4681b8ad91370c4" +checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b1e618bbf714c7a9e8d97203c806734f012ff71ae3adc8ad1b075689f540634" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98e983549259a2b97049af7edfb8f28b8911682040e99a94e4ceb1196bd65c2" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b198b9c6fcf086501730efbbcb483317b39330a116125af7bb06467d04b352a3" +checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" dependencies = [ "arrow-array", "arrow-buffer", @@ -282,9 +282,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2427f37b4459a4b9e533045abe87a5183a5e0995a3fc2c2fd45027ae2cc4ef3f" +checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name 
= "arrow-row" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15959657d92e2261a7a323517640af87f5afd9fd8a6492e424ebee2203c567f6" +checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" dependencies = [ "ahash", "arrow-array", @@ -311,18 +311,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf0388a18fd7f7f3fe3de01852d30f54ed5182f9004db700fbe3ba843ed2794" +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b83e5723d307a38bf00ecd2972cd078d1339c7fd3eb044f609958a9a24463f3a" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" dependencies = [ "ahash", "arrow-array", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab3db7c09dd826e74079661d84ed01ed06547cf75d52c2818ef776d0d852305" +checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" dependencies = [ "arrow-array", "arrow-buffer", @@ -351,9 +351,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.13" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e614738943d3f68c628ae3dbce7c3daffb196665f82f8c8ea6b65de73c79429" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ "bzip2", "flate2", @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -515,9 +515,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "bzip2" @@ -542,9 +542,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.28" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" dependencies = [ "jobserver", "libc", @@ -557,6 +557,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -725,9 +731,9 @@ dependencies = [ [[package]] name = "dary_heap" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" [[package]] name = "dashmap" @@ -745,9 +751,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ee907b081e45e1d14e1f327e89ef134f91fcebad0bfc2dc229fa9f6044379682" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" dependencies = [ "ahash", "apache-avro", @@ -804,9 +810,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2b914f6e33c429af7d8696c72a47ed9225d7e2b82c747ebdfa2408ed53579f" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" dependencies = [ "arrow-schema", "async-trait", @@ -819,9 +825,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a84f8e76330c582a6b8ada0b2c599ca46cfe46b7585e458fc3f4092bc722a18" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" dependencies = [ "ahash", "apache-avro", @@ -832,6 +838,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", + "indexmap", "instant", "libc", "num_cpus", @@ -845,9 +852,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf08cc30d92720d557df13bd5a5696213bd5ea0f38a866d8d85055d866fba774" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" dependencies = [ "log", "tokio", @@ -855,9 +862,9 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86bc4183d5c45b9f068a6f351678a0d1eb1225181424542bb75db18ec280b822" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" dependencies = [ "arrow", "chrono", @@ -876,9 +883,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.0.0" +version = "43.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "202119ce58e4d103e37ae64aab40d4e574c97bdd2bea994bf307b175fcbfa74d" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" dependencies = [ "ahash", "arrow", @@ -888,7 +895,9 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", @@ -898,20 +907,21 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b181ce8569216abb01ef3294aa16c0a40d7d39350c2ff01ede00f167a535f2" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ "arrow", "datafusion-common", + "itertools", "paste", ] [[package]] name = "datafusion-functions" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4124b8066444e05a24472f852e94cf56546c0f4d92d00f018f207216902712" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ "arrow", "arrow-buffer", @@ -936,9 +946,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94acdac235ea21810150a89751617ef2db7e32eba27f54be48a81bde2bfe119" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" dependencies = [ "ahash", "arrow", @@ -950,16 +960,16 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "half", + "indexmap", "log", "paste", - "sqlparser", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5c9ea085bbf900bf16e2ca0f56fc56236b2e4f2e1a2cccb67bcd83c5ab4ad0ef" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" dependencies = [ "ahash", "arrow", @@ -971,9 +981,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c882e61665ed60c5ce9b061c1e587aeb8ae5ae4bcb5e5f2465139ab25328e0f" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" dependencies = [ "arrow", "arrow-array", @@ -994,21 +1004,34 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98a354ce96df3ca6d025093adac9fd55ca09931c9b6f2630140721a95873fde4" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" dependencies = [ "datafusion-common", "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] name = "datafusion-optimizer" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf677c74fb7b5a1899ef52709e4a70fff3ed80bdfb4bbe495909810e83d5f39" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" dependencies = [ "arrow", "async-trait", @@ -1026,9 +1049,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"30b077999f6eb6c43d6b25bc66332a3be2f693c382840f008dd763b8540f9530" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" dependencies = [ "ahash", "arrow", @@ -1037,30 +1060,26 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64 0.22.1", "chrono", "datafusion-common", - "datafusion-execution", "datafusion-expr", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "hex", "indexmap", "itertools", "log", "paste", "petgraph", - "regex", ] [[package]] name = "datafusion-physical-expr-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce847f885c2b13bbe29f5c8b7948797131aa470af6e16d2a94f4428b4f4f1bd" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" dependencies = [ "ahash", "arrow", @@ -1072,13 +1091,15 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d13238e3b9fdd62a4c18760bfef714bb990d1e1d3430e9f416aae4b3cfaa71af" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" dependencies = [ + "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools", @@ -1086,9 +1107,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faba6f55a7eaf0241d07d12c2640de52742646b10f754485d5192bdfe2c9ceae" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" dependencies = [ "ahash", "arrow", @@ -1102,8 +1123,8 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", 
"datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", @@ -1121,9 +1142,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585357d621fa03ea85a7fefca79ebc5ef0ee13a7f82be0762a414879a4d190a7" +checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" dependencies = [ "arrow", "chrono", @@ -1137,9 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4db6534382f92f528bdb5d925b4214c31ffd84fa7fe1eff3ed0d2f1286851ab8" +checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" dependencies = [ "arrow", "chrono", @@ -1155,6 +1176,7 @@ dependencies = [ "arrow", "async-trait", "datafusion", + "datafusion-functions-window-common", "datafusion-proto", "datafusion-substrait", "futures", @@ -1171,15 +1193,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad8d96a9b52e1aa24f9373696a815be828193efce7cb0bbd2140b6bb67d1819" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" dependencies = [ "arrow", "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", + "indexmap", "log", "regex", "sqlparser", @@ -1188,9 +1211,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "42.0.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f92b1b80e98bf5a9921bf118816e0e766d18527e343153321fcccfe4d68c5c45" +checksum = "8b9c768d2b4c4485c43afbaeeb86dd1f2ac3fb34a9e6e8c8b06180d2a223d5ba" dependencies = [ "arrow-buffer", "async-recursion", @@ -1530,9 +1553,9 @@ checksum = 
"9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", @@ -1568,9 +1591,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -1684,9 +1707,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -1763,9 +1786,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.159" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libflate" @@ -1793,9 +1816,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libmimalloc-sys" @@ -2011,9 +2034,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.0" +version = "0.11.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -2086,9 +2109,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.1.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310c46a70a3ba90d98fec39fa2da6d9d731e544191da6fb56c9d199484d0dd3e" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" dependencies = [ "ahash", "arrow-array", @@ -2228,9 +2251,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -2261,9 +2284,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", "syn", @@ -2271,9 +2294,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -2342,9 +2365,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" +checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" dependencies = [ "cfg-if", "indoc", @@ -2360,9 +2383,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" +checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" dependencies = [ "once_cell", "target-lexicon", @@ -2370,9 +2393,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" +checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" dependencies = [ "libc", "pyo3-build-config", @@ -2380,9 +2403,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" +checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2392,9 +2415,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" +checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -2456,10 +2479,11 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = 
"e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" dependencies = [ + "cfg_aliases", "libc", "once_cell", "socket2", @@ -2517,9 +2541,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -2552,9 +2576,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" +checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" dependencies = [ "hashbrown 0.14.5", "memchr", @@ -2562,9 +2586,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -2649,9 +2673,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" dependencies = [ "bitflags 2.6.0", "errno", @@ -2662,9 +2686,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = 
"eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ "once_cell", "ring", @@ -2698,9 +2722,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" @@ -2715,9 +2739,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" @@ -2813,18 +2837,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", @@ -2844,9 +2868,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -2974,9 +2998,9 
@@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" dependencies = [ "log", "sqlparser_derive", @@ -3042,9 +3066,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.41.9" +version = "0.45.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a3bf05f1d7a3fd7a97790d410f6e859b3a98dcde05e7a3fc00b31b0f60fe7cb" +checksum = "a127ae9d8e443cea5c2122eb2ffe5fe489e802a1e746a09c5a5cb59d074c0aeb" dependencies = [ "heck 0.5.0", "pbjson", @@ -3055,6 +3079,7 @@ dependencies = [ "prost-build", "prost-types", "protobuf-src", + "regress", "schemars", "semver", "serde", @@ -3073,9 +3098,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.79" +version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", @@ -3112,18 +3137,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = 
"ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", @@ -3167,9 +3192,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", @@ -3297,9 +3322,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6beec125971dda80a086f90b4a70f60f222990ce4d63ad0fc140492f53444" +checksum = "b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" dependencies = [ "typify-impl", "typify-macro", @@ -3307,9 +3332,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bbb24e990654aff858d80fee8114f4322f7d7a1b1ecb45129e2fcb0d0ad5ae" +checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" dependencies = [ "heck 0.5.0", "log", @@ -3327,9 +3352,9 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" +checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" dependencies = [ "proc-macro2", "quote", @@ -3447,9 +3472,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", "once_cell", @@ -3458,9 +3483,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -3473,9 +3498,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.43" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -3485,9 +3510,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3495,9 +3520,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -3508,15 +3533,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -3527,9 +3552,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 073f82cf0..11ce08c75 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,9 +37,10 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "42.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-substrait = { version = "42.0.0", optional = true } -datafusion-proto = { version = "42.0.0" } +datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion-substrait = { version = "43.0.0", optional = true } +datafusion-proto = { version = "43.0.0" } +datafusion-functions-window-common = { version = "43.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } @@ -58,4 +59,4 @@ crate-type = ["cdylib", "rlib"] [profile.release] lto = true 
-codegen-units = 1 +codegen-units = 1 \ No newline at end of file diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 13144ae9d..3ce9cdfe5 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -25,7 +25,7 @@ def df_selection(col_name, col_type): if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): return F.round(col(col_name), lit(2)).alias(col_name) - elif col_type == pa.string(): + elif col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) else: return col(col_name) @@ -43,7 +43,7 @@ def load_schema(col_name, col_type): def expected_selection(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return F.trim(col(col_name)).cast(col_type).alias(col_name) - elif col_type == pa.string(): + elif col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) else: return col(col_name) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index c4e7713f3..b10724381 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -51,7 +51,6 @@ Column = expr_internal.Column CreateMemoryTable = expr_internal.CreateMemoryTable CreateView = expr_internal.CreateView -CrossJoin = expr_internal.CrossJoin Distinct = expr_internal.Distinct DropTable = expr_internal.DropTable EmptyRelation = expr_internal.EmptyRelation @@ -140,7 +139,6 @@ "Join", "JoinType", "JoinConstraint", - "CrossJoin", "Union", "Unnest", "UnnestExpr", @@ -376,6 +374,8 @@ def literal(value: Any) -> Expr: ``value`` must be a valid PyArrow scalar value or easily castable to one. 
""" + if isinstance(value, str): + value = pa.scalar(value, type=pa.string_view()) if not isinstance(value, pa.Scalar): value = pa.scalar(value) return Expr(expr_internal.Expr.literal(value)) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 907f801af..5a2eab56d 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -297,7 +297,7 @@ def decode(input: Expr, encoding: Expr) -> Expr: def array_to_string(expr: Expr, delimiter: Expr) -> Expr: """Converts each element to its text representation.""" - return Expr(f.array_to_string(expr.expr, delimiter.expr)) + return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string()))) def array_join(expr: Expr, delimiter: Expr) -> Expr: @@ -1067,7 +1067,10 @@ def struct(*args: Expr) -> Expr: def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: """Returns a struct with the given names and arguments pairs.""" - name_pair_exprs = [[Expr.literal(pair[0]), pair[1]] for pair in name_pairs] + name_pair_exprs = [ + [Expr.literal(pa.scalar(pair[0], type=pa.string())), pair[1]] + for pair in name_pairs + ] # flatten name_pairs = [x.expr for xs in name_pair_exprs for x in xs] @@ -1424,7 +1427,9 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False) nulls_first = "NULLS FIRST" if null_first else "NULLS LAST" return Expr( f.array_sort( - array.expr, Expr.literal(desc).expr, Expr.literal(nulls_first).expr + array.expr, + Expr.literal(pa.scalar(desc, type=pa.string())).expr, + Expr.literal(pa.scalar(nulls_first, type=pa.string())).expr, ) ) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 291ef2bae..d9d994b22 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -229,6 +229,7 @@ def udaf( which this UDAF is used. The following examples are all valid. .. 
code-block:: python + import pyarrow as pa import pyarrow.compute as pc diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 1847edef2..77f88aa44 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -85,14 +85,18 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - assert plan.skip() == 0 + # TODO: Upstream now has expressions for skip and fetch + # REF: https://github.com/apache/datafusion/pull/12836 + # assert plan.skip() == 0 df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") plan = df.logical_plan() plan = plan.to_variant() assert isinstance(plan, Limit) - assert plan.skip() == 5 + # TODO: Upstream now has expressions for skip and fetch + # REF: https://github.com/apache/datafusion/pull/12836 + # assert plan.skip() == 5 def test_aggregate_query(test_ctx): @@ -126,7 +130,10 @@ def test_relational_expr(test_ctx): ctx = SessionContext() batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array(["alpha", "beta", "gamma"])], + [ + pa.array([1, 2, 3]), + pa.array(["alpha", "beta", "gamma"], type=pa.string_view()), + ], names=["a", "b"], ) df = ctx.create_dataframe([[batch]], name="batch_array") @@ -141,7 +148,8 @@ def test_relational_expr(test_ctx): assert df.filter(col("b") == "beta").count() == 1 assert df.filter(col("b") != "beta").count() == 2 - assert df.filter(col("a") == "beta").count() == 0 + with pytest.raises(Exception): + df.filter(col("a") == "beta").count() def test_expr_to_variant(): diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index c65c633a4..b3a5a0652 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -34,9 +34,9 @@ def df(): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [ - pa.array(["Hello", "World", "!"]), + pa.array(["Hello", "World", "!"], type=pa.string_view()), pa.array([4, 5, 6]), - pa.array(["hello ", " world ", " !"]), + pa.array(["hello ", " 
world ", " !"], type=pa.string_view()), pa.array( [ datetime(2022, 12, 31), @@ -88,8 +88,8 @@ def test_literal(df): assert len(result) == 1 result = result[0] assert result.column(0) == pa.array([1] * 3) - assert result.column(1) == pa.array(["1"] * 3) - assert result.column(2) == pa.array(["OK"] * 3) + assert result.column(1) == pa.array(["1"] * 3, type=pa.string_view()) + assert result.column(2) == pa.array(["OK"] * 3, type=pa.string_view()) assert result.column(3) == pa.array([3.14] * 3) assert result.column(4) == pa.array([True] * 3) assert result.column(5) == pa.array([b"hello world"] * 3) @@ -97,7 +97,9 @@ def test_literal(df): def test_lit_arith(df): """Test literals with arithmetic operations""" - df = df.select(literal(1) + column("b"), f.concat(column("a"), literal("!"))) + df = df.select( + literal(1) + column("b"), f.concat(column("a").cast(pa.string()), literal("!")) + ) result = df.collect() assert len(result) == 1 result = result[0] @@ -600,21 +602,33 @@ def test_array_function_obj_tests(stmt, py_expr): f.ascii(column("a")), pa.array([72, 87, 33], type=pa.int32()), ), # H = 72; W = 87; ! 
= 33 - (f.bit_length(column("a")), pa.array([40, 40, 8], type=pa.int32())), - (f.btrim(literal(" World ")), pa.array(["World", "World", "World"])), + ( + f.bit_length(column("a").cast(pa.string())), + pa.array([40, 40, 8], type=pa.int32()), + ), + ( + f.btrim(literal(" World ")), + pa.array(["World", "World", "World"], type=pa.string_view()), + ), (f.character_length(column("a")), pa.array([5, 5, 1], type=pa.int32())), (f.chr(literal(68)), pa.array(["D", "D", "D"])), ( f.concat_ws("-", column("a"), literal("test")), pa.array(["Hello-test", "World-test", "!-test"]), ), - (f.concat(column("a"), literal("?")), pa.array(["Hello?", "World?", "!?"])), + ( + f.concat(column("a").cast(pa.string()), literal("?")), + pa.array(["Hello?", "World?", "!?"]), + ), (f.initcap(column("c")), pa.array(["Hello ", " World ", " !"])), (f.left(column("a"), literal(3)), pa.array(["Hel", "Wor", "!"])), (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())), (f.lower(column("a")), pa.array(["hello", "world", "!"])), (f.lpad(column("a"), literal(7)), pa.array([" Hello", " World", " !"])), - (f.ltrim(column("c")), pa.array(["hello ", "world ", "!"])), + ( + f.ltrim(column("c")), + pa.array(["hello ", "world ", "!"], type=pa.string_view()), + ), ( f.md5(column("a")), pa.array( @@ -640,19 +654,25 @@ def test_array_function_obj_tests(stmt, py_expr): f.rpad(column("a"), literal(8)), pa.array(["Hello ", "World ", "! 
"]), ), - (f.rtrim(column("c")), pa.array(["hello", " world", " !"])), + ( + f.rtrim(column("c")), + pa.array(["hello", " world", " !"], type=pa.string_view()), + ), ( f.split_part(column("a"), literal("l"), literal(1)), pa.array(["He", "Wor", "!"]), ), (f.starts_with(column("a"), literal("Wor")), pa.array([False, True, False])), (f.strpos(column("a"), literal("o")), pa.array([5, 2, 0], type=pa.int32())), - (f.substr(column("a"), literal(3)), pa.array(["llo", "rld", ""])), + ( + f.substr(column("a"), literal(3)), + pa.array(["llo", "rld", ""], type=pa.string_view()), + ), ( f.translate(column("a"), literal("or"), literal("ld")), pa.array(["Helll", "Wldld", "!"]), ), - (f.trim(column("c")), pa.array(["hello", "world", "!"])), + (f.trim(column("c")), pa.array(["hello", "world", "!"], type=pa.string_view())), (f.upper(column("c")), pa.array(["HELLO ", " WORLD ", " !"])), (f.ends_with(column("a"), literal("llo")), pa.array([True, False, False])), ( @@ -794,9 +814,9 @@ def test_temporal_functions(df): f.date_trunc(literal("month"), column("d")), f.datetrunc(literal("day"), column("d")), f.date_bin( - literal("15 minutes"), + literal("15 minutes").cast(pa.string()), column("d"), - literal("2001-01-01 00:02:30"), + literal("2001-01-01 00:02:30").cast(pa.string()), ), f.from_unixtime(literal(1673383974)), f.to_timestamp(literal("2023-09-07 05:06:14.523952")), @@ -858,8 +878,8 @@ def test_case(df): result = df.collect() result = result[0] assert result.column(0) == pa.array([10, 8, 8]) - assert result.column(1) == pa.array(["Hola", "Mundo", "!!"]) - assert result.column(2) == pa.array(["Hola", "Mundo", None]) + assert result.column(1) == pa.array(["Hola", "Mundo", "!!"], type=pa.string_view()) + assert result.column(2) == pa.array(["Hola", "Mundo", None], type=pa.string_view()) def test_when_with_no_base(df): @@ -877,8 +897,10 @@ def test_when_with_no_base(df): result = df.collect() result = result[0] assert result.column(0) == pa.array([4, 5, 6]) - assert result.column(1) 
== pa.array(["too small", "just right", "too big"]) - assert result.column(2) == pa.array(["Hello", None, None]) + assert result.column(1) == pa.array( + ["too small", "just right", "too big"], type=pa.string_view() + ) + assert result.column(2) == pa.array(["Hello", None, None], type=pa.string_view()) def test_regr_funcs_sql(df): @@ -1021,8 +1043,13 @@ def test_regr_funcs_df(func, expected): def test_binary_string_functions(df): df = df.select( - f.encode(column("a"), literal("base64")), - f.decode(f.encode(column("a"), literal("base64")), literal("base64")), + f.encode(column("a").cast(pa.string()), literal("base64").cast(pa.string())), + f.decode( + f.encode( + column("a").cast(pa.string()), literal("base64").cast(pa.string()) + ), + literal("base64").cast(pa.string()), + ), ) result = df.collect() assert len(result) == 1 diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 3d324fb62..6ea77b15f 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -46,7 +46,6 @@ Join, JoinType, JoinConstraint, - CrossJoin, Union, Like, ILike, @@ -129,7 +128,6 @@ def test_class_module_is_datafusion(): Join, JoinType, JoinConstraint, - CrossJoin, Union, Like, ILike, diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 39e5ffe6d..a2521dd09 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -468,6 +468,13 @@ def test_simple_select(ctx, tmp_path, arr): batches = ctx.sql("SELECT a AS tt FROM t").collect() result = batches[0].column(0) + # In DF 43.0.0 we now default to having BinaryView and StringView + # so the array that is saved to the parquet is slightly different + # than the array read. Convert to values for comparison. 
+ if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray): + arr = arr.tolist() + result = result.tolist() + np.testing.assert_equal(result, arr) diff --git a/src/context.rs b/src/context.rs index f445874d6..c2a263fa7 100644 --- a/src/context.rs +++ b/src/context.rs @@ -287,7 +287,7 @@ impl PySessionContext { } else { RuntimeConfig::default() }; - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new(RuntimeEnv::try_new(runtime_config)?); let session_state = SessionStateBuilder::new() .with_config(config) .with_runtime_env(runtime) diff --git a/src/dataframe.rs b/src/dataframe.rs index ee8fbbf9d..e7d6ca6d6 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -402,7 +402,9 @@ impl PyDataFrame { #[pyo3(signature = (column, preserve_nulls=true))] fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyResult { - let unnest_options = UnnestOptions { preserve_nulls }; + // TODO: expose RecursionUnnestOptions + // REF: https://github.com/apache/datafusion/pull/11577 + let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); let df = self .df .as_ref() @@ -413,7 +415,9 @@ impl PyDataFrame { #[pyo3(signature = (columns, preserve_nulls=true))] fn unnest_columns(&self, columns: Vec, preserve_nulls: bool) -> PyResult { - let unnest_options = UnnestOptions { preserve_nulls }; + // TODO: expose RecursionUnnestOptions + // REF: https://github.com/apache/datafusion/pull/11577 + let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); let cols = columns.iter().map(|s| s.as_ref()).collect::>(); let df = self .df diff --git a/src/expr.rs b/src/expr.rs index 49fa4b845..bca0cd3fa 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -65,7 +65,6 @@ pub mod column; pub mod conditional_expr; pub mod create_memory_table; pub mod create_view; -pub mod cross_join; pub mod distinct; pub mod drop_table; pub mod empty_relation; @@ -775,7 +774,6 @@ pub(crate) fn init_module(m: 
&Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/cross_join.rs b/src/expr/cross_join.rs deleted file mode 100644 index 5bc202aac..000000000 --- a/src/expr/cross_join.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::logical_expr::logical_plan::CrossJoin; -use pyo3::prelude::*; -use std::fmt::{self, Display, Formatter}; - -use super::logical_node::LogicalNode; -use crate::common::df_schema::PyDFSchema; -use crate::sql::logical::PyLogicalPlan; - -#[pyclass(name = "CrossJoin", module = "datafusion.expr", subclass)] -#[derive(Clone)] -pub struct PyCrossJoin { - cross_join: CrossJoin, -} - -impl From for PyCrossJoin { - fn from(cross_join: CrossJoin) -> PyCrossJoin { - PyCrossJoin { cross_join } - } -} - -impl From for CrossJoin { - fn from(cross_join: PyCrossJoin) -> Self { - cross_join.cross_join - } -} - -impl Display for PyCrossJoin { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!( - f, - "CrossJoin - \nLeft: {:?} - \nRight: {:?} - \nSchema: {:?}", - &self.cross_join.left, &self.cross_join.right, &self.cross_join.schema - ) - } -} - -#[pymethods] -impl PyCrossJoin { - /// Retrieves the left input `LogicalPlan` to this `CrossJoin` node - fn left(&self) -> PyResult { - Ok(self.cross_join.left.as_ref().clone().into()) - } - - /// Retrieves the right input `LogicalPlan` to this `CrossJoin` node - fn right(&self) -> PyResult { - Ok(self.cross_join.right.as_ref().clone().into()) - } - - /// Resulting Schema for this `CrossJoin` node instance - fn schema(&self) -> PyResult { - Ok(self.cross_join.schema.as_ref().clone().into()) - } - - fn __repr__(&self) -> PyResult { - Ok(format!("CrossJoin({})", self)) - } - - fn __name__(&self) -> PyResult { - Ok("CrossJoin".to_string()) - } -} - -impl LogicalNode for PyCrossJoin { - fn inputs(&self) -> Vec { - vec![ - PyLogicalPlan::from((*self.cross_join.left).clone()), - PyLogicalPlan::from((*self.cross_join.right).clone()), - ] - } - - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) - } -} diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 876e154c1..84ad7d68b 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -46,7 +46,7 @@ impl Display for PyLimit { write!( f, "Limit - 
Skip: {} + Skip: {:?} Fetch: {:?} Input: {:?}", &self.limit.skip, &self.limit.fetch, &self.limit.input @@ -56,15 +56,19 @@ impl Display for PyLimit { #[pymethods] impl PyLimit { - /// Retrieves the skip value for this `Limit` - fn skip(&self) -> usize { - self.limit.skip - } + // NOTE: Upstream now has expressions for skip and fetch + // TODO: Do we still want to expose these? + // REF: https://github.com/apache/datafusion/pull/12836 - /// Retrieves the fetch value for this `Limit` - fn fetch(&self) -> Option { - self.limit.fetch - } + // /// Retrieves the skip value for this `Limit` + // fn skip(&self) -> usize { + // self.limit.skip + // } + + // /// Retrieves the fetch value for this `Limit` + // fn fetch(&self) -> Option { + // self.limit.fetch + // } /// Retrieves the input `LogicalPlan` to this `Limit` node fn input(&self) -> PyResult> { diff --git a/src/functions.rs b/src/functions.rs index fe3531ba9..e29c57f9b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -16,7 +16,6 @@ // under the License. 
use datafusion::functions_aggregate::all_default_aggregate_functions; -use datafusion::logical_expr::window_function; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -33,6 +32,7 @@ use datafusion::common::{Column, ScalarValue, TableReference}; use datafusion::execution::FunctionRegistry; use datafusion::functions; use datafusion::functions_aggregate; +use datafusion::functions_window; use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::sqlparser::ast::NullTreatment as DFNullTreatment; use datafusion::logical_expr::{ @@ -758,7 +758,7 @@ pub fn lead( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::lead(arg.expr, Some(shift_offset), default_value); + let window_fn = functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -772,7 +772,7 @@ pub fn lag( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::lag(arg.expr, Some(shift_offset), default_value); + let window_fn = functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -783,7 +783,7 @@ pub fn row_number( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = datafusion::functions_window::expr_fn::row_number(); + let window_fn = functions_window::expr_fn::row_number(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -794,7 +794,7 @@ pub fn rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::rank(); + let window_fn = functions_window::expr_fn::rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -805,7 +805,7 @@ pub fn dense_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn 
= window_function::dense_rank(); + let window_fn = functions_window::expr_fn::dense_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -816,7 +816,7 @@ pub fn percent_rank( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::percent_rank(); + let window_fn = functions_window::expr_fn::percent_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -827,7 +827,7 @@ pub fn cume_dist( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::cume_dist(); + let window_fn = functions_window::expr_fn::cume_dist(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } @@ -839,7 +839,7 @@ pub fn ntile( partition_by: Option>, order_by: Option>, ) -> PyResult { - let window_fn = window_function::ntile(arg.into()); + let window_fn = functions_window::expr_fn::ntile(arg.into()); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) } diff --git a/src/sql/logical.rs b/src/sql/logical.rs index fc398ff89..40f0a6a65 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; -use crate::expr::cross_join::PyCrossJoin; use crate::expr::distinct::PyDistinct; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; @@ -68,7 +67,6 @@ impl PyLogicalPlan { match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), - LogicalPlan::CrossJoin(plan) => PyCrossJoin::from(plan.clone()).to_variant(py), LogicalPlan::Distinct(plan) => PyDistinct::from(plan.clone()).to_variant(py), LogicalPlan::EmptyRelation(plan) => PyEmptyRelation::from(plan.clone()).to_variant(py), LogicalPlan::Explain(plan) => PyExplain::from(plan.clone()).to_variant(py), @@ -92,6 +90,7 
@@ impl PyLogicalPlan { | LogicalPlan::Ddl(_) | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) + | LogicalPlan::Execute(_) | LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( "Conversion of variant not implemented: {:?}", self.plan diff --git a/src/udf.rs b/src/udf.rs index ec8efb169..4570e77a6 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -31,7 +31,7 @@ use datafusion::logical_expr::{create_udf, ColumnarValue}; use crate::expr::PyExpr; use crate::utils::parse_volatility; -/// Create a Rust callable function fr a python function that expects pyarrow arrays +/// Create a Rust callable function from a python function that expects pyarrow arrays fn pyarrow_function_to_rust( func: PyObject, ) -> impl Fn(&[ArrayRef]) -> Result { @@ -97,7 +97,7 @@ impl PyScalarUDF { let function = create_udf( name, input_types.0, - Arc::new(return_type.0), + return_type.0, parse_volatility(volatility)?, to_scalar_function_impl(func), ); diff --git a/src/udwf.rs b/src/udwf.rs index 43c21ec7b..3f5ad0b1d 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -20,11 +20,16 @@ use std::ops::Range; use std::sync::Arc; use arrow::array::{make_array, Array, ArrayData, ArrayRef}; +use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; use datafusion::logical_expr::window_state::WindowAggState; +use datafusion::physical_plan::PhysicalExpr; use datafusion::scalar::ScalarValue; +use datafusion_functions_window_common::expr::ExpressionArgs; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use crate::expr::PyExpr; +use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::{DataFusionError, Result}; @@ -33,9 +38,6 @@ use datafusion::logical_expr::{ }; use pyo3::types::{PyList, PyTuple}; -use crate::expr::PyExpr; -use crate::utils::parse_volatility; - #[derive(Debug)] struct RustPartitionEvaluator { evaluator: PyObject, @@ -90,6 
+92,7 @@ impl PartitionEvaluator for RustPartitionEvaluator { } fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { + println!("evaluate all called with number of values {}", values.len()); Python::with_gil(|py| { let py_values = PyList::new_bound( py, @@ -299,11 +302,25 @@ impl WindowUDFImpl for MultiColumnWindowUDF { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { + // TODO: Should nullable always be `true`? + Ok(arrow::datatypes::Field::new( + field_args.name(), + self.return_type.clone(), + true, + )) } - fn partition_evaluator(&self) -> Result> { + // TODO: Enable passing partition_evaluator_args to python? + fn partition_evaluator( + &self, + _partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { + let _ = _partition_evaluator_args; (self.partition_evaluator_factory)() } + + fn expressions(&self, expr_args: ExpressionArgs) -> Vec> { + expr_args.input_exprs().into() + } } From e3e55b7cb70ac27e209edfcad2e008f685687e90 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Nov 2024 10:43:22 -0600 Subject: [PATCH 020/248] build(deps): bump tokio from 1.40.0 to 1.41.1 (#946) Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.40.0 to 1.41.1. - [Release notes](https://github.com/tokio-rs/tokio/releases) - [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.40.0...tokio-1.41.1) --- updated-dependencies: - dependency-name: tokio dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 497c5b850..f483a6a2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3192,9 +3192,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.0" +version = "1.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 11ce08c75..d86948b3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.39", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.41", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "53", features = ["pyarrow"] } datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } From 53cdb11637ac1945bdb8291a72bf4c7fbad95c49 Mon Sep 17 00:00:00 2001 From: kosiew Date: Tue, 12 Nov 2024 19:20:55 +0800 Subject: [PATCH 021/248] Add list_cat, list_concat, list_repeat (#942) * Add list_cat, list_concat * Add list_repeat * docs: add examples for list_cat, list_concat, and list_repeat functions * Amend list_repeat code example - literal * Amend list_ to array_ in documentation --- .../common-operations/expressions.rst | 29 +++++++++++++++++++ python/datafusion/functions.py | 27 +++++++++++++++++ python/tests/test_functions.py | 12 ++++++++ 3 files changed, 68 insertions(+) diff --git 
a/docs/source/user-guide/common-operations/expressions.rst b/docs/source/user-guide/common-operations/expressions.rst index b2a83c89f..e94e1a6b5 100644 --- a/docs/source/user-guide/common-operations/expressions.rst +++ b/docs/source/user-guide/common-operations/expressions.rst @@ -110,6 +110,35 @@ This function returns an integer indicating the total number of elements in the In this example, the `num_elements` column will contain `3` for both rows. +To concatenate two arrays, you can use the function :py:func:`datafusion.functions.array_cat` or :py:func:`datafusion.functions.array_concat`. +These functions return a new array that is the concatenation of the input arrays. + +.. ipython:: python + + from datafusion import SessionContext, col + from datafusion.functions import array_cat, array_concat + + ctx = SessionContext() + df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[4, 5, 6]]}) + df.select(array_cat(col("a"), col("b")).alias("concatenated_array")) + +In this example, the `concatenated_array` column will contain `[1, 2, 3, 4, 5, 6]`. + +To repeat the elements of an array a specified number of times, you can use the function :py:func:`datafusion.functions.array_repeat`. +This function returns a new array with the elements repeated. + +.. ipython:: python + + from datafusion import SessionContext, col, literal + from datafusion.functions import array_repeat + + ctx = SessionContext() + df = ctx.from_pydict({"a": [[1, 2, 3]]}) + df.select(array_repeat(col("a"), literal(2)).alias("repeated_array")) + +In this example, the `repeated_array` column will contain `[[1, 2, 3], [1, 2, 3]]`. 
+ + Structs ------- diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 5a2eab56d..88ea7280d 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -147,6 +147,8 @@ "length", "levenshtein", "list_append", + "list_cat", + "list_concat", "list_dims", "list_distinct", "list_element", @@ -162,6 +164,7 @@ "list_prepend", "list_push_back", "list_push_front", + "list_repeat", "list_remove", "list_remove_all", "list_remove_n", @@ -1145,6 +1148,22 @@ def array_distinct(array: Expr) -> Expr: return Expr(f.array_distinct(array.expr)) +def list_cat(*args: Expr) -> Expr: + """Concatenates the input arrays. + + This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. + """ + return array_concat(*args) + + +def list_concat(*args: Expr) -> Expr: + """Concatenates the input arrays. + + This is an alias for :py:func:`array_concat`, :py:func:`array_cat`. + """ + return array_concat(*args) + + def list_distinct(array: Expr) -> Expr: """Returns distinct values from the array after removing duplicates. @@ -1369,6 +1388,14 @@ def array_repeat(element: Expr, count: Expr) -> Expr: return Expr(f.array_repeat(element.expr, count.expr)) +def list_repeat(element: Expr, count: Expr) -> Expr: + """Returns an array containing ``element`` ``count`` times. + + This is an alias for :py:func:`array_repeat`. 
 + """ + return array_repeat(element, count) + + def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: """Replaces the first occurrence of ``from_val`` with ``to_val``.""" return Expr(f.array_replace(array.expr, from_val.expr, to_val.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index b3a5a0652..c14cfc2dc 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -291,6 +291,14 @@ def py_flatten(arr): lambda col: f.array_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], ], + [ + lambda col: f.list_cat(col, col), + lambda data: [np.concatenate([arr, arr]) for arr in data], + ], + [ + lambda col: f.list_concat(col, col), + lambda data: [np.concatenate([arr, arr]) for arr in data], + ], [ lambda col: f.array_dims(col), lambda data: [[len(r)] for r in data], @@ -439,6 +447,10 @@ def py_flatten(arr): lambda col: f.array_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], ], + [ + lambda col: f.list_repeat(col, literal(2)), + lambda data: [[arr] * 2 for arr in data], + ], [ lambda col: f.array_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], From 5e32ada2565cebec3df54a1bbf9725f3a434b24d Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 15 Nov 2024 11:23:27 -0500 Subject: [PATCH 022/248] Add foreign table providers (#921) * testing FFI for table provider * Was able to get round trip schema from datafusion -> delta table -> datafusion * Expand file structure * WIP on execution plan * Working through execution plan FFI * Using datafusion-proto for execution plan properties * Adding plan properties parsing from ffi * Standardize naming for FFI structs * Intermediate testing and troubleshooting * Adding record batch stream ffi representation * Minimum viable product demonstrating foreign table provider * Move ffi module to datafusion core * Modifications need to compile against latest DF * Set DF to
42.0.0 * Rebasing and pulling in a few changes for DF43.0 * Add wrapper for register table provider * Suppress deprecation warning * Add example for FFI table provider * Add pytest for FFI module to CI * Add license text * Change the name of the FFI table provider test so it doesn't try to run during the first pass of pytest when the module hasn't been built * Build example in build stage to be used during test stage * Combine pytests into one stage * Fix path for unit test * Installing maturin for ffi test in test script * Need to install the wheel for unit test * Add online documentation about using custom table providers * Raise an error if method is not implemented when it is expected --- .github/workflows/build.yml | 4 +- .github/workflows/test.yaml | 9 + Cargo.lock | 627 +++- Cargo.toml | 1 + docs/source/user-guide/io/index.rst | 1 + docs/source/user-guide/io/table_provider.rst | 56 + .../ffi-table-provider/.cargo/config.toml | 12 + examples/ffi-table-provider/Cargo.lock | 3175 +++++++++++++++++ examples/ffi-table-provider/Cargo.toml | 36 + examples/ffi-table-provider/build.rs | 20 + examples/ffi-table-provider/pyproject.toml | 33 + .../python/tests/_test_table_provider.py | 40 + examples/ffi-table-provider/src/lib.rs | 115 + python/datafusion/context.py | 8 + python/tests/test_dataframe.py | 1 + src/context.rs | 29 +- 16 files changed, 4078 insertions(+), 89 deletions(-) create mode 100644 docs/source/user-guide/io/table_provider.rst create mode 100644 examples/ffi-table-provider/.cargo/config.toml create mode 100644 examples/ffi-table-provider/Cargo.lock create mode 100644 examples/ffi-table-provider/Cargo.toml create mode 100644 examples/ffi-table-provider/build.rs create mode 100644 examples/ffi-table-provider/pyproject.toml create mode 100644 examples/ffi-table-provider/python/tests/_test_table_provider.py create mode 100644 examples/ffi-table-provider/src/lib.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 
f52913ce8..084a96192 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -156,7 +156,9 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Build Python package - run: maturin build --release --strip --features substrait + run: | + maturin build --release --strip --features substrait + - name: List Mac wheels run: find target/wheels/ diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f9383db5f..21faedecd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -110,6 +110,15 @@ jobs: pip install -e . -vv pytest -v . + - name: FFI unit tests + run: | + source venv/bin/activate + pip install -e . -vv + pip install maturin==1.5.1 + cd examples/ffi-table-provider + maturin develop --release --strip + pytest python/tests/_test_table_provider.py + - name: Cache the generated dataset id: cache-tpch-dataset uses: actions/cache@v4 diff --git a/Cargo.lock b/Cargo.lock index f483a6a2a..7b57b330a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,54 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "abi_stable" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" +dependencies = [ + "abi_stable_derive", + "abi_stable_shared", + "const_panic", + "core_extensions", + "crossbeam-channel", + "generational-arena", + "libloading", + "lock_api", + "parking_lot", + "paste", + "repr_offset", + "rustc_version", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "abi_stable_derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" +dependencies = [ + "abi_stable_shared", + "as_derive_utils", + "core_extensions", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", + "typed-arena", +] + +[[package]] +name = "abi_stable_shared" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" +dependencies = [ + "core_extensions", +] + [[package]] name = "addr2line" version = "0.24.2" @@ -63,9 +111,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "611cc2ae7d2e242c457e4be7f97036b8ad9ca152b499f53faf99b1ed8fc2553f" [[package]] name = "android-tzdata" @@ -84,9 +132,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.91" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "apache-avro" @@ -349,6 +397,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = 
"as_derive_utils" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" +dependencies = [ + "core_extensions", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "async-compression" version = "0.4.17" @@ -367,6 +427,15 @@ dependencies = [ "zstd-safe 7.2.1", ] +[[package]] +name = "async-ffi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" +dependencies = [ + "abi_stable", +] + [[package]] name = "async-recursion" version = "1.1.1" @@ -375,7 +444,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -386,7 +455,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -542,9 +611,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.31" +version = "1.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" dependencies = [ "jobserver", "libc", @@ -637,6 +706,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_panic" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "013b6c2c3a14d678f38cd23994b02da3a1a1b6a5d1eedddfe63a5a5f11b13a81" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -668,6 +743,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "core_extensions" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c71dc07c9721607e7a16108336048ee978c3a8b129294534272e8bac96c0ee" +dependencies = [ + 
"core_extensions_proc_macros", +] + +[[package]] +name = "core_extensions_proc_macros" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" + [[package]] name = "cpufeatures" version = "0.2.14" @@ -686,6 +776,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.20" @@ -917,6 +1016,24 @@ dependencies = [ "paste", ] +[[package]] +name = "datafusion-ffi" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +dependencies = [ + "abi_stable", + "arrow", + "async-ffi", + "async-trait", + "datafusion", + "datafusion-proto", + "doc-comment", + "futures", + "log", + "prost", +] + [[package]] name = "datafusion-functions" version = "43.0.0" @@ -1176,6 +1293,7 @@ dependencies = [ "arrow", "async-trait", "datafusion", + "datafusion-ffi", "datafusion-functions-window-common", "datafusion-proto", "datafusion-substrait", @@ -1238,6 +1356,23 @@ dependencies = [ "subtle", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "dyn-clone" version = "1.0.17" @@ -1268,9 +1403,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "fixedbitset" @@ -1369,7 +1504,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -1402,6 +1537,15 @@ dependencies = [ "slab", ] +[[package]] +name = "generational-arena" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" +dependencies = [ + "cfg-if", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1477,9 +1621,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" [[package]] name = "heck" @@ -1631,14 +1775,143 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", 
+ "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -1648,7 +1921,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] @@ -1786,9 +2059,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.161" +version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "libflate" @@ -1814,6 +2087,16 @@ dependencies = [ "rle-decode-fast", ] +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libm" version = "0.2.11" @@ -1836,6 +2119,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = 
"0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -2289,7 +2578,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.87", ] [[package]] @@ -2328,7 +2617,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn", + "syn 2.0.87", "tempfile", ] @@ -2342,7 +2631,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -2365,9 +2654,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" dependencies = [ "cfg-if", "indoc", @@ -2383,9 +2672,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" dependencies = [ "once_cell", "target-lexicon", @@ -2393,9 +2682,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" dependencies = [ "libc", "pyo3-build-config", @@ -2403,34 +2692,34 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 2.0.87", ] [[package]] name = "pyo3-macros-backend" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" dependencies = [ "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn", + "syn 2.0.87", ] [[package]] name = "quad-rand" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" @@ -2479,9 +2768,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e346e016eacfff12233c243718197ca12f148c84e1e84268a896699b41c71780" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" dependencies = [ "cfg_aliases", "libc", @@ -2584,6 +2873,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "repr_offset" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" +dependencies = [ + "tstr", +] + [[package]] name = "reqwest" version = "0.12.9" @@ -2673,9 +2971,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.38" +version = "0.38.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" +checksum = 
"375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" dependencies = [ "bitflags 2.6.0", "errno", @@ -2788,7 +3086,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 2.0.87", ] [[package]] @@ -2812,9 +3110,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -2852,7 +3150,7 @@ checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -2863,7 +3161,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -2887,7 +3185,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn", + "syn 2.0.87", ] [[package]] @@ -2971,7 +3269,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3014,9 +3312,15 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -3048,7 +3352,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.87", ] [[package]] @@ -3061,7 +3365,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.87", ] [[package]] @@ -3085,7 +3389,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn", + "syn 2.0.87", "typify", 
"walkdir", ] @@ -3098,9 +3402,20 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.85" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -3116,6 +3431,17 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "target-lexicon" version = "0.12.16" @@ -3124,9 +3450,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -3137,22 +3463,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.65" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.65" +version = "1.0.68" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3175,6 +3501,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -3214,7 +3550,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3266,7 +3602,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3284,6 +3620,21 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tstr" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" +dependencies = [ + "tstr_proc_macros", +] + +[[package]] +name = "tstr_proc_macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" + [[package]] name = "twox-hash" version = "1.6.3" @@ -3294,6 +3645,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typed-builder" version = "0.16.2" @@ 
-3311,7 +3668,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", ] [[package]] @@ -3345,7 +3702,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn", + "syn 2.0.87", "thiserror", "unicode-ident", ] @@ -3363,31 +3720,16 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn", + "syn 2.0.87", "typify-impl", ] -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" - [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -3420,15 +3762,27 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", "percent-encoding", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + 
[[package]] name = "uuid" version = "1.11.0" @@ -3492,7 +3846,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -3526,7 +3880,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3560,6 +3914,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -3569,6 +3939,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" @@ -3690,6 +4066,18 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xz2" version = "0.1.7" @@ -3699,6 +4087,30 @@ dependencies = [ 
"lzma-sys", ] +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -3717,7 +4129,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", ] [[package]] @@ -3726,6 +4159,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.12.4" diff --git a/Cargo.toml b/Cargo.toml index d86948b3a..02707b957 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ arrow = { version = "53", features = ["pyarrow"] } datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "43.0.0", optional = true } datafusion-proto = { version = "43.0.0" } +datafusion-ffi = { version = "43.0.0" } datafusion-functions-window-common = { version = "43.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } diff --git a/docs/source/user-guide/io/index.rst b/docs/source/user-guide/io/index.rst index 05411327e..b885cfeda 100644 --- a/docs/source/user-guide/io/index.rst +++ b/docs/source/user-guide/io/index.rst @@ -26,3 +26,4 @@ IO csv json parquet + table_provider diff --git a/docs/source/user-guide/io/table_provider.rst b/docs/source/user-guide/io/table_provider.rst new file mode 100644 index 000000000..2ff9ae46f --- /dev/null +++ b/docs/source/user-guide/io/table_provider.rst @@ -0,0 +1,56 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Custom Table Provider +===================== + +If you have a custom data source that you want to integrate with DataFusion, you can do so by +implementing the `TableProvider <https://datafusion.apache.org/library-user-guide/custom-table-providers.html>`_ +interface in Rust and then exposing it in Python. To do so, +you must use DataFusion 43.0.0 or later and expose a `FFI_TableProvider <https://crates.io/crates/datafusion-ffi>`_ +via `PyCapsule <https://pyo3.rs/main/doc/pyo3/types/struct.pycapsule>`_. + +A complete example can be found in the `examples folder <https://github.com/apache/datafusion-python/tree/main/examples>`_. + +.. code-block:: rust + + #[pymethods] + impl MyTableProvider { + + fn __datafusion_table_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult<Bound<'py, PyCapsule>> { + let name = CString::new("datafusion_table_provider").unwrap(); + + let provider = Arc::new(self.clone()) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let provider = FFI_TableProvider::new(Arc::new(provider), false); + + PyCapsule::new_bound(py, provider, Some(name.clone())) + } + } + +Once you have this library available, in Python you can register your table provider +to the ``SessionContext``. + +.. code-block:: python + + provider = MyTableProvider() + ctx.register_table_provider("my_table", provider) + + ctx.table("my_table").show() diff --git a/examples/ffi-table-provider/.cargo/config.toml b/examples/ffi-table-provider/.cargo/config.toml new file mode 100644 index 000000000..91a099a61 --- /dev/null +++ b/examples/ffi-table-provider/.cargo/config.toml @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock new file mode 100644 index 000000000..3b57cac75 --- /dev/null +++ b/examples/ffi-table-provider/Cargo.lock @@ -0,0 +1,3175 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing.
+version = 3 + +[[package]] +name = "abi_stable" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" +dependencies = [ + "abi_stable_derive", + "abi_stable_shared", + "const_panic", + "core_extensions", + "crossbeam-channel", + "generational-arena", + "libloading", + "lock_api", + "parking_lot", + "paste", + "repr_offset", + "rustc_version", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "abi_stable_derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" +dependencies = [ + "abi_stable_shared", + "as_derive_utils", + "core_extensions", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", + "typed-arena", +] + +[[package]] +name = "abi_stable_shared" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" +dependencies = [ + "core_extensions", +] + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "arrow" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + 
"lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "arrow-select" +version = "53.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "as_derive_utils" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" +dependencies = [ + "core_extensions", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "async-compression" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-ffi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" +dependencies = [ + "abi_stable", +] + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + 
+[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = 
"chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "chrono-tz" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +dependencies = [ + "parse-zoneinfo", + "phf_codegen", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "const_panic" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "013b6c2c3a14d678f38cd23994b02da3a1a1b6a5d1eedddfe63a5a5f11b13a81" + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core_extensions" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92c71dc07c9721607e7a16108336048ee978c3a8b129294534272e8bac96c0ee" +dependencies = [ + "core_extensions_proc_macros", +] + +[[package]] +name = "core_extensions_proc_macros" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" + +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + 
"tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser", + "tokio", +] + +[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + 
"datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools", + "paste", +] + +[[package]] +name = "datafusion-ffi" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +dependencies = [ + "abi_stable", + "arrow", + "async-ffi", + "async-trait", + "datafusion", + "datafusion-proto", + "doc-comment", + "futures", + "log", + "prost", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + +[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "43.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools", +] + +[[package]] +name = "datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools", + "log", + "once_cell", + 
"parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-proto" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" +dependencies = [ + "arrow", + "chrono", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-proto-common", + "object_store", + "prost", +] + +[[package]] +name = "datafusion-proto-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "object_store", + "prost", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", + "strum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" + +[[package]] +name = "ffi-table-provider" +version = "0.1.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion", + "datafusion-ffi", + "pyo3", + "pyo3-build-config", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "24.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generational-arena" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" 
+dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown 0.15.1", +] + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "itertools" 
+version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = 
"lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + 
"num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + 
"memchr", +] + +[[package]] +name = "object_store" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "parquet" +version = "53.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.14.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", 
+ "zstd", + "zstd-sys", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + 
"libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + 
+[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "repr_offset" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" +dependencies = [ + "tstr", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.40" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = 
"serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.87", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + 
+[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tempfile" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +dependencies = 
[ + "backtrace", + "bytes", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tstr" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" +dependencies = [ + "tstr_proc_macros", +] + +[[package]] +name = "tstr_proc_macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "url" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +dependencies = [ + "getrandom", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff 
--git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml new file mode 100644 index 000000000..4e54eaf03 --- /dev/null +++ b/examples/ffi-table-provider/Cargo.toml @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "ffi-table-provider" +version = "0.1.0" +edition = "2021" + +[dependencies] +datafusion = { version = "43.0.0" } +datafusion-ffi = { version = "43.0.0" } +pyo3 = { version = "0.22.6", features = ["extension-module", "abi3", "abi3-py38"] } +arrow = { version = "53.2.0" } +arrow-array = { version = "53.2.0" } +arrow-schema = { version = "53.2.0" } + +[build-dependencies] +pyo3-build-config = "0.22.6" + +[lib] +name = "ffi_table_provider" +crate-type = ["cdylib", "rlib"] diff --git a/examples/ffi-table-provider/build.rs b/examples/ffi-table-provider/build.rs new file mode 100644 index 000000000..4878d8b0e --- /dev/null +++ b/examples/ffi-table-provider/build.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +fn main() { + pyo3_build_config::add_extension_module_link_args(); +} diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-table-provider/pyproject.toml new file mode 100644 index 000000000..116efae9c --- /dev/null +++ b/examples/ffi-table-provider/pyproject.toml @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[build-system] +requires = ["maturin>=1.6,<2.0"] +build-backend = "maturin" + +[project] +name = "ffi_table_provider" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/ffi-table-provider/python/tests/_test_table_provider.py new file mode 100644 index 000000000..56c05e4fa --- /dev/null +++ b/examples/ffi-table-provider/python/tests/_test_table_provider.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datafusion import SessionContext +from ffi_table_provider import MyTableProvider +import pyarrow as pa + + +def test_table_loading(): + ctx = SessionContext() + table = MyTableProvider(3, 2, 4) + ctx.register_table_provider("t", table) + result = ctx.table("t").collect() + + assert len(result) == 4 + assert result[0].num_columns == 3 + + result = [r.column(0) for r in result] + expected = [ + pa.array([0, 1], type=pa.int32()), + pa.array([2, 3, 4], type=pa.int32()), + pa.array([4, 5, 6, 7], type=pa.int32()), + pa.array([6, 7, 8, 9, 10], type=pa.int32()), + ] + + assert result == expected diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/ffi-table-provider/src/lib.rs new file mode 100644 index 000000000..473244d88 --- /dev/null +++ b/examples/ffi-table-provider/src/lib.rs @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ffi::CString, sync::Arc}; + +use arrow_array::ArrayRef; +use datafusion::{ + arrow::{ + array::RecordBatch, + datatypes::{DataType, Field, Schema}, + }, + datasource::MemTable, + error::{DataFusionError, Result}, +}; +use datafusion_ffi::table_provider::FFI_TableProvider; +use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule}; + +/// In order to provide a test that demonstrates different sized record batches, +/// the first batch will have num_rows, the second batch num_rows+1, and so on. +#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)] +#[derive(Clone)] +struct MyTableProvider { + num_cols: usize, + num_rows: usize, + num_batches: usize, +} + +fn create_record_batch( + schema: &Arc, + num_cols: usize, + start_value: i32, + num_values: usize, +) -> Result { + let end_value = start_value + num_values as i32; + let row_values: Vec = (start_value..end_value).collect(); + + let columns: Vec<_> = (0..num_cols) + .map(|_| { + std::sync::Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef + }) + .collect(); + + RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from) +} + +impl MyTableProvider { + fn create_table(&self) -> Result { + let fields: Vec<_> = (0..self.num_cols) + .map(|idx| (b'A' + idx as u8) as char) + .map(|col_name| Field::new(col_name, DataType::Int32, true)) + .collect(); + + let schema = Arc::new(Schema::new(fields)); + + let batches: Result> = (0..self.num_batches) + .map(|batch_idx| { + let start_value = batch_idx * self.num_rows; + create_record_batch( + &schema, + self.num_cols, + start_value as i32, + self.num_rows + batch_idx, + ) + }) + .collect(); + + MemTable::try_new(schema, vec![batches?]) + } +} + +#[pymethods] +impl MyTableProvider { + #[new] + fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self { + Self { + num_cols, + num_rows, + num_batches, + } + } + + fn __datafusion_table_provider__<'py>( + &self, + py: Python<'py>, + 
) -> PyResult> { + let name = CString::new("datafusion_table_provider").unwrap(); + + let provider = self + .create_table() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let provider = FFI_TableProvider::new(Arc::new(provider), false); + + PyCapsule::new_bound(py, provider, Some(name.clone())) + } +} + +#[pymodule] +fn ffi_table_provider(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 5221c866c..a07b5d175 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -685,6 +685,14 @@ def deregister_table(self, name: str) -> None: """Remove a table from the session.""" self.ctx.deregister_table(name) + def register_table_provider(self, name: str, provider: Any) -> None: + """Register a table provider. + + This table provider must have a method called ``__datafusion_table_provider__`` + which returns a PyCapsule that exposes a ``FFI_TableProvider``. + """ + self.ctx.register_table_provider(name, provider) + def register_record_batches( self, name: str, partitions: list[list[pyarrow.RecordBatch]] ) -> None: diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 330475302..b82f95e35 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -306,6 +306,7 @@ def test_unnest_without_nulls(nested_df): assert result.column(1) == pa.array([7, 8, 8, 9, 9, 9]) +@pytest.mark.filterwarnings("ignore:`join_keys`:DeprecationWarning") def test_join(): ctx = SessionContext() diff --git a/src/context.rs b/src/context.rs index c2a263fa7..8675e97df 100644 --- a/src/context.rs +++ b/src/context.rs @@ -28,7 +28,7 @@ use object_store::ObjectStore; use url::Url; use uuid::Uuid; -use pyo3::exceptions::{PyKeyError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyKeyError, PyNotImplementedError, PyTypeError, PyValueError}; use pyo3::prelude::*; use crate::catalog::{PyCatalog, PyTable}; @@ -67,7 +67,8 
@@ use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; -use pyo3::types::{PyDict, PyList, PyTuple}; +use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -566,6 +567,30 @@ impl PySessionContext { Ok(()) } + /// Construct datafusion dataframe from Arrow Table + pub fn register_table_provider( + &mut self, + name: &str, + provider: Bound<'_, PyAny>, + ) -> PyResult<()> { + if provider.hasattr("__datafusion_table_provider__")? { + let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; + let capsule = capsule.downcast::()?; + // validate_pycapsule(capsule, "arrow_array_stream")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignTableProvider = provider.into(); + + let _ = self.ctx.register_table(name, Arc::new(provider))?; + + Ok(()) + } else { + Err(PyNotImplementedError::new_err( + "__datafusion_table_provider__ does not exist on Table Provider object.", + )) + } + } + pub fn register_record_batches( &mut self, name: &str, From 92b093c767a8ec76918db6abeae35f85ee19fa60 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 23 Nov 2024 23:21:24 +0800 Subject: [PATCH 023/248] Add make_list and tests for make_list, make_array (#949) --- python/datafusion/functions.py | 9 +++++++++ python/tests/test_functions.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 88ea7280d..6ad4c50c2 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -184,6 +184,7 @@ "lpad", "ltrim", "make_array", + "make_list", "make_date", "max", "md5", @@ -1044,6 +1045,14 @@ def make_array(*args: Expr) -> Expr: return Expr(f.make_array(args)) +def 
make_list(*args: Expr) -> Expr: + """Returns an array using the specified input expressions. + + This is an alias for :py:func:`make_array`. + """ + return make_array(*args) + + def array(*args: Expr) -> Expr: """Returns an array using the specified input expressions. diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index c14cfc2dc..0d40032bb 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -576,6 +576,37 @@ def test_array_function_cardinality(): ) +@pytest.mark.parametrize("make_func", [f.make_array, f.make_list]) +def test_make_array_functions(make_func): + ctx = SessionContext() + batch = pa.RecordBatch.from_arrays( + [ + pa.array(["Hello", "World", "!"], type=pa.string()), + pa.array([4, 5, 6]), + pa.array(["hello ", " world ", " !"], type=pa.string()), + ], + names=["a", "b", "c"], + ) + df = ctx.create_dataframe([[batch]]) + + stmt = make_func( + column("a").cast(pa.string()), + column("b").cast(pa.string()), + column("c").cast(pa.string()), + ) + py_expr = [ + ["Hello", "4", "hello "], + ["World", "5", " world "], + ["!", "6", " !"], + ] + + query_result = df.select(stmt).collect()[0].column(0) + for a, b in zip(query_result, py_expr): + np.testing.assert_array_equal( + np.array(a.as_py(), dtype=str), np.array(b, dtype=str) + ) + + @pytest.mark.parametrize( ("stmt", "py_expr"), [ From 54e5e0d9cc876ca31eea6f79a623e5163eae75f9 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sat, 23 Nov 2024 16:22:03 +0100 Subject: [PATCH 024/248] fix: udwf example (#948) --- examples/python-udwf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 05b3021d8..55de2bdc7 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -185,35 +185,36 @@ def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: df = ctx.create_dataframe([[batch]]) exp_smooth = udwf( - ExponentialSmoothDefault(0.9), + lambda: 
ExponentialSmoothDefault(0.9), pa.float64(), pa.float64(), volatility="immutable", ) smooth_two_row = udwf( - SmoothBoundedFromPreviousRow(0.9), + lambda: SmoothBoundedFromPreviousRow(0.9), pa.float64(), pa.float64(), volatility="immutable", ) smooth_rank = udwf( - SmoothAcrossRank(0.9), + lambda: SmoothAcrossRank(0.9), pa.float64(), pa.float64(), volatility="immutable", ) smooth_frame = udwf( - ExponentialSmoothFrame(0.9), + lambda: ExponentialSmoothFrame(0.9), pa.float64(), pa.float64(), volatility="immutable", + name="smooth_frame", ) smooth_two_col = udwf( - SmoothTwoColumn(0.9), + lambda: SmoothTwoColumn(0.9), [pa.float64(), pa.int64()], pa.float64(), volatility="immutable", From deb1f255ac314d569ee9299dfd421028763bbbb9 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 29 Nov 2024 11:57:42 -0500 Subject: [PATCH 025/248] Documentation updates: simplify examples and add section on data sources (#955) * Add a simple example to the introduction page to demonstrate loading a dataframe from a csv file and displaying the contents * Update basics doc to be a little more straight forward * Move downloading of data files for examples into the build scripts and just point the users to where these files are located instead of adding url lib requests to the python examples so we can focus on what is most important to the user * Handle a few errors generated by doc site builder * Switch example so that there is not confusion about the single and double quotes due to capitalization * Add section on data sources * Build pipeline doesn't have polars and it isn't really necessary for the example, so swith to a code block instead of ipython directive --- .github/workflows/docs.yaml | 2 + docs/.gitignore | 2 + docs/build.sh | 11 +- docs/source/images/jupyter_lab_df_view.png | Bin 0 -> 150303 bytes docs/source/index.rst | 25 +-- docs/source/user-guide/basics.rst | 74 +++---- .../common-operations/aggregations.rst | 10 +- .../common-operations/functions.rst | 6 - 
.../user-guide/common-operations/index.rst | 2 + .../common-operations/select-and-filter.rst | 11 +- .../user-guide/common-operations/windows.rst | 6 - docs/source/user-guide/data-sources.rst | 187 ++++++++++++++++++ docs/source/user-guide/introduction.rst | 34 ++++ docs/source/user-guide/io/avro.rst | 2 + docs/source/user-guide/io/csv.rst | 2 + docs/source/user-guide/io/json.rst | 2 + docs/source/user-guide/io/parquet.rst | 3 +- docs/source/user-guide/io/table_provider.rst | 2 + python/datafusion/dataframe.py | 2 +- python/datafusion/plan.py | 4 +- 20 files changed, 300 insertions(+), 87 deletions(-) create mode 100644 docs/source/images/jupyter_lab_df_view.png create mode 100644 docs/source/user-guide/data-sources.rst diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index e47497b2a..86288e2d8 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -75,6 +75,8 @@ jobs: set -x source venv/bin/activate cd docs + curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv + curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet make html - name: Copy & push the generated HTML diff --git a/docs/.gitignore b/docs/.gitignore index 41e135341..6e8a53b6f 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,2 +1,4 @@ pokemon.csv yellow_trip_data.parquet +yellow_tripdata_2021-01.parquet + diff --git a/docs/build.sh b/docs/build.sh index 5afe85812..31398d195 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -19,8 +19,17 @@ # set -e + +if [ ! -f pokemon.csv ]; then + curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv +fi + +if [ ! 
-f yellow_tripdata_2021-01.parquet ]; then + curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet +fi + rm -rf build 2> /dev/null rm -rf temp 2> /dev/null mkdir temp cp -rf source/* temp/ -make SOURCEDIR=`pwd`/temp html \ No newline at end of file +make SOURCEDIR=`pwd`/temp html diff --git a/docs/source/images/jupyter_lab_df_view.png b/docs/source/images/jupyter_lab_df_view.png new file mode 100644 index 0000000000000000000000000000000000000000..9dafb4f61d3a82b6f51e03fa43f3c97a4f8bde0f GIT binary patch literal 150303 zcma&M1yozj)&NQkDGtTGP~6=q1b5fs1P{SoT8b5?c(I}_UR;W_xCgfYDeeTfATRyy z{bjwk*1P{%=j6kBs( z6cj8e2LM1*2>_tgbO+lyI0I2o6hEaJpc(2666cy|)77Iq)s_K=zE45Xma%_QMDkl+ z@o5-=HEq=VgxrfcPU0W=<*%cu@Wia_@DGBtWTlNrFuga`UhcG32ZB4mM|W3we2*d? zMi+BE^z3nFk zo>9Q4bjbze;nA4?CRCgnMUlV9;ZEF)vnLYe7n`4zk|c^O8CX)loW1+UQxFCg-dZ?s zcK)wa-t3ZJhz-G$8t162C$#16EuXParWa`;%zuDyu^S|#a%2PZ(NVlmdpcTLl5g<7 z$v8g)_23PkXF5pj*xae{ffSR6M@hS)TLv&iG&7jSR@D(wQzxA&F7ck@kwg_`nV-iE z7qTr^T>HQHR&sy=sPFg;!;!(&@ zvJZWV)jW0B?Xtk>aSi_(-W8}W-s6wQ^=jiqFAGReg|VMCDBo+BKu~4S{EzZRFtCGK zxsK3K`F<|*X}mU#4%<+yMrg*%8o3YLJOow(QB-HO>s_gWg)zJfNgo9X94gp@F{7+s z6eOaAOX??bV#Mn$rf+RwkC8yLSvF)@Pd?m@Tx-e&|FEStpq9$UoMptTAwczd?q^Mp zy4^+c9F=7&-V0L}!%aB{ckS-IVXt8}>)0p##dX`zY&wC^#H_UR94S_>4G3I_|`fu~)wE4OC8-*zxi-_brHdy7JuGV&*i z!7l;bZ2c|%HR0{EV^MFEG)d&Js`@AJ0X z?!8bxH^^_lz$PZ~H}!j>gL{L57-dg}UCxb^`xV#WpK1NwWD+2)E zkqDvSG_##|cin%r6B?*ZbsMhjx>iDf`L0(Tm=BrNU#0NIu4&ZV_!WK0`2tOhW}lPV zQowgw)a+OQNJt^<9utEkox4LCdx)*jYR={z4qkg>m7`%DFwDMhu;>T++IvYPRaZ%a zYfgP@^W%?c9#Hct*pvO#1=Jdfg7U}1-NNF}jt}Pn0}4Xo&!yC9PMu|!M&wHrlKA7I z&re@O4s$-gPScDOQ==h$=3(({d;RIs2ccs$X<97V559?5j9oi?tD@=eOD#|{FqPku z8KUZcys#jgf9V}6@dxEQTK_KxSDbgKpg*Y1Xa*W2zE5s{i5-(k;}OUJcA_M$LJePE zMqzfz1}4!mJpCm1IZ2n{89tr14Jt1#3qYTCKZ!$8pq54?G+UnY_2B^32L9y7HhIZ> z(z0mBLiz7Rq6EMKc7Dn@1*WfLJFn`|a}qeE=L+<8T_g1U#WI5>uH{4yex^y zw89}Jw6@kVe3=%n^ux%IML%B8vcZr(aBS`eP6Y`fiqK7T>=)IampHwIuGEq*^1`|L z=GMhdMBB(ZBc!?z$KdDWzY{Gqj$gZ?TVs%gm4*?>h**lPa<8&4shEcO$iCJ}w^uuV 
zAHb8DA(yTGPIc++l8U&TGm|#~LW$wCWr-jP`=|D2RxN_=q%8Ns01@C{r6)5xSh;#kFs~mlZpH8`7t$X4Nw-Xa!uP z>`~1K@sp26Z)vFHZ%(C+mpRzk<2ukx?>NRgQVMh|Iyby*5N#-TK#o8mzlQ*c)6rzx_k~Ki$=b=6lWR_+ zbBkqO^5x0#6QjG%q6v}-9dW^FE5&Nljk8J{vEja#gH+I#2w z&D)DxTrAgTVjoFAmZDN%kCC6_%uydnWDB)V+H8#e!##J@q;G319Mb<&b z4|?r-O%1sM{jCG7eYYLhqkRzDXtmT)FOvd}DM6_BM^F}(Y`8`6b`rV=Fiz*6J@Xv|mzGe%S;J4LATFL9Sp&kFmhvA*(~%b}ZMIiO(r8oRi2rAtPdp!okf$t62u%sufjOObi>S~c4tk&Bd2Z^KPT>w>R#9Nxa|_} zvW*akERX09uf`3bv?QVXWKa1q9)qIJedJzoSW(L+=vCC3Y-OxJ9*9cf&8n(>0kAv9xGq>|*CAXhzE4^qvTydT07Rj_~_R6{%lzE0Z*ODyU4szQ4^GVir@$K2p ziR`K_igd*cCC(bVG$i~k7tK{CF6%0Nuxty7eAlktPpE_*jMRBWlApwOEm-LUSiB7O zNL*euMMLxYl*(22*K7Qoe7UY4?)2`cKO|U$|1oOSxQ#wP!pe(HC6mYu&fLswmuYlu zKi1Q?pDn4lTF_b$t3`AF?0&iR*+a)8A*x<>pE&00K*nU223ZgQAP z<2Ici;VJoP!)iic%(VP%%(>PFu(aIo$kIcCB)Iq?~>`R&ZjTtg=gyOQ~2 z>vYg=AFu~l02E3`ASH;5hulQ;SgN)DY^7BNtLcUJMbQk&u<5W+Wqwss)vh5$O`~N_ zZ!2)WqXP0Sr114uD{!&pX2H-E#Wh8h%Q?*O=b*jLLWxFJcorCpv7l`L)YI!oZEmpG zp8|b?jy?9>Rr>$@eOY^G?i1zhc@Vq2@a|B{w0+ohZj!uS-gv4m;O?kV;zIzrBb8&* z!OXHrBkn=+Q9y^3*W>83RkSZSytuQ}wc@&d8NtooBa^8*Ctd=H%&j`8%us(O@EHI13ZohkJ2T_eKjA(k1c0t z8bb~aUE#tH%*R^8>&^XFqJBACxf>EUlLC({hYmM<$#yf#oXh?Gp#h7z2>~k?GIwhp z;K95vA^IKlettV#kDbQ1eV1PCpWDcN+ppsuvZ%nOFu@CjE2+SA#L*dI68DzIJJ=>z z{E=mMst%&lnb=Wr?}%tPF}ZF+JZpLO%&IDk?UCr0h1}HC6W{h{D8{@fp1zWW@jBU$ z8!xo7jW*Hmm@0(nhhoG~8jc?&N=P1-m!4X{1by#9Z}--pM3p?jgJX} zei0xa->*;q3mUcd>$CqVKeb1;p-Afhl$4Nf9cyBFlfPxn9%$1LEl<@!C*LlNJDW2hs|1a&vOOmV8M|ODpbfV=JmHEC28A$S;Z4 z_MV=uqFh`)K0cg2e4Jo+J1!m(5fLtKUM^l<4kQGJho6h5r7wqz2mQYW`LA(gfgaZG z4z8XKU>DlI<62sQy*wpezy3SXe_#LlpFm%S|C!0f$vS|LG(*lIDaa!z{tx{Br{#ZU{9io{|EDL95D)+V?)tx){@-2oJb>;1 zFbFxPr{w>v*S|ae@5X<36zBSj`u`%uzv%o=Et1fZFU7h3d(k9cPI6u|A=i=8K~_T- z`9|)tztfT|r4ZxbN56!z!=s5(Ab0597X#U~hl+n)c~`yseeqMpV=UvJa)8N4@`#Hj;!Re2j`M$*96k08614BMhFRx7qJ;$Dwb`Zm&t!dqPpX0h zjoY-=D*6A%tnbq=sfUwt6>ZBo2*14S6>-+Cv z`r>|dKHkF*GVOA;?Chd&x6({p202@I3TBDs%tP)EQmX@j`%U_?7P`+n9xkfwq#kb| zCh0Ix({e45!MmCZ6SmFg!58J`K%r$%oxW3_PK2r8enp*ab;$jG9Y@nr?H$7*8S|69 
zrf+${q5(%9d1B-Hs0c@VLkkGx3U~?azzKJEvlyr|V|66M{LC zLH>e>gSsnkYn}6*7_JUs#I8ojV(U*+?}axez*Ifs&VI|y#|QXhv>oHKA^Y4uZzw2z zIT&%++iQM1nQlvB=DTXoTTrfU^fWo2lKjP8AGQ&4UPJFmB_^UPnK!;W`H{r?AveS3 zwd)B|9fti@_p7)e!U0HL;)@QL6_9)T+}8DJg4BcWC350cIm^O0!689d;Mpv^`E=j> zQ512S-nq1RSDW6>9x&=;Hg#K>>i@jsG_~5s2NvXG{;BQAt9n@9Jm{ihEXSECpH}p0 z$%Ai2A-R{>gdGa*yg%3;PnWu#NT?m=>TIjtA5(-)#0jlrn1telZS~k;$ zcg`z*9jhv-$cx97jqygH}AJ)c77$xyL7d|5m)Z>U?aUZ$ljYgdRD^i);<( zn}8ae7wa7sV$9A9W-r~0{Qi7EtK}OLE*uyW+0IVpsOcdfi^vYTTK0KYD|{T-cGV6C z@zv%A-|kXD>7PL~2VZFZlDgSQ9&G1>h=O+p_X1 z)>qy2YUU@=1~y{wHVI52(w2~x5{A?alV=^iDIOfxljKmux2Mzb%@`D7J z^#vU}3qhUh3WCXg#%_~oXU^Q0j#q!Z|J5{{$Fkv8@|ArX+)w4hzDTgFljW}v_~#kT zfPGVDw*Ou!y-%psb37Qsp)ppWey(9&s345k#5sxM$FHYpAPzLy_opX%)&6=WdvotR z)EcL?RT@B9dr}2vaUBX}L4q?PSbGnBHI(Gj?U$cj)*~v)t2PA{i zX8TpX+rHRVulzkB@_hL~hkrc^4%Xp6z1!?SEW>Sw+3R9q)5e{bi%xv6@)&*Zg4YQ1 ze&iX?GN71{UePn<`mSMF>fYVML-O*E`QIefoL*FbD7H>9&97y=r?nI=yasc;7g@x% zGOTY!eRy-itXiQ#9k(;pobhDg3Y*{aR*~EXjTO&myYA&0yPUViMJx=szr!+OUZ^x3 ziKBAs!C#<&UH44}+|Bn(rUjkEm^&YaY>UA%PwdFk4+9=dmgj)Hp#|bs&(VMFi1BhR$@uhixj378)h6i7TdFwZw zl^xmXDrP>5!7|cgnU%tMb3LIzv#c*)tlfJoz3_3WuNkxTr6;jjS$$nu8ej5YVt8^- zz{(kQ!t0iJ5BDiA?^78G7v0PmvUx%c@gyqWKOYEJu6&8%k9Jas1+4xSGPORn53zhp zCM&0DX(wd|et!&Q5M{}Qx(h9f59}XUfue;8$mJW=hoT?tG?P(D(km%fd&AqW#uU*} z{j1mYaj?&A0~Fib0K|ngk{6{KL`GkEt|tkS|8wx#ld?$&9zW}6mlvko?rHIC@A zM0Nkly)X2MWv&k2*S!RgTTiVy5zB}$k-t?sqGC+A^CS*z0}p^9KBMVr{b0~=wqO4q zM~l7O_v`b`fP&=bpj{R7vJ^?Pl&~VwOZy8gscxFb>;8qRpGS*UVYSIgk!sOcMvzI_ z{1N77D-CXVvC2qxgoRc7VGiw9%sf9G8;4zQFi9;azwAADf<`%hE3ubaYCphR!n{O^ zp~Mi+7_LhDqHMxxJBVHz{?M;~3fan3BjOD_~bne9HrvM_4`J`{?Z zxh+hP{L+56$)&MXnp{W0q|0H)7(<82#FJ~Fx8MPhsO!d+YTH`{h*XgP(t!5zEOqF; z${B46)P6lK?dzX;FSB5Gwx!*6{mD!xV{Y2CeA@MM=#;V13wC)yj3w7>W&&>B?K0EV764w~oq!mE$OC+hhp1I3m{ReG`NCO<=6EQs?=&L@UpG zasSObgo=92HXm@XHg#T+)xJ)$4dG*@i=XUW7xn&uo`n~_y$JyI-Y4G(t&B0h!=Te* zEEKd^t%><&`-{FS>v1;2c`=DqDPcsY2s*bhPxSII^Q)l3;D~n8 zH{#`Hvvv#sVmcsR*RPu&e7Pj_IK;;Gbe#@6`<+Pg@X5a}c@E@7ZQ|YC%qcC4W&Sz) 
zwMj!CKumv^!M1Jwcxy08>aY|qemXD``Lq7eD=p05_Epqi*2;Ma*0(2v$LN8FPUczT zW@grL0@1gJ;aq{1-VlkaS9C4+%pF=y2CWmncZ&`r*2IDkP?*^l*%^0RUea3aAchW~ z*#5BZK}ca1eW~Ipvm^Mq@zu(EZ&^UvP{reizWk4kc_cDhg<(v(?1u)6YGy09vrV?l zx^@g?Brka7y-VI6Zv%Cv#KPyzoF zK1y#c?*7|CVlLV$x$RiqG$$|e(GQl-KjQl#x@jIX`2$JJ15aW_y81v?E>;cjAKf?+ z#Tg;=5i@l;O)LIReKt*uDx57gXsuScn+UZ+0f;)J&VVn)!y{YFth7qlO?dP`eln*5BbJN4><<1GS|05A^ z1up=7Q+sPz#_XF=AkMSe^1FW5Zf-+JWt67X z+fey*6x=1E;#;nN8OTiFv|C69Jks9S87z+iC~)rSy?I{{cY@A)ReMy%=JvW^YcKsJ-NZWOy&%U%|{{f2>0-B?qN3qXVTP z^E;_Q9wD8L)pPD8S#+4TOHCwLVatm-57({xVRgloRx(g*I~%My#(E7hs;(wb*LOwv z8(JU?55dfKO>EyeQB8NZK)Z#JH3B5gMtxI1%KWnpPkOKfrRH#iWE&u_y{&nCH0E=pN$HU0BLSE?hD%M#zgmRcC56uFXMiH04E{VpliYF zLOIG{O{j&fx~Dhx;&{7qHM~$G&nMXj#q^0N$4bBu!=fSS*2FIDUCFZTW`;)L+`iC_ z1iu!}1o1c6(}2Ys6Irv(yRd{ONa6#-<(-?@D60Tcl5TJY647@}SFr)pG(mXH|<=3fnbs5IDfZL?JfG z(Kki#=HSbNd_h41zTwG&LOA}=TTF$g#%sVJN)+U%B$Km zvNq1=O$SBW&#*3jz!xeA!LwKRN^2tCSRlbgud^|+YLFv`n|_z3rmbOqd!Kz%3U5j3 z0E&T?;rR5=(M^oFgYEC#JN6#H;>Awpr#gow9TI!PugV!+2seMyG`tnV>&B+*SL5E2 zedbiXsmcT(Mr}iyhmCri?xv@*na|QKG2$8O6V$lX7tZk-YpW24X%q~urf}@41kBW+h>hk(dm!M9lL zDZBL(J1}1*GS_Q$(g_L}pIE|eM29Q?T`xFR#GA##ZR?YaRA#W-9z{&Q;JsL}fr{IX z=W_6hf(qB=iL;pcAK`h~Nb+1Xd1|ivcb%p1d>4^0>1h1Mu&axBz{3OcM+ZLtZlAJT z`E4b!o^KzYq;)7#nPx#$boLvtjeLy)ijBF*_KPwO-cV7o{q>Tx<018&hrkrfk$pSt z9+wFASSqZ;tMCpDNc9xTi5XjsR^JY@{8&r0;pD>1)$a$I3tN>y1EYuP}Nb)&X`}rht6X|}Z`kBko!g-?>Ct7Rm z?0}6Wiosvu5ZEK?Q%g~6zw>2s_DF0@?gS~q>W-Of@7dMzr>E;EdW2-Q^X#I=2YTpP zy}6MVWk%O89m#ZtVpMRW0?_m#)HY!&nFFF;6+-^++oe_Vdm)gbby{0&2s9`w}_=teP1hvPGCb&d!i@(P`-osR;FhU1J45yV|J_oWYN0Jr@m3;W{Wb+4jzsDN&LQE1fRws-sekfmk zF5%ETRvd_1EO zz?1iw$0uBC%WNwY9+wNR+{T;Jf#LNzgoK<>9(mng%q3(Y9wf+yD>}f9k%T%!J{@+H zIQ}<{1)tlYR}}k_wM$@AizXYX=t4@EdKm@enjDNMuYtUv!72n|@u2fIrPF8`Z)f(4((9FE(3Qg^0bD@;J(FnE zY^3d7_Z>3q`SMbATtW-i4*?6Thhy zG<)!e$Q&k>8@!U30_e@|(8D`(x$s$;VaB&dWpc=US}oY z;R}DF14`itU#4RO`%#RRqHk5!IPIB7+4y^zQahP`bR0!Oh>I>v@f>A4Sd%!jNwiae zMGwn}Kh#gZCiyKHfNUd=?W(J;8$eU-|Ekm1i6B=rKCp-`?#~58QM*W8M63$Q+S4n) 
z%$B|*y<_6sY6Y*E$-Vz>lR1-S`N;z3(82Ev-mRMN*OQb7q+oVmAI?z=%~r^|GCU+N z+N)LkxG(}f{+g1vi1%WjyI7q;jyQ2W;5SWi z2|5nQ(AHy7&9%qv#itZU<7@5#XL`6EOYB7IU)`rxp zx8&|EB`btE-CXT1R2pe17=>hjgc&R-1@hn7sSC^NojJGLHPY1dHIRMU>3mi-*fiQ< z|6ZNU;NZ*0ck%KDnR0Z0b6GEKZLl@eQDq%oS!Zbf1$Yq-{0Pn}V+P z?yBz|PZ^TE)NG5CE<=fOiom161oa}EO$inq{ke9S`nH0?GqE(@&!S|EDjW^E3WW;a zzyp&pDl0THUo-1emSFqvL4Df`{}8Wu7*eTW5>`7M(~{TH=ZP-K(h_A|DBoZjo0|rk z{%Y7uIXSOpw$r)r_=X@2r(;V?7XPN3F{OTUiP$?g5W8p@+Uwh4Fxk#jAb#j@MsB?m z(T(b8my&eecBGmqfmwKLo%KOb_>dHaL;E;=R*$HRXF>E08R{q$7knYjlIGCyZzw~a zb#Qu|!fBNxa1-Jj`88LK_^WsE5PkA_y0KDP=q8@uPKlT!Jw&gJ(yjb9(LEl#-F`6{ zQ#G15H}CJ#09HK>we~k_CJyoI9ak3O!ZU`RC6ZMh8sqH86)Id zJ|}#293}}A8~p2RX6TyGi*-F4&`vAtB9_tQJ ziIaH$LBL_d;Wsr)?W={PG3Jh>C11WAyKRvZoP^}-wp593@f204ge&D{v64$-?hh%0 z=krHaWWzrKJq@722(53~qZCIVq`yVfs1{&aGL$sXG6n8 zZZ_lnh;&r_WTF(Rz@C;$seX_=8yD%_+@-%hZ27>JSy=|hV%lDPSezW8+F>kGk^p%L zs|?ZZnI~UAY(#h=ZLg*!_u)pTV~%dBL9ys553at(`wpxaFThGE=N=Q}!$n|`chbuD zhhDohF{2dQ@K(MpDIZ~$cnA?L;pJw|p}z5rebH{<{zIU7i7V~JPZ-?KSjKC}YtddZ zXx3wW#Pc*>^QvDkygFw6=EvD#mG~>|6SdQoRZ zI2^`~Z7c^F6=$zL&$xCvIXF$60zrlQuILvz`NL>Zf?7@+I5Z?76=w%$6An`z-wFzH zDC>wipNQp&#+>G!zW1&LHN83fUBb|jhji-;c)|202jdP1=b?N*-T;k{Qn)4Z5|}b= zj>7op;>6GYoG%$lN#v;%+hZCVH=Z9ZwR%bC@Jh}IeOs{;0}6O)-|4qRBHiWUgI0+Y z)(AS>6MtXAS^t zlL9krV!fgx1nC9ZyL8bAZy^Zdp6UVFNiMBdwfR+meF5cA4gh+~RU<)(APo*Nl2FTH z$ir>?qj&=bRUxeY!{|Cm(-7+r z`zw-`^!~6uUw#{}pXu{>Cpo3_%rVi5qNn=7i@e^hXM?`b_(6%o7BHK86`Q%PbOB#COg7llMJ#J#Jd3 z7_!jQ_wHphpW=9{#rK)orE}K(>Q)95<-Q3~YcS0-kO$ho%bX7KF1(iCIqn6=!(ZX7 zGn`}=<>rH7Zh?}Ji7=C2q^nZ}Q@h0^g<(=$RSG2?*>Mq3rH%t#zhs!wq*j;~mq<}1 zLT0tB;*{5fVhZNNz|d_0y)<`cF1RWYFwC^mnM^Dou$r+RKQPxwNG_TxS>wR8o{9PD zrf^K~Q;up!x*^z_o4TP#{07dp#i3VXef_pzjgE$lN`oubJD2693K#MS_8PvpZW~mQ z{L~ETI=6Q{g+Et*l@~JWkY3ZCB`YT_DFf_cxyi-_?Ix#dp~K$>a;*hH!9)3hR_Be* zeV1Ggo78)t{-ReBrLEr&i>O03%pO4E?NcC0N%F?x zk@~w{K@wOq+{Epjr}vsEPpJ;cG{<@&U4p+zn#RpQPa~L+vyj`S*W26EK9p%_ciN*4 zvB)-vWB1-jzs#f;rTp`#mi2PG_5bW zNVN-a`mZ#ss0-F?bbFJBYb%B(7OfjX)YsS!S+J=1oAwpxq*s*Lx(6& 
z{ra0hxvZFVDMAso?*6dy@p|vt?Th(yj{ziFZb6HEZfZP zG?#=`CE?RVgn`f#KcGpNP%R9L3BqQEFk)Zd*b{`KC5LavwKZCE#D0clP$=9%V(+$c z;u=BS4-+c((x+(#eIUB)d3i&=En?bO!c}`!_JjBL@qlifSP?<+w5=anm-_JOm|R)_ zfG9sWG%)cEkA;(ukNu z>Xy)PnngE>2;x{YoubX)F zP#zN5Yn(@of=dNCVum}+#e*72_a@^h`gnyVi3uS(eYfM;rA||3medTGM#Rw0mMLZY zGOk<-qg=N>GP$UqzVDMcgjgzGyw zGy>7WJcd2ZF+GsI0u1DBEX7Y=KtlO~G9Xv#~k6hg2aKBar+1)%aMh5H z^DUvw@(MEQTe;z2!s``5vgK-Gm*aI;GD~UnkAR4Rf%c$`>nCLCserKZqQG8Gz~dXC zF_McW-1Z&7__sX=8J)E7@Tx*5rvMb}`|rSkA&H3Sr=Qx!rBAG)1tk$EZ0@oF4OF=ONe6hPAAC@3iOXrQ^Ws1T8w>txP*&4}tObSIXS4I3>rx&QY?( znad&0$6phz*4!R^UD~5rF8xKoW#qMMf!u=dex&n$vE-{wf!F0Aw2?+ zf926>_@(<@!E^!5A2B`8an0E*&0K%W_j!poz>20d4AKuCTpP`PJDmYqFZ)lkN4~6mIE9MQ>$W8A{2Cuk)N6XD z;dONAXA*p-slM=%t-?YordSH~1rIzlWDbfm7Gg2~H!A<~8!41zSQonDU{rFq<2goq z>GoK^d|g<%mP2{q-!{B8j|c4x#B6E$LA7WszsYo_Hm9%zM49x0}p`b9wx0WP6?%HFpxh-X?N39!ALcvB|@}xo7Ox4=f{3^3oq|MQ!XOgfo{D z7GvG4w^I=vD+zRL=oI~}Gjar)9%hAbvqpE^jDm>^kAqfpqFJR{^P{y2T>dfPbvS*^f+a** z&YcDp?F-|g@oAac?6N?DA)oTA)|c}l)%SP5q!b5k3x*~nm$&hGC$SEp4{dQY&RfZt zf*gwW-1ZN8Phma;hj;43^DL@VS&`U7q?NqNpJ1)v{+_^Itu=8#)_cls2CuyI+S|#- z!iwX*Fk}>`b<fg?swg2uLE7cxz zbru2;_iL*6-KHO|Gx+)T%M8L5R&S!jkq+I71*BK-4C?aHvjDAwre@jOH9=~viGK{J zpU@&WjU+~>VDBKo`ag%U8mc`qlZmQW7n*dbKlnz6GNeWk$w;bsD z9@YI3<1i`9rIjTM>3;y}^qmcLPd5Y?(q5@+UOHO^gcDX!JLF_nr(76V=MCTfepoU* z3IOQm*o_HTQYu6||Kn$G5$#wJ}4TzVi&X z9c(G15YxY~U(4R-3Co=L^f%#4GwE&Yrjv)w!fua%t%*@fnt`~tJr7HR6sHjTD-SyE z1fpG9w|D}YPT{@)$-kjdhdLQbSVl1P$qla6Q-rm9F~|-5%=7wi15~S6?P23)-g2b} z4a8PG|KbycRO?r&2=RKZe{Y6K8Jny z;LehZg(4`pylOjVtg%Ah&+Z^>iKqDRON)D#cSD+6E_uhCIK$z&xU4G=89!lZf7{|9 zPoKnvy7KA7Os~y`s$wpMfN^AxNNqC?gvX3aD7t=(tXFv?8Qgz;w?T!#@Pp)7Bintx zh(!6PE*+11V@vr7ILvU&>71eVh)=FWL71tnJN-i36Bc%toFqTnclrHj5L95-xryjk z`#$9!ZoHr1->m9;nVSp-_g_}$aY4TIw?#|FEiv!;T(#qP!1|8ocIwB@hKSyR<|>if zTPAgXVs`bOJXN#gX4?sFJEC!9IDPrf!uBkyq$A%jKH03w8LZOXNps@g{*a42v8%82 zw|#Hr9&XZ7H{X3k2F)-#unD%|CmzE_58vT;Hx*|tv|f9mt-!zvKt?&on-ePXUP`0s zCrOsmMAvQG`R9z3U=Q=T!@o!Kg;9$5JA3Gb2U^)IomZ~NrykM5SirL6v$-u1PU`JdX zo06wPvmZ-)8kKU>{}K!qxR 
zQuMV0Cg;!Xo`>T8RIGTMF)(jG6f97{7F*9Ioc8&fK?6m+jpE$WwWi7EQ$7CLan!mW zebeGhp(1YJM}sH6%id;FlfTxvgOTi0Thc>mv=n@<90Q5}VLqa=ReV%OaW3>(C@n@a zKlUx}>b(UHbJoZ1#sny@f7sbkZz6VzSFAVhZ^HN=D#1P0lx_q7pS}r!H4*HvDe#14DWn=U>7pGG40L9pT)I819sxl?RI{+4*FB0YM47J1Z%78p!@v7 zV?4IZrHZ%pgMXH5X3cUBC3)XR(To5=B|p#g&5<4KJd`datRwRd^bFQ#ebN9hFF_(p0Iy&?(z8l=N;ib$7!c#OnH1|vI z(nrhRtG~y0V1$C2Or5GG<{vgmS56w{%%C`2U7Ebw!L%qO?}qVA(na7vQZGS}AIc(tTQ|>_g(GCCJyFQ?@4~4&o{z=}e(V(C zG9gcw<3Hi%q~1o5lBq5@0!cmQG^=jGIX~j-*W2+PlI-4+twCQrWK7>8RekLnn(4-Q z>H>~06!wdYpm!8o)1Y%V)u?$(FH1@e8EBm6-&_WxM9XFJ(=Ud1xF)sav=JQ!OwBe%b@0QOWGGgn;u5?zeOS0 zh$t^qpd8kVVYiyN@p6|4{<4(a9C@4K6|F9tsXo5^%0!A7=$=P)NEFOaIg&3^!Dc3i z1~SrJeby0RIn8_sy3JQ5vD!?P>XWyb`O8prxf;{~ERovqyydKAuGTU_b>ZK^#I0)k z*2R0f^Ru9w9sS#hhL$M`lS<$~?YpnX0?&w`NX<3=^+WK{;JhHNl4Gct3P9zb#2Mlb ztP$*|1VsC3Qs4n)FLX`qsjuI#ilo=JG3q@>qb4e~^!pQg8!{69r0A&ERFL324~zNH z+~v8k+Z@nKNc4#G%0fl$Ua{Si*J^uY8$Dp;y)C5k4%5NL7_-DdWki3|^HdqLTG&d; zlERtba(-SGV@RCEwJ2XYa=>Q{A@EiW2q4DxZ%)DJ``t@o-UjkPMl&>F=rE}qjg)ya z&AYFzDj1e9?)dnRx;!?2lT``VrgU=g>T(0}17qbsE?ILHVhPYS`ba+Q3no_S;;=}e zLlu3w*t+Q~E=4ZC$SL`YhI#2s%c+u$pUT)Z8{0^0Hd{L}tPZ8+2-C~ESenp330iIv(ed2$opXNDjtQ_^qt^~WZ zC4}6qldb+q^abf8DxgwkeJT8K*q!+ORbT1*7H#*0jWz1hEO*=u45I~PM#7@#M2%WE zMj#0(;p*tSP-ezJW|2Lsq36lhs)ckh>w78tdZJf3SVr|X%+{**0mlz#9dta|MnVmnr7?RwgSr=2lSg-C5Zp`9tq8joRW9(q_e&Kb9_NG^4cur5p*@TBJniZAB+P!lr$`Q@Y6-}L2G12@?S5xB6tGFQsSS(suF zfO43J*~Nk6EKbM8_auU#u@LXufdaF}-^4>z?&eowgpZ6SnW@d;ADpeIiLmg@d?7p4 zR?}UHDpHw^CX#EvX!OrxDj&%a5|oM*xvqir%jkoVHN7!HHglDj$oNRebvF%Tpvg3J z8o-{(G!g>y#x3NvSw?0B4|Y7c$G9z>=BSs>pkdjgy!Lj(;{ODxOn-1Z@VIasC_{`` z=SX*o0tcc&!7KY_o_s%$BJCR%MhD3E?xf$&mY@57A7J!T?Gj9>}URzBLBd*5*xJ1Crna4EL3oe2tXfXKTQ*1!XU*U#~ve&%o zr-5CTghgQpjrHln37Kjss^GKf@z$CZ=gG z9bu#T%Yr+RZ}kAg4*eqj^IOR6N%uQAj2E)SA_<-vqOKi|1{OfsjN765Eo$uE} z35WdN4(IX4w*of#7WrB<@E8OgszCHF<{CPJt8c*wKk9jiSnu<#nhDuRX4bbNxr1)j z3ZD;`!_+(BiPxe{(3b8c^Ir7NL%3(wf7)qYXe>=%GUhIS?Z6lPT^jG&)^z<-nVvV7 zFAAoYOBZMV%2mrqD~*PH+*jm_bm)49)Ze7-K^@f^!=~)G9AmQD&&_D9Jn{ZA3trc& 
zkxbQ}cP8|%WRjMK97Q9W~-qFa~!kt3)1JZFa& zK)hYFNE7U-qw15`S*YDS)NtIPa6+konJR+e7U)U3D#|R&^CeUEh0ai--nVzvlPz$Y z0hyL?E8z|?=Wat1qByr#%0u5jSVdg%G%{(j7A~Cv%iPE}J|{JiR{7(f!WGIDifvNs zzw7XXWms$g+}2+J(phTnygPFv`&qI<_d?K8(=wR4oyXN3bx)>i9q`U$JJg$bv zzX_IlkVLIf_uC>fXh(&$&TN)~znh4*4X1zu=@k#qepIIW$uXH^_TFQwxW4*rR6t@W z`Q>jCyO**qN|U#z;fb<8PB~J1u&lRuQ#00ZGyE{qlLrpG4iT2r135wOE$%k6PA>pC z5030;0RlEj)uIsbSu!BfN(e$dH7Ztx+YS^2=LaWb1lJyFu<}q^H^Q)y%D0n2|ORqZ#l51Fn4({lXVY^ zq8ny&YI9E*!hw*7C8{!WG5j?oxE-JB8vAx<;MJnZ*(uvI_glhN5_?5>+R`*4<9Cob z%QLegR3&sVZGWDmLd2%Fc#h8O_rvbLF6Da78iA$;(rTVzDs+kI;ft?oDatAdpolOE z${zPgYwbzw1R3*3pNjZI<1+;Z=mBk&cekc?>m_HfjhI`kqNsdRy!5~Ldw;x^w~8#E z^0+{>g)V_{kL zF<-6jS9b*QutVtSBb}Gz!=GCnUj3AS@YHn3Y%>NxgoJ91P zxgv5gMe64A;7M{Gb!oOtggtzy+6uSAwiBkJ99mht>77Sy_%bhkDs~V0g?$N^C6S`? zXZNaFvnzMbNXAf2b}3~<{Q#Z42dv@fD=9=Lt|Rff)6c`>a(k`0eZ| z@(_rFMhed(^PTR1(&ZX&mtgh7lNn{O@Vr;eQ3*bhWfsFMAxHPWN>clqe|lrzQ{;HX z+#LLMNmJP6P&lO7awLE*yxL>@%j8jtv5?zqLXanqgtD8*3WQcOXZpHM=uvOUxvNuZI6Sq+pLaW zKXchvaMzPe)9LUW;N5QFU6JS~0YrX;tftuW2Wc9F&skU4G8$L1g-yrUtQSI3Al$Pn zu;`6p8yPIY?F!XT)|!MLJ0jocVw?`Cy9|hMu6bS5>-LZYDi-IwC+mI!iAc$9uB-5KG$P}jm7t(YbJGKXVTmfryFu znsD+sC~nECjUne7-x}$=ii~$@uC`0@`ZbGjgDGE;Qw}a>0xrW3I)?oHp(Uf5vBcW( zl1`~{p@*zGS|8t)74qG`%Ndl$*B-v~a_GsFQRBq$!sO>Z&sOh|<7DsSNjt|1RW>GY zE9kGCIGw4}Z*K1iJ8N^B-E25M2(LEbybN)4C`?jaAU_}1FrYfeSdnO_1v81-X-LKnR$|h(kyIqX zARYy?+&Za{{0!6UUwP;|>ift2w7IzD!4j!+X2wC!aRFjeLt;-}OXwoz%a86wajrO@ z4YZMs0(h;LndQSVDu(7JgXd9vVJ`mIK|cYTHsTX$8LIAgUe-SuyF>i@&iPMu?{lB? zVh7V!-4y--wy;O&3|0uAboWhtg|yHXXqu#d-e{?6nRyRA7gBpB4}l+K4?pJ`C;#R! 
zC1t&XW#_&qF}uv@dIpzv|a|BR+*R9%)1m|taWb53S zm>1t089Yikf3Th)C$hTm%ld^|J@>lsxk+(JrHT=4Tdh%1wew?!mYHH8{g@u}UxDNgmE{rN4oQArUn>jf!H49i;gqjJ~X)&{C|IA2A*EvpyYrvx)Q&+U#GZargRHj;Jg&agGypY=A+2PxH zqnp~jpwmL};dnJCip=gdX`>IR%K9mVH&w~0zo;F{HM052$1vin@yfN$6SIDqR_!lb zbA7r=e+d2j%zp*guY>aB;CB5=diHOBv`}|tn;V2OG8%S}w%ceU*BWmBY4qZoaNp&x zM&W}&Dc{MZps!6vqWok1V~voU9XD641g(=1vaXOmR#`{Rki;dI5@pVT)oL29Fk8S zaLo4ZJ(0PH6AM2nO`$^91 zmUAB+5`CEkNOv=g5O)eC)mz-dSB{Eo!&ij&jDkv-S=sSWNh#rn7-F+U$_P117B_Ma z0xL13ssVM=2d?%sIGqbT!Jpy1wRjq@*svRe=MMkpbw!>o}c*mma(QmUi`h$floGu+1a*t6HMzP3vl= zm~h4>l219d8&|J>4@T1!i!;#B{q;q8meJ=^DI^;LR_LiMlRJVm*_T}bT4kCc{mSiY zo{C$qI`7HRCLED~%4kqYjXPFPw4i(X(=yR`x?+9vxE(j}?PvLQA{EToiOdkxS^L^^ zg*qpp7W^-|u>HB#(xhojjCS-ld;oy*n_>;?AM3!&qN!I;ER}LPoEEgz2jQCvHnQrO z0@BRzdSS6Zq2Vg3t*X&+fN!GSluTNH&K}9xy-Q_~`?2Vz?)(mT2hn(l>4Ohpqp2!? zyVTTw?(N*YnB;rVL83vOvfEC>FP<;Hu{wp}@PyA2&#!2N6h#0NLD+9IFFXdf_R9L@ zj~om&;=W~oeC1FR(NGd+w5`4W z!_BYsBe(x!DO}JsB}^Q@3zm8fTN%0jb~bAH!sm`n$afyX5ZROT{9Y^F8o1)5H45jZ zYCL>>wKr+b=u728cp?*6v19Y1wzq}5%V!h!215z zBB28HzyI?|4UpVZSn>a!;>HU3_7D{;SVby2e}g5@6av`wD+b+v#`-@O`M>$e{Q=Pa zgd4=;fK<(K;ZY458Agn$4l!t+DfAOHIwnD7G>MEE!s zl!^Xtgzb<3i13fhMTh@~pYVVCnJMWW%IPsc;zRzeTLMtp4x5=A#vsY%-&qP?!D=kIOyl^ zf4l+BV0t_2eF6iz9n+fmFb*s6&2s;8RB(RFrIVh9fzn}`z}ZtM=9Bj|xbxF>{!Q81 zJAvQ81v&%XVrZ3|mpJOQ$w9kQ$4mHv&S{HFih&V(zXqH`Hya|f86oj@tg!K>Cq+- z=Y%vmxoI4gG6>(xq(B(CV-*8+vFZ_ zvSt9Ei&2%Mw#O&G;?jJt&jPcfU9A9$$@%^6L=JA<)$w#y#?_{E=aE~to{97L>vlZh ze&DA-9-l5ADLw#NzMCkEj2J=OecJP(V3qhuFI(2>uf69RWf^uglUBd+N3SW-fv3ea z_r+m>)NhXmPdF>&Yq}qJBJ!1fA8pzDieGdIH)}bW-&~)xSl{ES*3^=m&#mfZPC=v> z)5Q6I18GpAH$YkJVJ<+dF_>WLu?GAq+FWnO%X|bcfJ4IQD?pE&uPYdNf*|_7{L%Ka zTbK_)%(vO=cgAO8Gu zzRs{ESo#}svCMnRw(}rQ65Q?5>N53T7is*AoIDbIHM+#dBL!zHU|s`rr`*5t--KU{6vqP!=_UTV8$5hQc+ zkp*#bzNbC?hKUpC_d%{VgijW1m^}5^#weo7#LniW(at$BXhMlQyF^p%+y9}q|Y4XD+8 z6exG-_znF{!&c4fo2!#SV6zaq7@vO9xB>%k?5i5ET+AmL8Iq6X=$sZ~6l;zxLL}F6 zSWtTUx@N>{>nLnf=6e+Z%vZN6ht8kxdKNWLx=$! 
z)Cp!kfN&=Fx@j{?0ru%r^&?pRR{Q;+p2`(DAxU5+VugAhEQ_o8DR0=*w*qKF@iM@v zi-QU8cw|TU@HQL)R0kS!ggP!1Q%O6%Hy3d^)c`>khK-H?&c$Ye)!O!lTzI0M)JLM( z=w9E!rsF8)8ybx(@o%knF-79^fxijG0B`l(jSyTz?~(TKO#ok~3=IKhUn5gY4ZRC4 zty^se-oaApk!bmf)w?iUoRoZda3(6LZ#!jc-@bdcVS4hljYqbQ9Bu?GT3!@Cq70a` zd`Cj9y4}R*3OrU`1tPO=U-vLXkunf$B3YN45A(PMt@`P!A3_PC{>{sRWfgrkqKs&W zE7o&W@Xn-|lajUYzBTbVTdFCB*eO{2?nCV!o4f4<_6MpKzR7eP?3lGb&BM(F?#)fs zJ;<_BQB)|MR2 zZtXYx>`_|!Fj)aK_={L8>_hFDqOCDb&paVFH1G&9G8jym2B_>S!tHyVS=h*6luOAexCv_wYojv@-?s0eN7p_qx#@d;9eahcka3=>o90t6 zmSVK*Sr48X6AP}FcbR%wpl3r)D(S|qsPO1$1e+yQ(@B<2GoLWynG@7gcqaM?>=x#- zO4Z;6VsBqa8aBX6yAFT!)uuZ6$2b+8G?oNh(JQzh^_@x$FgvNu+lY5}lWYMVC~vm9 zSry#w>X7ZJ>DXtAa9+I8Vm9&G>r|i&Xy}Yy>ZJM#xvK@s&6M)}xdE4DNTv7F#`VY0 z*7F5UGijyK;bVOedB%1YY3~4%yDt(QlB7dJWvlz61L@{7{qfoPRDQBgY2D*2m*!Y# zJP3%fc%&KNe#7%l834?Ej~YtyO1j#QfsYa^Uk0CLmVCixWqA+nr{y>4Rn=%vNm;5ya9 zHr?m-Ynt>eOI-FZMa@mXOHGpBS3Y#?HbD}k$tQuR?HOa8PAQQOFDvfwD%)|QHr9)w+Ea2Z*y20V!l89HWdMaO&tlU`8UKs50 zkGt0{c$ffzrw6z^-s2XPU_luoSt^C-K1iBaLUkr16?vD$^9mR2s8q0_cX(aN9Rr>| zCb%m#=xt|f3_sM+(S=_-3$3|@G1iOmLts=3B%}?>17@PWb`w` zdp9x(x%5DN9!e6Z7w=arIf{c!-L%by3|Ie4smtAVik-draBkDHX9*K8xEm-cIi&(< zYph3)vzzHjxefGO?_mV>J*pIw1vo5g+CdYjuRAiym zoHB>Q#5rrM(r+uue%Po#)rv`*I(MYnp3cV$>&f;q1w}+0IdYoY@Eo=m0g0jpb7s82 zbEyEI>Z~TkzPVm7gLqDxZ7>p;PtiXIuQc>krm>thbiq9M8ZY(M&P*>$hOu!l4;^bM zMW~Kdk1Y0eT*hUtNP4{su>PsQcF!%TAasJQsfs95b@|;=u}6v`<3bHaV!#qfuT=IJ~QKxNp;H+KNq&iso@3g+tbE48bNJn?uLt3esGj zO@CsSU8szx$0;6&hpoL5H9)Ig;gv|HJ8PJ|dh_mq#-VY58q{D)`1=m%ZqsrVs0V%t z>eA7h8M3|kbbUX*tsHB}KEL#IS-7SC?eeCJ3EUL*f9`n?0Shog z54WdsdM)RK7+jjxv6$4SRw;HhBaMfZIV!A2$?rOUIuR=vP9T!#!qo-aJ+;K#39!M1 zgTB;B-mDrO=zbf8y$%z)t4IlS%&$i2G(rQhLsXDOK%2=>|Cz0CixM1}M!W?ole;FQ znr>s0@&l>C`(CJIjFi~PQpXQd2*c~1+KX;ND0=5>6~jRD3y^}!Zr`1$q_pO?^Llw{mtQ; z<_oM(@eEPMTVZEdb(n~6Wc1&+Lp4=cz1)OMS@~_o3btoSy}im1Qd&H2=IBeJA9qWM z2DVIgJoJYjR&DuAYM5SnD){iO>(sctkSE6JFACE1-L5OmQRUHa#MMBsu+3d8cq&m; z@;uOiPI?tt9jZ`N+IW>!bWs8?SJ(BjjR;(XJ?w0M1=lmpa}pSq-OHh2aDrs@u%?2( 
z4YY+9C(<>^!;sGl4U&(2FJx%hERx!`0YQ;jvM^;QP?RS(1w%ePR_+RaPut|bfa`4s zjb>nzBRh%>oZS}4prza7Ov{&Zi6U>^L*nIFey{5R5ZOdin5eD+#qh?Nn9o~upR0}M zwX#~H_?7~%%Vcl9Pu(m`4XhreGxzN^CDi^(oKlZlT4T>3%p{n6a-%fXmGTYbyE=zw z$afTSVHN_}_WeCRWdzwRN;eiQSD@2oJo-I)yi77c$x}qvordj1CIxX;v>`Sh1rEYr zA}6!HzI_adDY10-aEOVLo}r=%E72mX&h~SCK2JI%^vzw)?z#AZFWD8vpq)2BuVQge z2R8AANyQOvJX{>BB`Nu%iN^j3=^`oz>pK{zvG{a@I*z1RI7JEURcCKQ-IY~=KU#}- z+g{CW*baqt>M%qc&eN|f@1u!kWNi9b*bvunhIRZ&2o0_r1iz3GmHJW6o6kUSsT!@F zY_7x??k_s2b5(0>ESCFy2(@0%1AAArwo|^he`i6jSeh`j6rQ3hXxaK}dGp0KmQ=;d z`#;zMi6c1f)d!3WqDL9QYi2K(h3!TInDAsr;1T6M2W?FbQIPrM=j~L3m!0D7e=ZjC z+s=W}oN50>Ta&y$Uzi8{WTw3&jxJmP&O3-}!H{Ft#PjN2*-Lm9!X2Vvk-QT{J1*vo zgY&v$#`d{yLPU=kFej)4OwA)B@1yZ>Eop>3f#j*#Txl(kU8}$KgN$FPjc;gwHVauk z+x6MQ=bInI)dX=dh*bay5M3;ZK$r)7pjo<0oc2Mx+je>t`uufO$L{33-<(4!yB(S1Lbv1mRllEdB)?H8o0xTX0$2DYQpJGOfOOXc3WTYIam{3K9Fvicc z#QWtn888~Tggk%!J4kx5E0j4<5K%v_q7x^9$e&x{0tXL>sWn!dJMZiF^Nh^CZO03w z3-hP&wm3-IO^LSW_MGf9;DXCQ+BJ2vs+Stu@x@DilCRjDO-(c>pe)ilC2?&bOiAcT zy)(&Ax}dS0^_J`9{Bn8@!Wc}AP(2RkJda6b4R-75Z|{~6C>IK(JsJFTzwD5M-;&7X zd)qg&%K*AT@)auF2#9OUYJ?FB_g<51VXtTB#;6QYr56`v^Dtg)HjZk1Fl`DjZ8fNYMw5Q zux1{63ddsBZWE#0<;;k4?(Gr9UF->EN&mUu12iJ@22}AqJvOOc$c~BbLiaf2!07QX z;VuJ*gDib68(k^L3@%qKaYrFMI`8_pYAx-(7O0FG8mpkTIeL^VW*D&4mbZx3wfFmz z_i{LrS2&j_zB`a9yeFzgl{Sp%u(@7QKk zq=9g(c5OaLBS}sBX@5mLB_uy-CMl4z5PcG#VDI2++oGH zse){y_m7JS_O=YpAFXOQ{Af?c^d#{1K+mDv`U#9SCo4xrE5p=nA79wr392xiJw!W9-zs2tIWJUIlV=A*I7c#h6e?C$vgj`F zGdxtCR9tCamTn^%yV6rBXNtH^6lb4$>vJ(T8qNQwq@VUP5mT z%g}Fz*sX}b$#U-u+K3dl4^UO|><>?Ve2t?n)mDhj?QH+{kEoejM4SAKm9)&{>zziL z6}ds4{Wt!2LaZ)F2X8Jx4Z@bf8S=Z6m76;N#<2K+$j_a;v2v7FxabH77bzX!-(+9D z=TQt1G`&2?j3L`0=;S_$=!kX%372f(R3oWFIwO*+`5rFklcgX_QZ;7h{rN#APNJnq za`+WwfDkcfWpmK2y6lY9p`4YlrTk{K+UeFQuHE@oKQE`N7P|%FH}X@fc^)y;d+|Mz z<~Y_q$v$=Gz1N795Q7^KsqC<)t{obrrF;$UefFbZ8)jUyCpp^n-jkZ**Z1h9J~sF& z8U5&N3V~sd%SiQ^;ibbfT3R$8OQsx&*NbWTw{oGDd-#)=eh>kkgbo)9*xj6KyJo3v z%Ju~4KJN9I5vwMy+894?Zsg!?I&*Ap$_!c@W+eO#OmOgLo8r0C7fBJc>Wa0CVWHM~ z0?b0bRvCN%4%xf+EseRXlYCXDqC$Imu4oZw 
z2+Kvh|H7^9XgT+-4~uom7ZUL{V0XTKp18By35QxIpJP*>)_g!@%zm{I)OKEG#LGs# z+|mi>9C4BEq|Lv#?o@|sn6_#Ij54GZ(T1a+y6`-(aeKh8{O(YigTu`GnKPcDsp$6T z(V1d>#C&GhDG_6H^zlsfuS9oUUFE7#-euk{-hfh`oglHzM!I6;FYUB_;Ue#VXC3bY z3E3kFpJ{2L^HNx;@ED(4TVi0cTjL_SM4jJE)__V}nF40g6`6X-k!p^P zvj*9kWlx(;2P(g*u5uxD+ z)iLcZ!82LNnwOKDx2HIZKYx8?l#TczpZbva&Xxl58Psx0qIU1>;%j<EK9ih#j{FePk%Mn;aqDKJc2Yi!Wj_!G4sbX|m}R={_p*dbLG6?0 zk)B!?L;|_(SbU^JiyyR1eB=!pBd?gzX)4B)!=>2K8^!EM@6fOv?3T0-;2J{gG~es> zrYt!aC-}%GoOm3cRbiyhT5Y(Q@a?yD;NqwHqh*B=zKI?7mf)BnNDzv}+@U!5_@W`O zpW%1WPEcU_JoSEG4|ITOEUyW3lS&a_Va@LL0sIP4K(Oau?gx6Q`W4uLgeQ?5FZMgT zZmtg;DUHGXdeZOSBZEygeoJ?Mddz)qVhpT2DY-)(ZDHaSAxha4-MFCrIu#HV*Q=xel=vz->H<~$@^Nk{`11lYrS zJ>49=!A+TV)!^5a=?QhUJ` z+Bq)|+VR%ZU2Xix0P^lrn*$uhl{-=`?~twd7R)%(%f<>tV!DRu)vsC121{o5_3R2< z($59Yu$(q%)tbdW@;d$kUY}`P{a5uGO zm%Q8WPNcZ~JzAl;uDh4jXNF!@k#C+GXU|`YGN^E5po|I&;ZifFxwwX2 zRGW%C{Ra$CTP{Y8d$^*(BeQU=-teyzBNAo|G;lzEMIB7k((mrKAEHi|^_nPv35mExF0}^)w-wzzl??QIL^*Za0 z1;lZe9&rF0K==8~Y>&mEKGyPqX!pg!OC3afuMerF6X!QW%oQQTTiBK!^^^WRk{k#_ zE0BKJWTQ46*KIM2Ml7dw+6aC1_!M>Y67Lust{(Sl+9AcE?e^hPRgja4wCHF8ch@$d5E8h{cEZIpufw!Xr3no7o{x+R$dL3f%QNZ#AdMm^)7Z=}2DI`!G z!U-i$a^L`8g|_So<~ko@Q>6VK}<8#&i~MKt@JQ zXPkC@IK(I>gS`LveMmP;eI;9u@TbG8pK(5j=>xB0{0`#+)F!%lbWf2 zp>OaB`Dj}bjG>~7NrH+Y!Qrj6+c88k`&;M$hAhSV{Wy!dduXxjU8j&(X1p zsUK!6~o4-ArgEx3{{C51z zfFbjq6+HkwFS6h&$bm1iM&?Y~=UW>~vw+MgMCIdfa+@jtTOTf=mr ze7Q3ix^evI$kRvF9(wWH$$dIBV@e$)8?&FVQE?$=|JBDC4ImT!F@d826SwEa0=9dd z;>ap%$mPiDHxy{+k3O#|Bxf@)!8w5|`gj!(=jLJsr-|iH3sF>lB^vPohto534_?a` z9hL5)UUeA!mYp%rBrTj+d9XU$Cdoey>->gVKE>k-A?HG>p=%h7$Z2-vTpp;n9Zxor z0bP&7yzkBMu#Bj}N)J^oPeK-ach8C-sJ!&L1EGaD zL~9dXpJ^dtcUX7D=JA_T3%fL}TdXZw#=O1x zp|lZ})6k~FAkLc^z1~EEveC7f#@1pLo7S-}dzmm(bhti5obX0%{QHym6QmfuPHS|a zY`Rq2`EUpqiIk|FFf3zFnWD0h+*x?XS@tB$#_A0*X+REHvxRxc3H$Zqtp19aEQCJ{ z_VC0m%PHA$fONgd)TmqP$+zW|jd;wQmyoo{@KcX&akQh@phz0d+o?vH>L={g7pk|l zj*O@9SsxQlW$W0Pb{0_wd8ghiJFtJ77GZ*qZokCCS0ua)Ktgz7wi&nX9W-_{b!BI`)wj^o1xd9Y^kC zXk_Z#*zD?0-2aFYlP#sW2m{7Bd$ta=S-zN1rjbRy)zW|E@q^u2P1rkR!WUnxG8os; 
zE=pet>;)g0hr;Rls4P&$V?DRWRE2?Pk%!qsL%Af1-H*uzz?PRAtmsrEeRw9k@G+yW zCu(H>Y@gv1NW?DPO(|eVchXfdJwLZIw@GLPCv-FTnW?xY&

x6C^NjF2J3m(Je<2+;640kKUYK6*|wB&(X9( zc+|DKqnGYGn-Y=k43a*nxCCS)M}E6Kf&?_e75hTGK?9r&iu~$cxNmBx*g?gr?VgG| z?R6I7N2Xw>;&tY^+lpx&@PUhDA&$?Imy&45EjKQ)k$bvWV6|_5WYMKeWP{1%CSCW4 ztSfF1>V+yHl*X7f8S8gnN4<}Kd^4?W_kBVfj>A+vyjeA9+CRKTW56kR<0W|B^$9*( ziQUPL@pAcEUg)Qpe_M{Ab3uTFMF)8{kq4Ws|0MKObDcf6q-j7>c0#+09=`t~9V>TT ziK9)M_!afuRi3&3H6HvHtRj$=d*jBzp-t`$DBUSY8axZ}6f!Vkf_kyyU+=V_<~wvM zJ@V-)KfV?zO+@YeN6xk^*6d%>kQx5R@bvfbqJ`JPtBwWKx=i572gWrhEN9M|5m|6< zfe{a1?>~}USMlVvH1vR-6V68E6Lffe_K!u(2$|aEOXQ4Bo{s5gbb3czH-nnhv$*JC zrRP9fPpS4iG!*#yWxUsyH-BgVGP$_q%+}Bk7=dqfMLaD?8B`(IbAkJQqIA&)A{UN* zP)i5Xq8w>Ozca4>xU?X4IZJQ4h>1HG7~H674O(v>`~?OCl-Be9+{QD!zV?I(gK(4JrnGM~!oqn#>C9HY+jj3SPxGIhRdepA z!@5s#@vVBym4xTgQG+yo_Mu^*;z`+U2XD7%^;P3u8r-JJ7phXh;^T_<|@zai#{t7P!GBPrsG4j3o zkujKVu`{}jS6rFay&ZZwZE@BWmSa>RugwHWE_XdoyKx|ug113(7r=YRI6%hl`+c_2c#nI@UJ!7Wl0+%w~$f1+I z8dOSEm$i`zSN#@30CnVMHQPw@xXwZ0`*GytvsrcxqruN{b@SI2wxiHT+p zbjV9NZ}p1|Ax4I_1qmfo!E)HDHb&nkx6U;zYb%IBucR1K2-gX2x+7wpi~H+nTi!a6 zDAPoVChYX>xQoOrtDR>mVGM+QMm%rNr=(`;mCLJ&T&}juS3=xYuO2nkkQ_27XNK-4 z1dO|=%#U{|%cC|X;@@4k8RP0foMjryYZqS6>DN_2R(Q28$I1D~gJ<7-OW*T-|LxFG zMxuhZSaV`mpqG86IVuXYD)HEE47hV@sGHf3dI3I)Y~Z;=H@!eW|F~^cXGE16PhS;{m`g=(WIDwhppCA3GZ5GVQ6qY$d_5iNO z-${h6oR@xhZP#(#?Jg&cIu!FED=W{Vr2VwHX4Xfmzea9%y@g_uv7jw0OV?q9l>r@x zuqW!OMc3w_tpEs(_BTb4%0Ezru-k3u79@kX!Pt$uJQ&2t;g(<_w84QFhMj+;yU7S_ z0$g8#wi5eHVZ%;ygXX6LwInxGoCamwfx9mJw20!8%_gc0Aar9D+}PV-v2!0?n*rm%1-)iTh>$wOEgI^p_s)%K0v-^3|eLr3i9n@0hAS-&T(lEmk6oz zxfUpw$Z({a?<28r+nFILz|q~|LiEQI`%mFVsjy$c+Y0q+lhR)ANHe`W$8z-ab_Y^q zW(OwT@u1rnX|f<9Zsq25H~JiJ&)-*XYLj^87Ve80Q4#npcgv5(O4o~kc}2yU5dDnc_k89x{bIGKX@m8>EUf(TPTq z&U-F-rE1t*43Sl6CBn-)f8LK6_pd9X;Qh|NR63>3KcB+;WxE~(=d0gqW6dcbIGsYO zSqRr@;Bq3$61(5-C@>OJwSK?;#KFaHc~nf$`eHWUJPS|W^lf$igLXsW;CHq;A4et6 z)}MmrdwfzPIp7pS-wRtl6?Fg`IY-H%DUsX?^#Wc*FG5p^PdluQ_phUrHE1tHV5i3| z;xB9M{WFQP$X62Sv#cLETYwd|A5EF7&QSgU%;@mX0nW{ssDSqFSA|BIO#FsI%O1ab z2lr73Mv2RPs2Y*%BNbG3t+t!M<(YBWFo$R8?;%`=Nl6MZ?pC|>+?t&>-Cx6d4_nEj 
zQ~|X<1TI8Wbr_*S;xC5LCvv0E@{dE)5=8N-q#KJ?qYE+a0EB=S>Ozb9V1)Vi;{RevZi5ed`s!s@LX^s&fg<`lzt)N6*Ni&DW*;(F$ZNud2cS8VSK z{j3`dYAs~yll^_B`OoE_%Tz}qJ_AX?Q$<=o9h2Hm%!RBvO}i8vplm*tOk&2zM&$9~ zvxR~gl>X0(tIBU-Wbez-{QEPxa6a~w@}##k5WU^l6AZM*pFHZ+K)|L@AlXIT9fxNx zya=W+u`MT;d;JXIl#L>4PaUQr+y5T^%FPyU(@cxtCSLuzmquA;R*=(i1xnQ4J8h^V!!bViOwO%^Vq)U( zNri>js_ND6G+%_pV8NSJLYp*DaTQQeNA|3_p&z{%riEk;(Z>+POtZ z6;vZr<;Lc)UKjr>k#wRzes#YnMX-%i9wf%A;da`6ZzCwd!mazxd7E;sG3&u&!@-$y zt;q~QW6NUlLs^3+ielqs3EM-mQu7Ug?S^s=(dr>VEuk9NdcA_$sO7V!|y6#Wdq- z8|`49$V+&}-tHy{G#|nrpO7BmZa{hA4proL9YC5hv0$?Jc>TyDV&*i2nBf=eEQTZQ z?&Elh6p!8qvs8Pj%4LZ=c7*P0OtW!Y21rvC%Eor$5kd{Lo*m?hsv^*arEhkmZWB5DLa#kQM|L02lTXp$IdLS*= zVT=c5zJDv>m(D@y_el$g|CW9~4CwdOVsyp+XOYqWzp}`yKp(goAD{F$p7ndIF-D%z zL*rNZS)+f4F_O=8K*fHfE_T?zjidKi0x}@>|C5CK*C@z`E^H9{BHB!>e<$|QQCLOh zCxL$(_^54y1;qY>Qhes$SPy9uRP6g@kNjJ31$=@{DvDiUcuOC~#-C*H@;!`a03{=mJd3 z)Z4r~pJ*{PX7SFg!H@k0EUi-ZtSeX2J^z}dc$HedNYxBZ^Ge47(E=N(ci)JgaK_j* zpDj-U=s4p$Xn>LoRN#S6f8#P?;yO7vvSc34l>mnD_~czE)-TkyT-l+7&M+s5(>H`u z1Aj3rz#bh21W_8@n+^kP1*x&WfqY2e44?v!xs9C1pH`tJWg?x;#QY$`11JweRBf^n z)D;9hf!$5`^&lWG3?<}{-_*l+XRhqMH;gD={ycd8rhX~R5v5jt4W@n24x6l1UJ-vf zkbL_cSZYY3CZIlR)IiOf`v)MD{v0*ww9KuOgR)HcqG^y~s}VJ5wE>_M;bLL-G)zQW zedBsYTscj>2W!mi#SKK1TRv07V>zfgUlmwbF3-7p&kYV4O!s3GEx`oPr+;Z~Eig8! 
zVPecBY07BM)9MF>sVJNBe}Hht#`@ClwWNQ*!_xjKAyC; zR-|^2UyFEy;-G-ItQaQ?5$yufi$j$^U{s>%F-p%Wg8FWKKTJKhOyh?3;!P!mw&iSaREIPR2tC@dmN;#JxoJs#u`mSTN0F)6;(S+9n6!&HsnFt5#% z@+?|Zjp7)|)u*m%Y z3`J`~EhH?vwrR^QaDqR52R-}%~879i}JMua)^^~&l+#=wFoLv&M?5AtdVc6u ze+60!17rqfAO7Ol-hQmGcSdnrJW!~XAx$WJ!k)Ah;AO@fI`FO+x$QSquFP*|ffZr~ zvEehu5!qPntWCxkR`I6EeXLLf8L^6jF1tLD%2BjzF+Kg1lB`m=QVfp!5Iv%DcE@YQ zyKL?2p!Y!krS)>l@4=mOuWip|slH_XS;uA03FkCNz@W5{^5gNU+{;R}M(5Iz3LN>u zMG{mn`x4D$S{-$g@gK(K=j4|8T@za{I+uOlEZ^&*vKHCkH))bb?^6DQ!sL#@-O`Z3 zH6#kxa6vVGd+_+`7Bvir7?QU?EYtN{%aN%xn=;bMK91AK;EfZmv)(UEv3djMz!=WW zfD#2i3Z4o-Fg^fhmh%tyE7a-zp$2_F=(S-Dpf99ssNmZvvS{ws7Jm)+O}?&2b7jt; zHy=p(<fgx&>VlJm`eR1NV#-^mGm{t4qP$97`XhhB&K4&cSLmj*40Z(6F!|PJakzMxM`;D^4UpZSrb((BAH>)! zo&8GUUAY_(-!V=f#A~Y>7=1AKIIQpBtZ}1iU_((D?aTul>*q4{m7aR<&uog5jVlaI z-qo;W^rAajXx`On?<}4sr=NwK8V+VXI)-t5#R$cM$4N_WIT=E=1~U)JkDCrD z7gUqC2Zh9Oyq6_m7hAR5ND*Y^8FLdwrB`Q~XSjmsS>!3$U8$O&n16W=Xrjfv>d7X8 z$A%4X(d6d)`DO44o}9n-c)~qFH0$Lws=A&0V`5UW2z)7FPFCb!1qh%nDi^bXV{# z!3#Zz3shd=(k0wa`?$|zh4qnCsKU(){7=q>bdB#c4Y?xFm7qxVW~(2ZcV9LCP;r-v zw5+h_qWz*}>WK19%g7?}CjsU1$0{{!4C?^V)e||0@jLZz)^hf{U1yD*Ola@@pZuZ%8INVfVCHAvjvgqw~=~(HB8j{;a&tlWhp1C@vFVQFi zf<*tjXxSZl9u+jm=MuOW$mPPXb41&EqXm(O)f>Yf-JQz$p_$w=zA`tpDkZWyQA$-_tL+P4u;HaSUjwa4`cfxsVriqLe^IBs z|LYGQ!YD75a0=V#pN~5NPeLec`m3sn9KQ2o+k(>E7sdP21Ypc|rvv3)fZCm&bY<#% zg-aVw+&j+iK)#c>L%|-ghG=^qY13SHP$JO3(wA{xU~mdlfRN?z-+;p5J@``QZTI8L zwPH%+hYE%aE4u{xj1{6;0F?D$N@-oF;Q{dv{JF@p59G31)-Nq4Dwt*qW~4IIOR?gD zUa0QB#I$x-CwVYS>7HGr^^^G#nd|=jr6BLPYm?JsOx}R5nU~s;Tg?~SFb{LiW4jgv zT_2=Ejp7Q-lO9Z%Gk<$Be?LGVzd-qQ2c^59-oQlDJ*0NuldSo%?Y!$Mj|dOB5rW&) z6eR~Qw}NHlBtcO>{95Pf<8*G>=lQ%yyGRyX4OL~+F+}ee==7NJC^&&HW3Q;qK4P%-39f@x~*hl}` zlf+<=Mps-axmZ#)@KKN ze)Ihj(mLKVaEcn6I)eO_Jdwva?sqT|9kW~8Pr+sxhxt&Jgzl}zS?x2k{@vIze?I@) z9nZ2$6?1ahjeF1_fJnx{!kT7R~Hh| zoPZCy5?RQ|f*RIr*_Uy`yBN4soOApK?$hmxY8^ZIL7cPE;^(V5)}2k*w#{>zk^rJb zyj&d2vN_D5l1;eGmw`}G{1AGSPzG8*Qpd)W1*4Bli2@w(3;8W$n7bGR?HPd+${J1s z7$i{`G!u5y-h#GinT6)&Irb 
zTSis+w(H)4h;+xKg(*mPHzE^|4(V?fP3EebzgCwzvDO%UAQSNrGsDlKaIxQjxnpN{*79TX?G+- zlr)@MH*K@$C!kf6P{$m0IE0VO9^$`CQWq~xVQS=V@_vJOjZEaXxp4f= z8<<$-Unoe_iq39wm8W9T<*)FaFVNbdb8mK#b$JV910$>Xo?SsUD&>16>H^W$hNjX; zrwSCJ2kVi2L5G{IXBAXme^JBuItWuG#|G_(pgqw-$6bGn_ix=?TG!xgy%0kRaf(9% z@1SDI9=_VeX8qQPn66EuWk8*#u3p3wmV-~`aip%^ zFmLPe$7~VqZ*!x#1?^5JQqv`wlBf!$7t{lzX2!RFtt^~rS;e~F8tq4SlwcdQt3YdE zYHe31+tGYFq;F;#EqtF#*!e2w*g!QM|K^)_&L>=AFJ&K`6p;Q&oFs4j7I7{u^@s6N z&CWcZFm!o%p}2?AdY;$T26V?gjGA7q@~$XbMh9VW+F<~0jn`Y_=eR@=`V1c7s)q^! z9yP+YC&2;XQ3ZAmZ)@-LW0B*)2=Pd-FPeB6xceb&Z~5nNr(z6g-7pUm%d)P8?68z$ z1`4|7!Gejbc(r*$=dbb~V22TqKXqM` z@3xx!@QyJWMyun!;O85Xr4g6rmg0!D-}Vs=2ylIv@5v)uHR*X99!SaqHE+|oAL>;4 zcfe{Tj@Gs{%aI%*^bwgC?7T+P$_F$+jLp@99I1yD`D4g1vzEdjQ)|mhF2_{~Oli2= zue#;T-m&T!>;5h-TQkU_#orH_ z@(pHH`w97|Y=L;X0i`zl+46(|EbPI%vdOXxE{|4zmSxP$$7RSmauNqCgR1(>!y?#K zlR-AAF%-97bLz~0jWjpyglLBFY@hIr+*AEIJq9ZkUn29tRn0WhzBPX5?!QWShJoVr zX3H%e-ITKc?FN-Z`MkmSO(}w|KVH{NRry+797Ts+$gTk*jt@5(L9CAZ=fGdX?Tt!t zz=85XD~)za(i;O(d5S_Ugcd}|x<%)p!$=u*6h`u>LIp%M#r#G?J}Sphqh?}c!x@yW zxbk-ezfI?RE#1@zAB`DWfujR;djaM~0{T8P9ya-z{0Mp@ep}=d(jDseU;M*r7be97 z^Mu~5JTwUrNo~Cg&25>cs@24sYYoiMV+h};85-~Gh|_++Ie4&F)m)Xvlz7>>C6KI{ z-yIYtv&$9Z@8`9+<{_KKrn3F;yrHuxT)2VK-Qx#cc@ItC%|L&dpGQ?jkk@1#vapTO zlug9J`aB_LnrmYSu}0@M_g+;vxa}mQ+K=;1jcq;QC|}|Psl{`#mA1p3E@}Ef(PGW& zh=N-2hx|V6j+xqjGite2%>#81{RqzAg{P&a? 
zG2|XN>B8iE&c{kX1>5IK<=D0-+jsk$m5(dmVGM8UYEr^VgO%HsGBUoEeZ3O=Y_}cG zDXI4>%<;QVk!upxE;Z^wl|W^Sy9d)X#YU2egug^3D4HGm{*Y#m8&~>Ke1eT@Wqc@B z!Fi2E-KD^&+>d#1HfORY&D|JxpL&ByILqHTGHQ6r@O@ts649wtR|rAT$N$VKb=en%}gR zBDvX(G()IcMlvv&Rk+bGn#<*@<-+GZAl{KE^d)k)&GLxD2_g=WO^h&tSR7MxR>tA*;klFoo)DyG2?4`$VZuI+oO?0 zJ=&2xNq!l~d{O*Px`*(N=tjJ}9XAL_*v?IPzxs=9)z9NbN*ByXQYvJPHLLcf=CWsC zvW=8YVZ;>&Wl)=WtcD%w?Q7!*K_MLUUV*ThGpoZXoW2=OYz|D0VIK;*hcND*P{(am zD(ADF#^s=dvMm2|&v4E)p~|)5BT-OOEU50JcJSu8k|}==CUEu>_Rsr`i#Tzr0@6K` z@2pX9v&%7N6sgmP@G(nzTWaDY_T_l;zNNo*3lCj;)&g3BqdY^j*G}$i1_8Up-iw#W z*xq~0q}kKPFFyCaWotBDw+gQ)^S5=Bj0xwI*_GB~0b`9<(vaH1Ss6PDvpKdjv*ibK zoOUUD^|=uFz}=(=f#O)pmCx%5F8Y@lBJPt83dhc-Yz9)(N&bAegneFWqa4Csa|&~a zgfFd2U1{Z9vgupT?#o|>jXH%3s%MR`8qAl z$&YzQKNJH(!#S;Z9hQI4!yLwa`hHlCA&udl`CC9dXWYUV-LRp#JI4^iDEUs?EdEx) zdSWWsTRzp?yc9&@8bFedSZWD^wv>JO_w$u!n*_h=Q^Yw6*l1y9yv^Z{c1^6E{OiMG zONRVV%pj$BD}4KGrRFS(IZOC4tq;XupE8l?TytF|i41xQNEV_g=}PE*jc+Wu?v1&_ zyb}?m|9qw>S43L3C@B{`@p>>rGI8etFZ`Ow&z-im;(Kos6O}X?tj5TYPF(xEcnT(R zbASIa4|};(%-2OlVrNHMas7+AOW}O4JZf9xzJ7bR?+1nZ8&`zS)Pn;Bn`bCZY26oX z+Ux!>*bZd;@+gbWZlJLW=e)$rCy3me;Nhi-5Np(VgAYts)kC)Bc|RD^DYwaMP+hH4 zd*q>Tv?V7maTzjsij-(J9G`?QrB>pt!7nJ#hz)*WgQez%vrVoaTW^cj z-)dUuR0%y3UB6JjejJkQnRqS0D#8PPk(f^Jx#K(VmnHA(SoPI^w5ls@LKv1}kq`!dF3tf~1qzmLpuY8l|! 
zjD1pD^Ev9Je1D+-=Y_Wd&3#u|a0s~&<;}R?9eLJxx}y8fgPI|BG^#p3k8{r@e$w|Q zp363y((jGrjLu`}kM`d31hYw}kn~CuwJ=LZIWcKAD;&q=%E1Twtr1!sY2FdhbM;ZL z>3lNmLoxj&s`r0j_}>;M88Qn~hX@`*cdww6mAo9iRygD<6yH`|5>hlpC=S5xoP(wW>k-`-0h{<9+HqQ+yo$S=*g zua7!;9;;W?S8Goe@g(8s^eg1?pzHkv%~r~ZqfqNV8pH9MSD4$R)&yX(re*3cZw3(2+CSMX`}feo+KwGY)mx(17RV@4Vti zIysRcxHEYn|IVJ)GKoY7l0a{U$!9Yk03kkfuHE7fBb^lDRy$7d1djjmsb_*JYVEsI zEVM;d*JW_Qz|I%2e$^-gG4-}`{wF@Y4QDu~IwFXl&@)?w=j=<$t9)uldBeQ@Qo9S& zz6!LTzKC)HFL5fW7q*0!R%t1!l)+UOopIY1YB}sRjaWX2!8>N;;iE#2e$Zj>!Z0%L zQaM^rrJp=Z`8MJcxBk=|rXQfww$gX$VD_ca*56>Enq|CU|4FN}C%4PMoRZ^0Oe&AV zkjfK_Ni2iEis2Q?D(RhT)|;x}iOjx03iQp9zKpG_<(xhP?@{ex$zT86MI@v1z8L5+ zz)htaU8(`E*WGs1*L6mM+MGaygIU02fBY3@tp&N#qOI|^g48|E91IIcR^b#O8vr?Y(*6=l06EGaaok@8$PB% z_BZFe4H{3?L6S})^Wc;6wEasK%=DL6eblTITNgW(?B&czINjg4GIKS zvK?NuI?&nkGeer4Bjas2By#yCdg85nNHqGY4{V3J~X=ai--C zSWys3Xnreeqg6EfLcn(wvFC0Dj3-M6fnJyge82xp5x-IcFDVcK(LPpZtciy{2yLw;vFAACkl3}mlL4QMPx|$h0r?oblyIGK$U}bON3kgWR?TKOAIppTg z0nRjw6%Mc8sU-OW=^j`XN8To%uaXC$+{cjYlO2$FOitP82r5h(gzm@MBzHdfVb4yo z2*}v^EjA^zL|>0L%qGcEsgRg%Qm&dapG!>&)||oLq2btjwJ07Z&2Huc3;d_!ItAH7 zg{AYG_rrsvY4-`$9|)3Ze=_>qbsBV+fb~L0FRTw6A#!|?NnBFeM%rHv+sCK(Tbd9- zO$D<;{s;B0O706$8;|UWe>B|`4NkYMsH`ptDTw{rq160cFpz&E+q%(;_#FP)oRz&? 
zo+;^MQGz=$u%39wexk4Vd!K{tzSwBtIPPL2n6Q)Nt;MY-!5r}=G4)zYW?fc^nt#0y zcrl3XnQUK+(uI-@U8r|ls)Le}*HfKaO@dxLLm)8CV?^d`hwwhX;E%tO4=^8$|E9?>0ITmr|jy@dX$mGCCZ!e{v4<>+L{ z`9`5i)KvE%4MvWLSsfrLx)rUNP6OIbFJ8wF1pFyCA31KYWq17=LovV3@b}zs_OSJM zi3xxLE zb55>=tj&S%Y;)AB`rk#*^k#`OhEG1Pt7=i|H^@g`OaT)Lu_O)K^p!a+ zzWa0udW>0{qfRM221UjtEn1HhECw(t{wkT1-Xz+HV^_4_UMl>3-}c~(6whaC>r>#g zB0R77#N4ggZ;7Mqpl~cpLYBgp1j)=|ieyAG?_Dx}zUz~!(clzEGuS91KLcJrQ`V`B zK9ZuOh^Yqd+$wl9@dI;-AL5X zF8omP?p*M$c$sPvoGd_qJz(vCHaiFSC2V``|A)2k4rBindQMNAKkoUa&>GdNtk`d=b zB0L+t038~kikVYR27P})`Sg%bF!IiSntcq@V)y_1Nc!lV!j?Pm9L*xl5|Q?kr=a|4 zmSQMv#5q*_KGRlEZsKo+y;M0b4yqdsj?c20sFQ-E2*a~vm)1WshSCMl%U_OCX>pN* z$Bvj$+dAAEA5y$G{CNZ1%om-uzzr6{(r8f7fRb*>G)fUdpt&*B0whxzeL8k`djblI&57x zIvMvx6x}>Wb+bcb<)#04r7a`3wiR`LK7C?A0*;3XZHPagAMxcx=?I~DP>U)TXC=8s zBHDurlUzyHWN%`&yY zA)#$M7>=4&#j^jjP0_J7ruxbI@C)9`If9*aj8%#TTiOJpkzKkiKUo5by5A4bwYug@ z+y%amuKRy(-};YyD0pRToAQjG>{E34cxfkVyvC<~`~h}GxO>I4m{q)nZ1 z+fb2#Ghuj>M_{@S*&Tdqy!Z~%@tVdhy}(rkmnBiwr&E)us7ljMRLS8|5`U)lh6zA?8jZXkl9X3Z-jR>>XYisfq3#09Rovx1Zq5an&I1?{5AY5a@>vF)4 z@!Db-#BBn=hRb?H{K>FX#e@Mf$^L(ZPArC{$fUE9{?b%71g&;H>4 zxnr8>9K_Tman@{t8{8w=$ zT9<)uP%@WMJ*%->FL@nG6sluT*y6((UV*ZTofq56N&>dVGDF<$*T=sA!zzDTq9|2% zzKBY&-)CjqHUxj8@KJ|X`^(j?wN4GNkFK?sT&HSC$Xf-lT_CS@GdM!n;!<#yv?@|Y z20fBp76ajWbvhbSLd_@!9OXXUyMx-P&STOzYhHs&-55LE_*QzKFrovsyIsp{1GVwm zN+9*7@FGLstNdhkn$|DLVr(5FWS6}M6|VOERvsmcWf=fyxiR{yx&Bk)ims9e9cQnR z6N>sKZnJEYTb6`=bEfydFB1kHhWzTnS4S4E|MPFNN_# z{3(?E^wCmtUO_X;&FeH=q=nJ>snOtBm+bi*RWFtMFiPDa`^Au|Vnr!(VIJ$+ueR~;ukpB88gdUkUUAcL55O~(y z+nRj;A7mR?Ar*rSE5&$KpO)ezLokXPS6Gsd90T!D0t>NhC_*QT4%9;CQ4;JMVAjX6 z%U1PfDswqm3i!%vEa}nf8=4kC=0`zkC@I_j_-mNfyVI>9v(ws*I#;kM|K|(|!2W7m zl%repo|67=AF%B=+k5!fC$Nvo-UgtnR$7=ucUB~AFKAZI`4(x@lnPn{jFG@q1 z-HZt1jrs(tJ5@oC_-CoT{I6SLLjbo2Cg|OrUmO##ODKw z(uo1PI=N-|`@w|%7N-EI7x&wiXE5?}nZj*h+zTaDcd)^5pl+;+Cha&3?fmeL7StTV z_(fVy&)4I|2*kJZ4qske-Q)3=d)1*&5oPl?-61$D&hBLUkrHgg%fuB9f^!L%GSEA+>sp(YQgg(mK|MXAw8!4)Hpd==- 
z=(|^$wB>2_3d^F&cgT1-5sce6Phq=&mevsWNnnr$yLqz0C>^wxZ3Uh|6nXRuQ0ex& zBy_GyKfqY@JIyByZ~#t-0lmbEv%9s6mP?kzQl=LrywG&W{^+}mqZ<%(QGCp`Uo zk^8r_*<{P_K@iC;-GR3vvGx+{_iV4_&k(i^hO6{3f0iq7-ZY`@a!>b=pO81w7D-FU z=(}Nzpj~t2I?X-F+9E47Xo}F%XS2lk0Sd!8;!?-!^_g0}Mpx+*6!5twFgNFFH3ooJ zihHQ)pELzeTUJ!yMlEIgdGDqr+_%1)iaBEPTh}rW#d5@$+TK}kRLzS+E&{%}4s`(V z{)wtB!CP7;pK{oO`A#9<m4H6Dvw8&Fa$aDk0uT06$}$3hQz)+gyNpv-~~ zwU!@v5M;pDl){SEL5E-R{ZSThB^%#)LKXYA6W0fCQ>4&6UiZ<=O5x254A6Srk?#6) z50!}nPY{FmJ?51ipGa1Hgx1CdlPKVoofnC;cxm_h1swf*+$Vu>TGCkS3Snyso$9dV z@i_ktX!OvLgZz;M*IRzv`#cy$cmg!%u3?`?N@dg-yG!a`lJzYvpEf@3RD*bDTMvj> zHin|28H)JuLa^P7R4y6Eg>I@^PZ!`5LRe2l5eBC_ack3dwH*VFo(3{C`m6z@9d}*< zgMZ4-Kmg>9@Y?tsg;yE)QFgahzLO2Kz$p8>>_%%_f4{Je$WCA!%Uz@CB2W}P?O+@a zA8=CpN|RHxcT2WPs#UL*I6UYorgl6TO^Rz59Pr-^ed@zwNHs$emdH#f6kXndl%M}3 zI=%~qs-qU+#CdA(}cHIMFCbiyaK&^7jl!acly|AGl{ac*jxh) z2hIk)u0ai^u!%)@#CsOWjXv|wqZGDXh%7+#dpr!f3-x?9XjtcR2ymJQ_Bvm5*?6~2 zGSO7%b}%(P-(jhWp(We1(jO^V??)tO`ThP(m9vI5<@eHD^&i0k{m(N{!o(RXm!qsL z)*2%7@OI$>o0l3zq%T~Z@Id2{9Y_#LpUPL0f=A(PLg|}Y_DVv}3phnB=}=9_!7 zxyG`RXA`9H~XBuNhY)0dbt;&M|12 zOrj*pH4CPobg5Nr4c1Y$Mcu@iuMdvjo4iuix~1S*yUjOQ0^v2$Q257V+|$|_9H=+Z z!VkV`YrGKQD7C^`9NpHNy^KJ!gg(|R1F4XF(lFMpOSj?IjOmi-6(PO8PPxH28 zv7t{O%(4Clek+;Z=xue!PaE3Fh zspUuUqgh7Dll6XgrLUmvPy6%RtG)SG{VP1n3}QrzthP?b$JnoW#xBoHNKX#e#8~lf zPMXqo8n;zZo7UdeTL!nE^pgD*A>BUkAn&bO;i$98pIL6vo(R#nIWnPq?J)ZZw2-?v zRt6nZoez4DLcpL|+Ncey)!O=~KmOf&fi5h<8D>648NMaDLMkQm4v&384zQt`-`;Mlo+w^41BSLbu3u0Yp;X(1LUzL} zdo&aV#Ezw6F6XW?`H8G4>7Da6>GN%BwYam2X*(lFBh!-6KC7Adw=uPOUoF&YQScyy zeV6vkM2`BDqg|byaA*g|&RP!wYE%;Y(^AI~0|noUh6%z-Ga&)*G-EISylr_KUmMNu zY!vz`@>PD>_tx{ZXk3k#%h-f&G@acCs3uD<7XGO`l(L^loK-k6sP^mb=3j~Ozou9o zYBFTos==uAr!lQ4qZZIhYWnpfgpfP1mSO;GgzSs!vmc=Q+Y2|#&ebWvH?cd=v)s@leq!~ruPtl)$lLWuUz~q`*kW%o9iRu8GYMswH zcB1S}F;yQ&QOz^{k{7Qv5&7Jrum6&KjyS`6bSTb%_z|FLO`xE=v<+t#@@~PEJlycQ|TD=+a(<2ToGnwfiD<+ zw2+%%K-`2u6}La=Qd;NcY8N){c8yUkIW)|N2UU31-X&RXev&bp40T+s zto5z`983HRPIR_u3*zo2hCblMw4mw0GU{Tzh7kduZ!h;KdD3@JUQN 
z%X~}DzrK`NlUfgS`6_L8szA%?%M;bMo>&AD&w*W8RIGTmucTTxL9@B&g+T6wD(wDv zj|e}NSUm_XBE!rB2d~n!vT^Z5GNiW<4=B_3RsvsLwB&n&uNY3!`pLG^Q+r!1MCH}xzk=KMWn6XDVLZsyo%NC!MOwSu>IV{_CF?YplLyorf zH5I)V0@FJth2k_N&$6yG4 zG%v{{O@1Zk10Kz|k9zM@GEFpmRDA_%g?=iG39XYP7ke*RRT1Y&V+%Bvk5rJgxYHS(2h;b~`G9 zP^9&aOrc-TxWWp9T!ieUZ~t3RBJzfNMqnW|^2${A{yO5+XrL!sPSaooI9vvc{xPOW z49x!Hc{PJl4Ag%cAsDQDr|!fGtqlZamiH%qQx5h9B9E=c9b5~0AjnP6R&WxxAv$y) zoI)ckre8v(*c90ekK0CR4HC@ETHpHe2&a(u``a@K+VmpUEImli9<+5&g4`}jUsyuh z4kD9cbx~$5SOmYTpkA;1Xm$Xjo zje8lzRVMB5y|pvArI)orGtGArj}^{uXGG1jp_BRLZGso7@9OqNZTYZ}e5YbU`J~Ik zrb6M#G9!;_#!<~b@&a)7*nSZBTq~Cz;?aF}vYeK$&15_97|mf%B+Mzv0^VbwlxZ6y zG?TWeyu{3XYj%tYt!_C_fU#9;zq$GFTVQP%ulyMli>l_i4u)_5C0@_zQjIVk)S%Vk zj-VpiZuImCRm>A9K}RRNPW=A+#vG&Vnfp4oQ=gBbAfxlt4W5M<*B&s{%}7hLNy1?6 z9ewh&2Q^Z&d&_3H4Q#zh3l6kw+S-~@%0$8H!N436%-%X1PC4+ z>IxM^AG%D_pOJ)rphLSHA0F(Yu4H2zVN=uxIhBU*J=6{igLfjZ;O}Igr3)iicWSt> z46=!w%<<$=%7aNg5ee4B8t~>tit6@}_>j4=zSMlE1WM5qgy^hfF3IFkZ*+u@PvI5( z^0J}@KY>7&GscXsm-+1KV=MX`P3_}*B4>;cG=oL-qrGDfKAVXnzAr4}VfA5> z#vKKbzO!V@j4!diBZD{7G#6j`&ASIwld(ggJ8>vt{63|75B{tJp7oF7!1~e`pJBG1 zO3#t)<;iU`IS=*fX|&&J&1X#;oDAEAcd$*u?)1{K26)eKf2g!kPq}S5%_O$i-9=SO zdX-)e4y7HozNqP?=pClM^`AwaS~{9z^eKS3fkrhJTO~+{@&#HW%;b|5(c;4 zdQDa4Fbl}pUcNW~&-8=Vg%(B*A(y+G$!yv;;)bR`zk-49W>d)wlc>S`76JnHUeO0K z{C&wFkCnZHa_c>Zs^a-5HH|)F0CfdnRd;JAjL@U>jg`PBdV@Mk)ZR5`ShG&`vCLov%Cx;gmrtT(gJ8+Dtz`b#W3R;ieVN8#G$O4e~ zi$R**t}TnuAI-Tlp0)nzLlV4~(t;R%v8vf#ziRr!AwRF%fLjLt43*q~yegxk<1Zcr zUcYEq4}1Z#z^cd>#i$mO*4rz8+m8%}Ylbkst)qm#Pe(rj4S06=E+S8kD&(w@r+P#p z*0&GQ2Unotw!$A{?@N$OiDHw&A#SEe-_S(7!vBXOmWB8sFxXCAaZ|YV`RR7iM3DO&QcTi2epBRv+Wbh++%jpJjNtZPcf(U_&Z!e z6&=y6+DwiCDe4%|wRIiICqFYUW&rC z2QH+Hm5kv}i6Z&(c220d>MVoC{*;of4e?q^8JhwcCRXNUH4g~8BH+M`Wfc6#_c(9* z6jNf9PyAk(WMA8~uAqqsp1huDV%U~y^tq#?=J{$7Esj%he%4`^pc?C2a0GzUY`jah z_$Uqn3vr9QA&u?*G_|73EUtMEdkvhj`52^6Sd(&jaa0R{pX#$~NXl2;T)Un%BiFpH z^A2i7z9XLwnilYDy08=v44UAlorT!INP3f zVfHl^DO^{Dr?uWPr_R=w8*boVUbBWN_&3(-)^&~9vuqbPwuflGKP`rhCxnHYi0+K{oM%BCA5C 
z_oKe9$(jk`yc`6l;(1$15_Y3L(FMfLxUCOLk}I%)d!+B>p>9GmETC6$inr4)`;<$ z&ZCDc6%9j5oDZ}3rC+zk_F|u(KD=SvRb4s#5NJ0j2kU3D%@UHOC#ek`xCL0%3 zWIZg2)IROAIvo(jH$3%EK2Y@u&*vs<&mpC^ zt?=_i$xYwq-!TNvOVRh}QRLFAC*^pC%mr5Wp<_U2uJ>!!XW&x81*y_R#V&ywU8taq8<$U=zYAhR@Aztv)7$$+pRZcqRZ<*i>*`R3JeT5T`9ESdz&nU;b(8CdRra4=0P1ZT zh(P|Z0;wQm4~G{B%;qHUjQJ08Rmg^_`3sj%?jL!g_9{#76;#KhtHT;fs4lu1T98Fp zO@kECkw4nki;1NX9Oj;_LvV(3et4R%K=;(=UffGAPS?DD7OVh!=@+a%yz$diE!R>g zlw9#LOyf{TcFnod>v4uMOYWV`_vyJ6-U>1OSbUP-DT)^)E$lSGq)OvjSo;OxbxwOj5$r~Unk9%Cr0VFv4rhkf zGYqIczHHmwFX9IYy|{U6_|bW)1X~AZ#GjsIkTJiF%RB{uRk&iuSnr_V9glM@52$yB z^_|16$YG$u2aml3-bE_eC45d3x2dp?;^BgEoOD9*l2Ggh3w?-#c-zE||Fvy+2K zQ?f9$R-Jh4&@Bq2n$kP(9f!BioTfj=+GLC3n3?8N-h3t@6AOU&8_+;3b*2eDI++@O z)|x58FO+V?>rc?`*nX&H-BO#tcbmCO{hs9lqyFb^ z0zu3AZYv;h%uqc1S*2GK7*R@ zNJnbvi3KA*`56X4+Ey^z_$R=wgsCxu}c$u0tG>I{Ej7pJ??jXp(2{V zT3BC+IHBJ`4aKqq1D+wTmB(UN7%r}f8e&e)aJXeVGFLls1+men6+(;Ko7~CD&zlwM zlzM&%%0HXkv77jBq#F-tgn{>kY51f~;)k+2b4m@kzu;~>`tSG^v2kcX zI{|4s^F9`+i}+vhJu?&F&nhh@UN{qWy3>9mQdLXMR7mN>(UaV!y42(Q8Y>o~Uh7&F zowk@RI4Ambe(q$B`vA%^XEck*Y5iqP%r9$(LW+I;uND~Bg0ZEpUmx4}%E1PhDteni z>!0E(;DZ-&{O!9A2)1sDqcuF0Ug`mu3-l;W8N&nZd=FhlzLXRp8Qb>7OI|eoPG7wM z>x5=1e=i!oEaJRtk@56pY~kEvOs&H(9>fa36{eIs#FssYhWC;Q7^VpNdJw@+E91H{ z9(hpz2A*PF=k)-0qUZ!?l8bOIGeQWYCC}l3nXpj@O|525;WgsATZ!bgdW#6cXVu46 z2I>sKUh5?UKHFNiCcl8S0jKDDCJnK@=RPdpGK*)Q3P@?u@Kh ziDt4p4)NUTmRuIAAxaF8! 
zXU25m2Zc3reqjuMGloBp-vr< zd*@#2>D$t+I@h=^tt^4wrlufnp^98nM4n`a*WEd9@N$F}NLnR-d^h+U%GXxe(9k7Z z*c<7Z+D1y^9(yt>x*93*yVLv6s8?IkQb{2T^kVxH)3aES-g}qtAO6UeyqMwTuLfD4 z?p6I_DX)u1Aa~xh>&nd{b53OGh=PepOE{)rv~*Hc%&0T;Y;g}Vz>ERXh^-S|(Rdl( z_+?(MrpH;?64F|kuqhRf7LwxFlm0JpjSMVtRV&0Qlh90m0OKFs0skCngkD%vNn`Dob9T#Bp#yV)t(7t0{yl7Xij!2jj~>9FV0w zd>u&T%O^j?+tdcJbkr?3I_GSm8E^9xZP^whN~ zpP*K&`x@4A48O)R-$jQvLddzsRNPMAv|L`V2;HPDM+!Ywo9OfU2#_S^LM1UpY895a zJGOMtmlaus@C6VAQU`kMpLiE^Ty-c`KUk4am-}bNJH2emllq!GRgcTY;jAmRb)#!| zom&40NPn4Zs?QP22(!Wp7MHo=DtRjyL2P+Nc-MK)f)N`KVPj9c=wz{m~=)-;$6BNOT8--24e&?nsc-^dXlROb()i)UR3 z6BQ6JH(-}=YVPV`M@lA&(1n)IQ7mXf;h3-aHNo zE0&6RC9cmW#}>6^_-pwBWMeHE(%4nEo<>=wx#KC~BI0qu_J|Sr8@Ico$#w?>1%Q@; zrJuFtIpSm&<<8;;L^)KQX=_82V1Wv?axj5J>#egrENDua^m;6H*u{Ns?k_E-JU#yut zTb58youbpv+IrXRnUhg%AZg*na1aS@h;Xym^S;qN11YpN=9vU!_|3E_a%3l|x9w}y zPPQ)srA5aQWSfE!6G|1*{Lhum5-6<{7$#xhb~=04>9-0yOjTF_s{0Wb&wGa99qd`2mz?{ zO3pxHEY%TP4UH(4;Kwxp&`i92FKY1!CCSKo<`E${Dcj!b6CV+@^%GX1{jEKe&_Q{I zOHG25w+VN2sJL}yA`63< zmLBsMH@B;ywnsE&V_1Bdx05+yYKw1x;bgDF1Ni40u7e^6j8(*N1`d3~O-pn*41YP71TS zXxO#Nf30?4kS5N8Uie_VUX@0{1PMRI7~dRkR1$mi=^J^*GI=)kr7{nYNrmj%-MDA| zitCNHB`!<|FCvs>b+K*X=U)J#hI;0XNNyaTnP+m0pOBL(pYY3$0=(sT|J=b}OVKl8 z+Z{>4feV?ZEeH8&CGr5UwNxc~-TOVDR@;X(F{^6_*;D9N4P;fddIWqZ#!O6lcNqly zt58YztUuaLaA8l5G3f(UuSidjDn*@nJ|Qo?=`TY31-DPMkJ1ppdC&*sJd@FnY#b%m zIL3PwNOqQ}c@Ocl`>z{E5Wnvu%zcYX#r2dXe#by9eO&Q>u=kfyQMPaVF0Oz`N=i3U z0+NE#jnaseNVkB1N(loZCEeYf(j^St1JaFvfOHHD-T!Nz@7})`|2O(-zu0T9#ahpL zT(f3wt~u|UE6(F{e0~aXnoSES$sKGxlNPQny|SY?DKE>uGWo;ehLbB8R3_;g>Bu^w z25V1qQ6-1p$JCw@U2t2$bioXsyQSm>|e-?kW^4drm86aQu#7;NN|em zA@#qgE69XF9d)On{fo)qn?wk>3ZBQ{!TtANklPv5QK%>;@jp~PzBd`T3X|8L{zW`u zv;*ph!kzG66bW=FJm4z23O4>l1JOE-M7T~Ksr^IcOQ4y6s|Zpb`6r`E1rey`otGK^ zQ2AdlI>9~lFgwfi??X(7VhXGdlhe@>e}A}u`{Ek`cr44w_Wb?6mO$%P0VPzy;{7id z@&8xIN}}FK8NduOrXC4eYTQ2g?A?ZOwWZwxLnd_Z5QMwRfD^4I+wx>5CwsI8PoG=R z|6bX+c)i!uW$B2uk$!t+=^XC<%hd7nYM|Rx>CXB4|KJg#LzSQxf+&ys8xE3>8XI%q z0|QFkf-U2(6qPS*p2Qt&Cy9X;WiP&)nC7ua$m?4bc_Fqq2JS_SE3mQ|^aRl?7TN}G 
z=FVfvg699l-~)i-)90T^WON2eD(u{xws_To^L=M5o6+}s^AX3OrI_)uTd+A&9}p)q zAiF>58oXwR2KWUmL8!fFEiyxz(zd)Uhlfnp1G$yGgFFR`DMGU46l|9NoyA?zgRM=- z2|!(~%nl}emeJJIpx`sviT|*Yzv!2w$d{*ud~ybTQ)c`jAVz-97VMmdh+74PBj&vj z(diruM&Kl+FYUO*a-ta*w+HpXvy6=aBoQ%WaJ{8HwUCW1(@I2#2ih#%X8KstQ?8MX zB1p8DI3EzT9XmYXZUAf-1x=Yxn)SWXPXeTW0~=)D1HcyS0BS|K7IZ)lEvw9&1H6FO z{nrXjyy#ITUaC`R_%!q=my2_j&ev(HFrQ>-lku&ey6SIaytOk(UNh%Ud+Gj%P1Q}| z1SIn{7aRh7-w!6y488-)MSw_DfnZzKOF*hbeFstJGf1eOAtWC9hv{WUbxp-%)FIVU z&~rG0T++DA^bU=Aih&b`U>BrbGbrr-uI22{7~^orj;YNG8eA153eF&N@MFA?bbHXr zDU#oCoANI78|k=*lnP+`JX3fPvRvk|${wBRZ0-f}pe?~C=2s#AWd^L3wM8tE)&}sA z#kK%r=2c+s3qR?kbIY>RwXi=*g?q4EA^{Y2nkytjgtRCwyXSFa=!a&9c?r);Q6Fw=^ zUN9nC!Ycu0bts!U$fpG~!hqZt=tuI{JD~aucPfM?qwbimGR41*()T=H^?N`l`t|tI z$Y6cA;G&MnbzVRV#b;dGG_-)mK=IBsB6;y@4QD>EFPrALAWPM#WzKdJPRI<|N5WEe zUlXR?KR-1yexCCoZzMGe2pt67cWn%lg+$B3X+I9|_+_QVo86KRxytQcCh=M6AJ#PTWCS3-5k4i%_OKeSLZR`Pz?Q({ej1$ZPu z_n$Tf8r;*MjTgIP7kW2~lw9a;%z7DE)+pXH+VVy=^E)HmvdFnHz&zlAcT&QE`)hVs z%fM|lt7FjU>ux!U%>De-*DT80ISmdMBDWwLvVyzRxL&bOZ}DQ%7HAw^{ztBWAqM1%Hgx&n z40sB7vb99}?g@Ek6ZnrnPG<5&UI-6SeS-Piwv5G!+DSv7*n>_o?u}(Zx44t#5B;2^ zqGdiEEZkOU8?WMc8Z43ENk~DVV8^s~_O}Z^;I5^nHNSWTZb3;*a-xf?4a~H&PL^(! 
zZ5?bBa64K$4@=(vd6D-@sIYWXn${JdRqRv>ChANOb=Ktqx=pz|_vvOskboU8-30_B zcf6erNASw2KcM#C5Hz8cD^UwWY+H4`UGcwY5?ytD6JSWpo(0`VvA#`P+f z7aLTmgvkGtm8RY`Qk@YBXpu=@i}MD5Kj8{1a$5K*ADsA9i}3Gqpy;B_$?T!qhh z>E|FTUEdid2yT<300$B2cElAxoX4>;WY$bk=_)R?1b_tFgCf_~3ARGePVN$yhT@9I z{uTF4f(Rw#6OsWjAXE8~roy0LhoA}-h0c-ZnmY}10QoC?S6ilezdMnOheuR%K`5wVQzTnt4v3v62W7#=_;d1rZJG;a$Hk^>~8(( zf_uJ7_|+Nmu5Tyro*g1PC+oWct?wCD;-=qfDuW|oYvTStym%omzVyr%Ng9DJz;bd;{MnD({R{A4LS*^&gA8*PT#ca9*1&!r1 zv+0n#8>qwa+*S7(&!9a=jAyp!KL0ScTG4j)2XBgu`K%Kj&M zA@vq9BvQ%o0t9+5-BC7rfXd~<8F`}zkAyrEeC3~1;fiF}V1o_~1`{p2Guwj)_+-`1 z6sjD@K#4!WF24PADx};tpoF(7lgX~0!!$;Fer9E|> zl%b{vmZ^Fvo_}C|Q5vNQ1s=!04y&L7w5wWf-&MlGT9J+KdBLma9-?ux7=IK36In!- zFst@{#=6yY*n7HgS1K>YaEDX2H%Tam7eC>JsKVM=Qu>NV1NAcVP3z1AJi+jPigum*imn!!G_ z9`?LyaD8k)=Gav1dR=MnjBnec^lg&c8eh8Z!tMX;3!XZV6!w6%l;l6Cz(o+^E@twM z`AQ2>_|4MD1TL>j0{cH21eK1_*+t*Tp)~GosOK+PI+rZAL;8{e)e!xn!3~(0pjNDP ze7FUao%fu^jb5-be*I8oujKH8#{*O+A#M@E&_R%X??cc_{aA6qLmNB_^v;D{a1w}grjGhTEzHlV1KZ)J z$So|lw3D(NQSVp$F5VtWJ`u8$sj*B3b zdw0hOG%SwU&PQcrLDRiS^GkdG5)W7u3SsJQ3=~kKPuQcoY8bCU{`_~dR?pak<*)u~ zR%F$eYG`W<%zOae;>v>QH9o1yjUH(F;o^ZHFLa-yS{GG=y#-yXeCOljp+3Lg?DaJf z$P=p4^2A7-_j*U!4@CGO3RRi`2N!Q`pE@oit+nUGBb}Yl{YK2Ad5-|-COWLvF1E|K zM&qs}bGa7sT+Q^DSZP9EHjBw1Mr%blBq(b3AEl^cERrs=xQQL(_AIx#D2sf=yPn?6 zq-JbB-^fe+W#ecyv?2W4<06(9uBPQrO@1pfY6Lt-oMrYVUzj=1ZVn6OJk(aj6xc`O zKhfkU*+dJ@#-*E^$wOB{UqkH`Jd$3ox{Z?LlWl!XsNj9*IpN4oRFp>9*aBP(Jg^4A zlLru*%Vp9Q9Z&6Zey+X@>x?m-A<Re0b0uaI=bY@`zx3Y&R%&H9PPEb3|PwkE{w z#y4&J;YFU2Y}J2$X{38PX-~VLXk7f-5Z=)lh?sZ^L5%2_pH~MdN$mK-P({{hl5=m!qvJ*jVNag9Hb>x^#b30vp5q^#9Z@bSu zZdRl6HAg|*ie2WX)Q{4hzh${Tt2>H@atQTTT$0}-(dd4YQY<&ALxp&%z9y39Zz@*Z z?BUWF_iPorVd%%ggNzbHHfMc!J@>o&e(tQu>3QFyhwq#5t=vLsH@gF@4nVy4a&Jf= zl_09(K!j|{7DYBI9f!vpI6VM=5GGSj*U~(MauSQ|S<fzWqb(nX$%lgIh>eOr0V;wRviZFj!8A{=&|=U;-&1ug$W zcFC+ht~*O+S^MYm_49Pqd@bLXGgQ$Y*U7r8!b=C=BUwz$Bm72y>ETqssZ7*mv~KMf z3$CN9NqXynm21-Z5GJ}+l<-G`Gv*xao6u^A%^ur8OC#ga`X9uGbVsXz8gGEz}{m4Sok9*XXTCE;CLlXj~`rX 
z54*;tNi1v?6Ec^GSF7}sc?sR%lOD!r2q&vQvrV49U#NpYivT0minaooq|wr_Tg9~w z6&u-(hpi%`um!(qXIyAV$b3MX3G&FR-Cv*8&NaZ;w}mI+&0CW+wYTYIW!eT+VA|v> zIZ(Z{G)B0R-bG)F8+%V5fggD~_Np`O{-wib9}nC_avZvf?On1(+iA73;*@WAi^9gf z-A@#eN%FmE9kmLX3JK8+3#?tj{12b;20iBR`u^58mJJdb4ALnhYn;V?El(3DBtu7P zuW-^u{{vW>>0Lm8z);XTqR9pNw)Zz+CHHF0TDUuo;M2K~q8t{9q|g<8i7 zkBPj!tlQ}%JijDCR&(A3h&arw}BHN9Y z(5zo8m>%_26~QW*PcIR4Pu~>-H;09V1>sn{0EW5HwdhMhl%oTvWT3P6L_E*Dbaa01 zXuwIXU+UH&p_o&>K8y4xqU8e{2M@lmXX8Dc+xP|YO$9f{{MF*PX#VPk${p@QQ_n8E z*|p-%zr!`M*)g3oZr+^y5KC!e&;7rc6terC`JW~r`70oCUI(wK?snP0I$Nhg#?r1r ztyc%&G9A98I&V;exXIeQm#N|E;un5lf>WP)%#%cu-G7A3QOi+i$T+5{vl;B93HWnp zn6J}mO^Pynw1W`+e6gf z=1m_lR;tR#D({b>yUU_^&&zaBkr90Y!0cRefGh*_wc;aSFS;O zFzZ_nJ2BU>{i@C5guDG|iXzbJ)!mL4yOgw8iwXH((xZY-k4EF<6XKv;Lv8zQw2auH z_YT@4%to2;+=jTCC5d;Se2h3(LSL7JjCyFjJjiJuQz58eQyvxPx*AJ<)Y-3B&EV~m z%}7zqGkli3z2)okWOQS+%r4pvS#8Yz7tPOMG_DCB@8F^>Mmz%&xFELWjf6_E9P#y7 znBAd&E-ief`f3N3S$2uUUJB3+nP1W9T(m#TMNMHNEoj-HS`^d}{```>@=4RtC_*eG zJ%@df)jUJ{0gYoPfmnSOPVG7_MLf45?yJ+mpibW#qYHgg>C~4^L=-*GTa4PielR=C zD3hXaGxf6awID_vqT`AXx02CInSMo6C1+57GMRBe)_q*v_N}ff{lm|^!5o_M6V!EG zJ{IDSVGJRUE7a?KiHd`Z_w-YSB3+C~C?h9go6;Gc@T+}ZYYqt^-@C4vr4{oo5>Y=r zYN#yP@|-1jh8;zFo;?_Kq-ykbvp-o!U+zLHbaw0ZJ83w6BSmG=sO3nO>}nl3g|djg zIN`UG41goRUQvz=o=VXU;9PFBUVR$hcsE-NAiH>75q^%o#=oxIvavDE zvfo^M_O{09p)69{@s(nk;C-AphJ9!;`yA(C=5IH%^^WdH?L5C=Z=wfSdX0idwiPQr z;0v*$jN!@LFJ*rq+ts;+=C#S~u_DLQ;(27wLS|dOe2MKb0AZq@gI;%RyMRHO@t-l+Irar~JA~lD zmEi=hA1m)Wy#8j|(i{v)2IdoIwFCctKWx&$AM?UkIuJ^+_{AJQIN1QEmC}7Ob0SL2 zy~QPfaHeV6>%88*%-ky@Y$^CbzE)g1_2ZUNE%(s|Y{835Q{%(uvY+LK8J{RbkrhOm z+)z2N7`_zbOml5rR*>;FLuCJf9SDYTVdO44!Zi&fT$sJIh1?22wOZx~HcWbp6w{I0 zj^^n(?|eCV>YTB{@}f@AZa|REU^!ZVJ-gp!h@2%!nA+=Gf0-k93d~Q1WqAjXb(@s$ zD&M|G%09QeKukwTdq$wyNprF38|y({=*^deYZzWi^O$8qvDpbcO?xGlo+Kt#91H)J z%7z7Nmj{IE9E#BM<#$5kq`T7C#?I7CQfY~U;@)Qx4u#>p>uoQd-jjXa8!#P^1J!6G zqlX?at{vMRJiyg;s0s^6_Z%oarI7?PZ~f5-Ka?q*E@CI&bvNFgT1>{LI|~Wz@!LlB zevjrPA9XdpBoiB?Pe-QZm_s`Vp6a`vRRi)QxQdp1b82-nD&XpkxaM}BE>+U|CYGjK 
zXN}lsyt+Von>YRDazAyNRhv-FcfnR5(KYs1L21a*Ta8OL)qVlD4liLyTzX;sDFg~~ z8)sV#XAX`Od1z{$zM*1v+fKFL zaqn~p(F&Y)IgXF&2KO-kla7Y4 zhT(PH6)QbxprZgonXkz%-a>@}bV=}2R$+s^qqz2e^#J1<{h`3!{KlZ*U4XV<=ErM- z0vM5|s`ab5^GnFQ>E=+J7#1*iEa8)BUR+GC{Ia-q89f`|8#>CbStRnV1$OAomCH+> z`wF*MlTT#I7_C*md@`PO9@cCCgd+C5hy%w82yt2ntiAP19T8&&nX~oKCD@s=iYJ+xK8VT6>!Z+J`c{p&d#+;D zyPR^0j@9(8<15oshW?)5O(Ik4ejaymc-LxvXAs*t#|@dpj}}jPHb~XlM z^%aK9e=d-{v2B*qwO~yLglK@H~UrWsoL-(2@U-tNSGH`je4d(`!EdU zGAgE;j76T=*nt%!hha^jJLiLF=&^^9f+!ImvD?YgQBmQksgQ9$`ycxB?i(7z``sb4 zG0C-Z#7j?~np{}o*racwLdaD;+ctepNW#b_nIA2L%;NUj9EM@%XFXVP2m7G09NRIr zOHILt_Arj=o)nXY%F{79|HE1LwQBEOVXMZ{U_8~!d5qVqfv@uf?`iHS$Ne zaQacO{C8yWdY&tkrij~@-dE5PCYvU6^xkqWGS;Hu2$oHDvB(h4d zcZC&vBPc{DuKey9YIF^!lzt{WT0>6Yzxwls&-K7F$tq0KGdglA!aq7s@+ZI znsIeIZQgXP7f-(~hE4(Lsg5hxN}H{hR-W&oVl;0QHyf&2O7fC;oC*~vlD+?JGKkp! zTE^97lcTak+DI4{79?a76A9DrL)+NRdl^!YG7{WeegzXl`wx)+bO~DW{!Uy&>7NcX zC|aW$9x*MTl4L_-jp%CaW9@7k^c&L&l$BbvVzsZ{7-V*Y`tfaXuKpmGHd>#3im8K3i*ap}MK zFqefRINC!{7H+I?mR`nC`27x*=lGmNXy1#lAw*J1!9wwoWid5koSdSGC&M}a^~wkL z@KaPmHSSy$b%tj>y4GlC)iNAfxi=XwU8g8)LfBciT}1pQ=CZp(Sp49@g2bx4NC13& zE*`{Ss}Pmj^S%D+D6f6ee5LT*>3~W+No6}((hT}4mZd!bH=Dy$h|@;H6eIpf5o|y! 
z-cHTZRntN!TNdkHS>cn7M?8ITF|1WrU9uH@c4H5lc~z*X+*%iCGFX1I2Yl-lN~9J& z-0IghkE&BGkVj8rS80c$dy_q2oxCyUC-kXQzNOWcDi1H`hP*T4-k9Xk|GxGX0~}q~ ziC1ig+EaI`kfcjHcZLA&nCeqmpMbEyr(e|W_@h_~FLo!_2lM_+qYNfJIQnQY*%eyI ziwt)**`mUZ|9+ZRcdz>S;P|`D59lkg^T|M;w=be7z7<+1rj$e`UN0cP5WKQr8fP6$ zW^CkmG&YxuqkEI;0mW!kfvOYxI4Ye9d|&Bz*~sOzrU>$|!_4D=}L69Ad`zAMNkzB}t^-$dWPKF553u$j(n zmTosj3tqk3Y#l8m9jD(DHzE>~>1h^no7d2t`?9$*tQS;(k(Cp+wQ(2~70}GJX32cR z+_dVpXfICA3gE84&sY^(T;cHC!dl(@N_Ad1WCgjROZ3{#6F$nGDG*e;iXp79ez+@y z;1`ibZBCsMP-$)c3E(~UOW9zO*iG7x8x>?uY>J#Xc9)E5u`E{_voYqA>7|rUtp`7P zjQLEKiG|IkD;gziZl|wZO0pKVH zy&wd6K{~&~Jz52%3bEyyy_&(%pS#4=nD|zY@QI~DM!Cb3B8iAJWU7}eu~Gyyu#=)5 zJC&V2E(HU8idf*bcw{<@vl8(RNM>GjO|8!Rg4j3hr*uwoyiqlL<~Ll zYO0$%-)_L^vZVqeQQ3=A4eX24lkL12h^Ag1XN5~U>%(oF%<4KDD1E=Ui`s&wBi0a4 zds|vKqDLne05=B|G4j#XGHpf&2mU2b+*m!<_v?_9%>t#CMi7ViYKFGXxZCu{@0uaT zT>^lu+3!j)+w2w8W(mQm+=euKG>qmQg&DzQLKHIp$J4+=|Kn+B*~Lw#Lbr8DSJ@Rr z6j`QoI%6(#0^%icbe(C<>jk$o(CPJ8Mj!oO%%v7dq=jZTahXSGD2FkH=!&e)5^;Z= z*3#seCb_jkLb-@6wJ7E?uDes(%vwyV@b5FT<$P{tYLn%DESTrZk0^0zVWB?7=S=(DD&0hpz+F zM)onCd%4k8Zn5r=6!0K%^#tOu=i)-GAzqp79Ok{to_u?)&ueKg(0PQ)a&psazkFJU zaN0*0{NW7gVV(SGWxS5Ln1gMPoxM3Jv=BG+ZOCmsH6Gf;8P05-2zJ5xq*;d!Tr)H% zwo_c;c5-fX3w-wA6!W7Fw^aXVD5tgEa=@7ZINW!x|8-#vZ4dg1kg>YMYM&NGjVRN+ z**C@-i%9{=mQ0EZb2ty3A)ty7+(S6qLhN3gO&+3C%=Xpb>7vhUJl7Q=d&MdGn_N$O zjH`tDaUuX4U@x21^B~8IQ3sz-8yEH02n?s30)yx0^7d+6l`kfR@2E|>I+M5n>6k4Z zh{aP~(9zHo)Jdq#6|08tfmrry!;?YOt+1@C4piSx2`LO;e8QwQ34F;Qe4MYH=-ue( z_g)9x8~KujPS7SEK}IPjW9w@gg8L8+7Yj>A?Ji}(3-w_P6beO|FkAxcTgckJ!J_a? 
zlig`8{PO4XMHl$q;NYNGqYG@V+j&atN1Rt}b(c>>LGeU#g!hrYTi+=WR8{PN=2GnG zQ{ODrL!JrqyL+2YZ{ORF3Hb9B8`;wrxZxotUmRwBPmpIjPM64b`8peG-49j2v!LtN zM>&vV+6cXxoucu6FJ}1e>quG6rR=r2`%|qwElk%R(hUnavFdB*C2wkqPHYcxp6YVV zr9l`wD2s!|4l!NZI1hPniVg`)HKKc3sl!pz$8SGVT!@9BB^_&bxTH zfBnvg?|U@uQqIjmq2tJf)9T?FBe!oZPErSjo=}Zq+LIe6krGy{J@`skf=m9)m5F5P zB#5KMA9pDAF4el@VoHBFY-^h{{_5s%NL;)dE_2`j6d_`^xV5S44|TbP5WS2x=7hf^7zp~Sj6STk;{zse`Ow=hUwp&i{UXc!Z>Jf)@RhgGPa zB|jBeggow;iyzx@Z+Jm>+*>74`$1?`T`sb#Mi>&H_Jq=2Vd^g#TjbE-%G=5D@{ewUzx|E{*0W?POg-|b=U%9rwb8H2c=-%ds_;eRY7c`)K3Dd8*6V|-YD-I-b6x%Z@!Z%`*zC#px6V>= zoOFZVVZ7;TZYAE`?mN;OZDdS2s()6fR#Xi`<3EoWHlz*fyUc3}$Mf#LUp^hGlEvKe zdw3N6-IxCOczewS?x@%jPKP5+!Y6YO=lV-8jUbvd?Vx!nS6NCIJ~bTkyX2b1y5*;c zEJ<#x`4HK}g8MA*nkux;_3Crkov_?)(%r?-!Gf=#G_!2=)-EX)u)771+(RC2mT8sf zsJk0J+mklebhLlG;w6{r)^c7sB&Z*q&|FlIZt4~3=#!>liI&7x2@|88eQ!g124NtQ z-2BReyOe25(_8V0W^sl(bfJp-i+}dDF%HZXPZ!6IX+`BiS$Hr(TzSh&gJG6N0A+IF z52#pYuIwMplUl26l~?unKB^&!NO4X+opb9%R4(~K6{)=~~Ri(=tE%qdxU zpu15Sf%%mpF79raTP--u^FNmxYg3vg<;{G%8^_nBl)!7QU?+SXrEJyy^&YmAK;cWg ziCQG35z&S7z_dH0a4sJ6vK39XFYFMTT$_F60?}hq|VMnc69_;zg<2P=c4x5 zZ==`X?UH6m_xPX#v|F5dBp2*vu!33Vh4_*KTx62*u9`(4H8MwS*7<@-1L~{zKj8kVJz@s->f#-NWb zIDl8uDMKWkH&eYAUDa`_M7K(cIc7gVIxhxGH=pR#oM@ceW$#zRN}}2$v$#vA%h6CJ zR@%z_dz*U^xWgS$Op22~*6!Y?!Y|Oa@S|A_4i#K@X6Dm0dv5MJg=M{@IJw~udRdC& z_^Qw`bfw?S4PK67hHIsn=W$>q4s#pu>b0;_edwjH>36w!V6N~Z0uxI*4_Xo%d&;E2 zd*3rVF6mnCO1R(N0pwSG%eNrPb^twglb+aGX%LlU0Zyex|E5^pH1c>FiA(pR%-2&3sfy4$xmIj z=QH-JuR}l_rvEXckVpC|58Ts^=QM@(RN}5n0kL=OlI{R&|y_d6L~H=Ubm}MNTMQseI#Kx z#j#qkLgkOJOmUR6+lkbEaD8_wLsjiv>X&PQbLugd{6ee~(8i_P8?$ab;xv~S;52QS z4%s@f&K5_bXd=?AHvb-G$(yT{AZTykMHlkB*!l2$)-^-Z<47CnvMHX4EyKLq?I{2k z1M`CR5qVmD7rBQ+0SE7yZEace`$FQ{4|-?~UF__t<-at$tioa)#pZUD)?N%u__xaZcMaq|LV(j|YTA-JT&{Lq!$N}SvWVfF0TFbt(z6b`+_2H%DbcSR9V#Iz;5Qf4S~oY2(Tc>^PNpF<`5smuz5P!FprHHUcebYEm$cJI7x#L|CG3DYEgJ3(6#Lb6V!x}ez;*#ESAwdaJYWV;5ny zs!NzRGQH-Pxb=rRYfooI-=Xc^L7mm=A?C+lu1)KiE!qpp3-(?Q;J~Qapb`ToYB}bs 
zipXAoh4a>v&_6sg6Rzp3bMZcuQ~3J*^!+}EXmr+P%}Jg*W#12-Pir+T9M3{(O7#Wp zu!SLlLe}EJdlm7Qkhd2{O?LS-)m109A)M*e0kU=lh95baNG6k9H*z85E#eo=at3&* z<6T<|8%^)VPeHWiAGkKY}jCgzL z)30{XVL4N4w;pPjKl2~#Xm>QQ%}pg*f?bMUU83#OC6Y436pFu!65JoL1;B@i2E*8 zv&;!|Sa4Vs|0gpSy&*%p-0UE|9$l11By{Lc>5;tkV@5n{ZGmKi#-Lr8Nad^DHzd*G zW|;<3dA~9EEC+F-nUxi)tQt{JT+NsKq-R0jVn=5n5j$&*BZ0`A5*^_gva79Qy26w) z{dbr}^tJ0A_XZ9C{*_pv5@wwD-q%X8vi{ge7mZbgj~)zKr>eM{?kwlwmFW;>Jt-p335$gaIKsO9$BwNGu0 z`m1?fJq{7G5{6Dk9b#{Jvx9cX0ZhW}wzaIVt%#6KxMzcPP6!N`?KNIYU`cie1mB=l zwk2J~KJ_Hz#LwBUwmkJDH`*mT`lM>iowD2XA%;DR@&acMJm9h>JBj46XlY~MZwZ<6v~fV(?4rT=_nD_wE#)yC z9ih(HWi!r%>*fk&U#55u&v(Js$-%)W*B}kWJejUUQpzLS;>ZRIgEN zCQAYhIzx8PG=MjT>;a0aCBZ{FJ|Ud3d{w+E%b^F9cGV*euRSqr0(+=;nIc1LXDpIy zyCGX%0sG#?Fa~ZI2Hg}x11T?kw~wodW+V(IosiETrKzpnQyo&uRym4o4Un4*cQg!_ z1YLw26S@?a6o(xMxpE>MfOfnyzCyIKjhj#U|AisIxpSAT68$%Q0TzSWJ#X{=I6saO zH4ZF1ev>am_7AbMIrwFJr5<;ivNn9XA8MWu^|!aP< z!pzjl<7AVB#w?LpuD{SAK2j1vC6aE0MFAMEW$5(igv_whyN_S$^dsA!vB8dy;xt%8 zb)-E1_&7NCjk=R&$X_msm@tB&7(evEzd%Pv(O-yluXD<5{sry&|K9lj4C(*dheAR* z@)%4~kE&=IYX4Gh$T4YUWhe(|^3x_gw#p=iSLpu$+;|sZzKGO(svi3oumgyTv?-Y;pHBCP2_Q)>JL{bnUh*yS!sq$o*KBF3UOm6B zzcGff?i^A~6vp?I0)Oc~@N+yv#zlIIdE}=$V;#*Qtr{KRM08q>Er9zth`kg8;CbsS zG8;%)%QTp3ntI*-S{D;cTkLlySDspiG;S3YziC+2PHgM4~iAOujENrhhvno4P|fBl_GS@x;ir7spZ={$oAu*^|1($1ZL5u{Uc4 z*iz!R|TaYNkhZT**HJXolG&hcmH<)z<)Wa+(K)dtB~dR43F#9`(hDZ+%v(y-IqH9o!s4SkjZ=<9zb+$@ShJ`&_=s0C%}>gdc8mS*w0CNS z4jKTx1qk6@!rgwGz)SMvEg9~I$>#~Xt*ivzCXA~fnSM52^-kA>q5&f6%Ylme=WV@3 ziZoDHWgqK<)|*r^XX#P&nfHM-C?J@t`5EkT=aniDA9a9vqGj?06)y_1waj(#azVTJ zX?Hs73eO*oXt1vpu>dHr1Ba6t%c)dOSuN@ZcHbm&(GoZgDNUoyFf)Y=bbQI$^^XBK zXHorn{n_Mfe>@jnmBr7>odb=a5+N$cBKVW$72e8TdDirseuS0&Tir`;l4%_N=j##D z53D~Wg$1^%5$&&t`tq~tZ)oqe+}^MboP*nq432kqQ5&dmoML#8Fe0jfbX=I&#YQ<` zlH|)u@E{w~HDcR2(JC-aXX6Tgh91JHMR2y>0kAyM(jRL{8>^i>o*k^%2oaN=U!ERm z4(*r~_G`1gQLUz*fv*G;yp8?QQQPNqU&{wDiS&(ARj`9%b^X(mj3~$iJQPBg*!GDl z50}2SD;2&$!RS+yYNx^L%#zvQ5%Itz+93jaLLddz5rjzWK15oexSh7f9YB6X=Lcm8 
z7huNn?Z6ZNm0kpwmT0XD*P53*T;kpJjx8J7!#|s>Ykn9@tR)09_dLWCJ2N_x%k!!_ z0>UQ3aQce(Q{#5u*2^!1F~EZi zq)dH${TWVYFMB!Gv?t{eJ8M6aNzG||SCNu3UG{S%v*y~(m!>|qrucBEu~Y(lfAN^y zWhNXOqAvA0#Jc+Ei>u4mBEOL&-wt09@}H>HI#+Li+{i*CnwBJ0uyCYR)B^Z+@V&z@ z@!O3yM{{cq3HFTd!HK?}^9_mcy#?W!j)G~rv;g6S#F3!~QC~Do8gI_|6YD~xX`u|k zZLP%y$3-`qa@r)`cOQLW@GA$f10ylT0vn&jK*2=J3ajeA!XsqouQ8wj%c$3}wSJN$ z;f9+)%Q&t9mKI`znv{X!End;gnVBP*VaA5L61ci7yl@cau;fSD&Ed*K4e-mjCU`n4 zloMTO#p1%DY~u`Moe|#4_oaG7;Oq#N;)N5J!G>2RM{CCSrTBc;d_PsA38Gn(P~Y{# zR(O)Tg#iVUv(Q7TEH!?_cyHq}K!3>}wot8{6Rmx#nddn8wZF%-v}D}EC|o+!#yq^| zQE%(cJqjARNKZSD8Az9c~eLq(d!%kxbKq4qdPTyQ?M zvboyLWa(}IIDScfbK*9ouSCf(Y>whv!F)w^$VsSpV#M0}%rL0rNeBGH!9cKEsjCSA zPjS&BS*_$p1Qf&1vz0re{1V``^$BPzcI~g739KUTv_EO_M}gsS{Zb6B@DIJ#`SvsA zc~u=_f&Sd8A>>UI6G)7 z?|ND1PWDF?n=v>d>0DK~kCJYQ2Ob-t^}zTZ!M7+KpVFBNVdpflOGV@#*O`-y78jAq zvb|7r40E|>!%*LhNYBU@WUf}aURt$f{f%)anu@If5VP+J?#|y0?x_ZXbYFgpp~E{S zoGfUA770H1v+NCTn!*r%&w2#1-C|xNB|dwH?cXp_9}}uuLA~u~*1xaQp)@`E-Q-M0 z%6lSnAi2mOtHqTrb38Q*Hf~K?THnU<$W)CNc})Ci`nl%GTqooF4X0w_2w0<@f39U% zkzKQ{?AIFNf=wxqT^(0So^%lJgEbm)?AVDX2V;2zo$+xmdFPyj9du+zrJbjM3~*(&hy#hM6IV zHIP^R`jB0#^E3Bz6(TIwl6~#cx4KoM^mxZ|1t&q;&@9u1%^JBmXL8v%%pBJTH$M)- z2!y`LB=Cu_xUK(ehXrJB{+S7@wamHP<+^)8=-y(jcZgnr6M>m1r|^9oQo!7fk+c-u z!akVKwJiA68yyBwM4qkK+0pwF%3>8EsrivPzr&5OwV`x-*`wp=6QtNJj3_{ai7M7q zpGhMkCx|2YYFIE6jYC1~?aUOgCzd0lR?R1F1Tcr|_!i2as|x2IJ|Vn`s#$pjcV+Jh z$dlIT$y!kGQT<<88SNM1*y9pB;VCm#lBe7fHP!o3TnW)8erJQ0n|2mDbhS8AIdtj8w3&?YDg90m z#~OBTtu0k=%8Q_qy()P3L*z6KXx7#rlBD|kz-5|el_LFcg78b8J7=5sArwqN>OB^~xY}^>E4GhMur*oyg}6b1FtWf)Ll8g86}sC#|ZmXm|s? 
z6{MXgO-;BH@+N_U!d}QXD`uVZOBTvq(-B`xCbF*z=yQcqou#Hqw!+s+4`Zq7J|-Qg z^i$Z)nG@@N+GnkL(fO`UW?hv<)g+?5>bQ@+a_9N^%8mO%)o~B_MbN$PbfZ`Q$N?*+6mv49Bg#YgcT??# zxUDkt#r7xtH(p*!3JG_jM=gIJZo0dpmESj$v>25aLqu+n51EaLG$2(hXp`NKqmd@e zDJjzh*OgE|qJG|~;U5!(8*3i*1)>Ef4_+s+Na!h!WB;KPJHn$8@V(EJg<|K(2u}jD zhO+Qxmw;9ze{Hp4V*J)^57<-fO{%eH5ppm^krqx{E`OE~T|^X~cmhSS{Db&^Dq|tIGFDxBkiPht%`hu zN>F;j$0tN`&^oZ9?6UZd`|^-SmyY%;L(B{Qda=y63=7Y-E_{WVc6^v<1={swhaLDN z(nQ^zf!p;4U-}$B?2B(J7CX11L1dR0`uL~+){_HYr^uFSUwlIIB|pGz^uzu03|E{6 zH>JM{3QqBC($J|+eVl4vx;oAr8X23QRgWhR66B`%rDUnZ*s-nZbj~5dCx_m%SN>KI z4~!4;YcygIu^31QOW(DEXf?pw7+WP0#X3arG3~yTFV=aN1j;5SO*Nkj^V+{XIj@~# z5219Upy#fvVbdAg26lrCE>F4^Mk?8}m-8PP${ zOHD5ZlEa@ZfBQv=fS{`jCa|;DsJ4cDn8j$xPpy48BIf0ZdoB(dpN`krA8G#ct}tB;a#bRZ80&GeXBWQrr-6Et&}D>m z^w}Fk(8FZ9yl6DUg@>D?8RYB{tnIKrz**v(!mkg6C@dU1FIayQh((erLMD$8eJ*qh zHguY1tA5v+5Auu-jUP>x=xQ7)NhC_Hz`tMPqjX^O-+%RZqv`hMvLia}D906t31g^B zZ*!QQ;SYb^B9JVxb?>aB4OI}pb8>L%`hkwo@pl(F;adu6^ zS=@bEHpx|dEOPWk<6?J*A{@d5rhc*qo>R)D7rxCLI2>#YI|-C11G~2qHp}xr(rPT- zDu41KA0}~vdskqYO~kC6^tzE}En{FRaJd3RY~G)JsmS`v>i@^yTSis6_TR#Sh@^BW z(%mK9NOyNhOGzv`rKBW8y1S&MOS%MUK|;EuOVV@Qd%Vv(&WH1C$N6x+{l7W3%f0S( z-BLrdRD{;bqeYkR?Eh&m%Lj18n$P;z`unUQ4h&jc?AcBc9 zR~f7L{e!M2I1VPBFTZDV4Ej|{+B)2VMAJ{q+{**IOqEaYMTj?&b^AYzF=`<_3keWm zz48ghQ@~R)`aamKb_4RE4B3$szG7#DZ?}4$?!G6m|NZ@<-Si@&JU4{9I#D|3WtYJ4 z%+QG~^~Uta+hFEuZyBHq3W&7Q%KY45JTMcMen$N4xbR6HpQof2=abeS<#U#UsL7GI zy6ao5TpvB_ZFJ}H1C6kGL8wF?{ID?&HQrJ#lE0c5e2%V-v<<;m@98YtWnT~RWux5u zE|ewgrJ|`XxWd7n5>9Kg%yxxbwKZOM;n=~03aL8Z(%Ic!YlsUk7gg`y(AKBC`PqbK zCLi5JqD$giFZC1h(>bfJS?~y`eZXTIo5Q=&_W5&V1dX4Y!}X2?$`gIwkuxKyVYiQ` zD@yZ0<6TX&mplE(Zu@*Ln=gAGWyXVsTw{abH=x*aoaJve~j%^f)mq9{3%UU^PXEA$T&DTZ~q!8eO?Z=C|dU?_!im z5Go47I<1KR9Jjvn>be%oKl7}v968ge(c`rluBQnbHXF5I5+F?AU-x)h>qsiHu}CIjjTa$l$angg zX`)4ZVhRov#?6}k&Nv#u^m9=DH$vBHHrh{&0yK2eaO7-c_wU@jC~rw2LArpONvWJ6%*@YDgaI{ z&S3$bWa8zw)2LxVedG7-kzi3|6Vg&K;+xlr)Y;R>hz7pYmu%FPw_FUN#j>|m1*-m; z&vGFzOE?1Soevh8s!2AdUmAD9su!pe7?zrX5_M{^F%%~NT1nV69+Q%Uq1B5drzlIn 
zY5C`Np7B?AHvu`uI`k*iwj!LQE?x8{!z$rbNQzqRz=MgC z9nLJ#(t%e#VOV2lJCl1#CTJ1x!!yH{<-86Hje40JV=Y?mpuG{=E~J0s88ttb)Drk- zvmh;d^NcWc6)kWjG%N2$-&|PHTaMxy*(|-)6~yfHEm{b^_bwbb5ucx;SacXgSW`16 z!t#v}{ftmfnKz~H&}8vkOj|(PcH{E3rwkbG;d)+ar@5NpX zXml&0lNy_Ig%*IQzLtH^{TKlalyz?9QVfk>-X^L9ex0YN{N4X9@ZuXS=}iCOmH-M@ z0$T9hm*1VN*=d{4>+I1)k;2XPSiouHx#8}GvA~Lhs$Ux<-1{oM$n*0e30xlvVS6m5 zA3nAI80GVu^$n|A^ThjLikEcrL-{O~O#6G!1C2()^zAXc+-LLFDnd*=}>A8@{n8+9goaUWr_}w2`&wziy zbQg1h>BZ@Zt+6m1&aHEu^*!iLRS74NkQ1627Ch9+z7H48LQcMA&Cow@(rflfLns#- zMrD>!%^1=Kn`FWcdgb@}9s|UTCoa;Pn~Cr4dwlAqU%E0!=?lny8u6SNo~iLn?n3@L zVZ&j{w1uO_rW|RNQws_CJ&L3c)G=AZ!rrN_YFKu2#D6e;h+akY7 z^71yB&&6^92CzBAC@z@+8Qo{XtmN)g>m1D_GhYv=&K2S;4#xMjPJpjp1m?Vu@)fY) zoT2{tsajR-cgW)G*XQv3c28N#H;(mAY#o#1OARFx8{X0WLQGtP@WPhX-aHa6$wcNZ z1q)B1wHSxxGe{<8O}Sy~y}3Hs7!Qh)o3Qb4%+~8gZ3m?B1Dd&gp&G9e z=nZ|)|5k|P`muBVv+Y9^D#VOvgaam&7W2V)9Rk*c?wZ8}x#*d_TFf$=#zHN}(Mx4X zQwx3OvLPRCr({9T6MiGhd;Um#W|VSN*=E3V&Fb2YY<;~sxOvw4{+Uo&>*;6KtOvGL zrt9K=BJuoHyPiGU^Nq)l-Z;H+PmrZBi{=un8LK=DlFW{{dQ&+c^o4?l`1A1?K~Q)u z<~CvGoL7l(k3r>i^GY_K$7ge4JM!&q(#fhgEi+Oz;SAa|l@u6r60^Q#(UkARA3YCU z5&Y8O{OCf2)-AKV>b%Q@Q{)=9+RHnMIR7a7GQ{Lw>7+*GFq#-c>sC3UJR-1^7mLsw-Y+|J>q+9CO}VwPKj~9A zc~5-*hN%*YNZ836b?O`{PW1MVkyVr8Y-sTmkt)oVCB|G=CPH|)2KlA9x*io75Zk^2sB{WL%SK zA@8mZR&x7PTc=(hh+#bDJqS~H{-{bn*VMm^5fBZJ2_rzZ(9Yz53w-3%h9w*itn`}$ zR*RPh_1|nUzdYEpiV(c;BxmqtUUAa7gp=}&d(tZDL>a^4FXZbO{lJtPsX^>d)a;Mn z$7?Rib>~hKehVBo%VOhRq>0*DL9(5U{PQI_L~dH+By}!^8}=~)o{wC#D8DrE@E>JT z-198S8qXTpe{3;?Z_lz@Z(7@Oh$LWr6879pXJ2h(hgYc?)RB4G9t&^&w65ew$q^@W zcU;5NRf%y>7+DQ{j(Y>QUY?M=(G)yv-LuI;I*SMDfVTX_Hrp3!<{#rK>+=R`TDs>} zKtpkO1mi_Ybz73`E16E8R{at@$(cV7A3XT;=ORBI@jvlUzcP>)gw(G(F(RnU3H-%% zDQct=3dD=4n9Xg;<>R>JRpT`B6(i&0DL#Iy_(nl-AJ>!hPAySfvOxUq{xrj@!euL? 
z>9YMW!@cS7&|5g`GNTD--_ox#Q)&grTrxSw(kb#O7X1I*+0o!ad9Y`oi~pk44QyNt zV^CqK;Qgywy6I0atsptbV)PNJJk3X-V&e^jSgIWmnXcsdnH=Vm!+3o@~&8=#1EUH$ZVMsMT({^l^t=w^P_?fsSj)>d}V6FKNg?z#Jr z^tUK*R1?-b@IQ_;R#+#;D9ZBl;=lNAHY@>Azt@25TjaZ2x@_mk0GfE4<6Z(~5l}Pm z_m>IWt@;A4QW<(do^zLgy{!Yj=I+$TfUu&m2VCbyHHQE1?j7ARFRg397{qhWnSVI> zzM1BjDGZwJ-3`IXcbRaGKZ`xEh4{=)+z|eA z(7abs2WAo}b0PUu5+IYZv57sbqBH6t%Z?u@6C@JuPy)0LGmJ3_K58 ze2mXJWrq6>xV!8dNPC_vVnPkJLU6l0fKTq=~fj9MsrLqk?-;fzZQ%?e& z<1F9UF#)hR+c<<;8YlZ)DR2d#dkfSz;oUd5ro18i%S49vu=R8Tg|VD%^8$p5YR&^@ zZg_3Fp3_l#`MkotfS~o~BeRMk#e91zu3jRSxW|YToj^;S)X^GsQ^?as-?oZZNeCR4 z1ov43-OryNdPp~s{@MO;?0dWK=+Af~3Td48y)lm$M-|}hqr$>IVOY|n%*B-fm(dr% zPS0(lTLr@(L<`BscunOslN_u@vK~Z*Z~yQ;O-Oc;rm^_4de&MN}O{zhQy z`xOu$#2P`t(EXVgbw#kg5yQA!cFaKVK{_GeajG2kHSRgE7BrTLDmd-r^@gl8qo$EH zn>wpDGM0yZkrf`p?Gp7V57frm5=njNR0j@%&S*#AX*eJ%=$F?!)Ah1}3ZSky{J3V> zH-;5^Jdv3bP;Duy(g`M6T;-9flGLxB4J+QzDUyevj>NWuI4DKCm#` zD=D-*S+eU<#kCdFy`b@Y4*sS>`-tih5KNc|85Jk_x}hzxbqVYi=ka4mEDJ_`U;SBS zZ1k>^F zJ@%0gOF5Ni{;c8GxFCJ3+&u0{g0s@`SQu={iE?3ur<1T-TjLu&C>r!;DU;{{-KWwV z;~PmIro@aLb?Vts$7{n@fA--v7R`A_OGW8r`c*bj^NZW;K%yh|Joj(5Q9Le4FRys$ zYX)|z&cBR!&d()%04H7LnMPo2tF~xzP(bz5Q2V{oohxw7h#F+e925$jT7<)ph-xcc zvsztpom4DPrGWoZ81Tg$>0#QchYuUPhnK0PS7p^!rgx#l6Fzf=<=W_z1DNrctVs=W z@j$9wm-+`kR>>K*lW7`B{MynZvT@smw_ZoOG3}2fuAMP80<QtKY8GkOvVC?s+cy!Y{Z1|gv(q@o4DXRRVKGQ-LFh5SX(ahNdDI2;d>R57;q8<@x zNabtQT4d|^h>#*<9{jf9HP=EXdrfqOTJ#mlD~#Dtte8Odhef5B?%))s8%B0Vh6uS& z6p>up@BVS~4oT1y<4D9sYA;py2d8A~*M0qk1BbeCDeBPOmZca);XDR+=48~2o3Bq& zWaci0leVbYh!BdslTC-va$oloc0aO=gkq7R8#WE4Jzg5>pc{l?z5@51vdrCEOH0Bb zHp?2Vhxm4No-5e(UVb#Uh3|O*(S@_Gvg9^F!=aya!Ry|n^ObW*&;TiPIPX@_9(gFH z^hbu^`#ytbd$>o(h?}-nYT$?ne8XTK1yWuRKh1!;e(vf6-LC8H^DDzjm zfX*_(mq|K;R)vamT9_@G_4nI?FCBtR;?p#Dxri;|np{ zdvh8O%@tYi(Q+GH)6fefwk0cWEf`u99B7Y|LJLT!^5dfog;i;W@++_0PTRjOADLUE8ExU3|dWA2Z7?2_Z&AO*4w zh$01I`vH2Hm^WxDPv_-ilacGVVZI3!3=7@u(1+;aMHC=m}2Q zjy*vUKlrE*$sd((&j!{8Ze^rTKvfx+n*CqKpD2>^Pn zJ$D|lzs)u{WHd38Og>4%RWH_$|80{0=SgEb^TGUhL9e5z@wX{wq64N}DE0RFU#Hv$ 
zaZ$;ce3KWde|ywGQ?M|eCT7pd|7~F?pg@i7*kY3{zyUw!f{6{~65xH-pKU1};g0v71@`_kkSHF;kdz_shyA({oc+ zV82d16&QaYHaj@HcRgs@Dtt`p!}bZtS8V_!lA5MEGCg(=PFscyzw4b1@1tCTHtPz@ zm2P>HR)l*XV5Yq%IesS{b*Z){Z=#1BRTT5$6Vdr5c5G)IPE~vI1;ut6+;A~#t}f0B zs7ti3n1%!_72~eSunCd>=@7UE!4Gi|e&z}UuLV%T+dlA@m)N)i;DJ-~e*KS$czKo- zsIxF%+MEsAsEeAamRk_ZY*uRCnNxrnwx0O$U5_{(K~fAJ>gQNFEws0!N2 z^gfW4XR3ZV52P#*YLHaF;5>W=z@nTBOK1cpAxtl`+PckD5cI zkDt4$@PivBsqo6u0G7#-7a(^M;m^f5{!Apb*X6Y?0MYU!NGjGea+}e$Dge*pA4?TQ z8K^7qkBoJdwF^KwC%ETk#KR3pnASzYpad_1z>AF%JZ%GZD7QrkGK|#&fZXY};Z5Gc zV*o|TPc{G%dBv8^^)Y}+UNBd6!hbIE2Tt7ujFqh%h9CxF2b}w|0Orxt1D(s0>Y(zC zUjbbj`ra9j!)V`JZ6pqHhfDtg_o}fvyQ@dESWj*_uk7J!@2i;K5 z;$nQLAQ*86u#*a?%ylYy-us^?fwd#&Ua;Z^BeD-~9$r%z0}M^=j1kn~9R*OJojvIO zumPH9`X7^pKl)xMZml(*RFSdYx_xgSgWi)m48U(oVA5xfg45q1MyFw(VuY~`4CSS1mhF{(9F{; z7wo1%`G6pDk+1utP2L3kwElKf9plR+w8b|@pSKlJ4k`6fG7&?pTXn^L0zA)` z{@37-ObE5G0Z?0+uQ2~HfNk9Azn4V@WXCA+c7-&v4f%fi2jqjw{6({9zR>vBfSWkr ze{X^p^Z_WCYxg|v9|m3_)27Elch2>&Mbh{O@WIe*C}7K>FNx-!r&H~N@FDetiZ$>i zn^`XU>#(y)Y)kV>i9l29-2{-Gs0pRh`3Gaolbsy_K=*|%85E<>*V0J9gd=uqtY<^O#q%>rI=Ia0; zO6Qq}h;%TxCh9)TNTwQ@MVlB|lOg1h6wV@z8dA^oxD0PcATZehZ6+RD4FY+R53a^< z02ZoL=ni^x)$W6xhS|gngx!)Y;mf8CradSWz=Fu+v#btm zT63}iV4O~4PZ(G)jF|%g48xM)Iw8Bkm+%Sg>CQv8;kPMAjmYGknY!*lbR>N@`w2Wp z%TZ*aO^(ewg%(E1e7yqWxb;A`IAP)LcMq^6GjTFJam`j5JJ1|#Q&Ny92U4CSy69-4 z9&Rs_`kswnZ5$|51ac3BHYcA%L$F-U+50cvK{WzHcZ!Z)>U^ktt4$eC;YWI2X>~&# zCnup(Kk6zQExHVU+Z1KY&{HPp9(wc(^qJcw+)$X2ScZat>BCb9#3E;VudCCz)P;l< zgm7K9qPe!C?C&K`(#3&_h^#;xC~~)>Tu{!E)g<8uj(e*TH2*{Gwq4EV7&L_JpDkQ@ z)9`dlzHQgzqkQQmzDFUKu(e5z1+@e_e8>YorIne&Tbe5tRPvSdE@vBsS0Oj4d?BAz zX^q&+Y8XcWL^sc;-AMO~e=EidUX{QPSnwCe{2Y%XKLUwN1*i8}z_E(*gCi$H;0~2a z54yGvJN=#7lfWgUY2 zC#(J1bABDVGJZRN=p}u<0$T9}yyuCYM7M{3?j2`6{T0$h$-n=qD(cWgYHq=1oEc)e zF*n~H2`a|?FOH1$UA zy`GYpkA`t~;|$;}6|%YQCM-e#b;a!+XeHTx8iQwR?wWDgzm7v^>FYie=GJt5r`k`j zB+u`J@@PF-LDE+w!Z(f`gEI)DZv2uyK{KPqr~BE`xAVFLU(?|4d@-#(WY8!O7P}8# zph&z^N)z+7AT)$0ueE5Mv81D+Pk<4^=Il^E(bu5tRVhgtH*%!er~8l~M-c~hDXMOz z4G&FR#Uf0y}G}cz@x2Iwrnc 
zyWZ{0^@7Xz@ycyGy{o3etF$%oG~`@!|B9X9_EOXoPJJoOb`SUM6^#AePSL66)AyZM7XaBh z!v5^FRwK2S0ifK1F{0D&^7vKdsh&R2 zpsv`xJSqP(H;27#v@dC+2%{NsR-HlZ9df!Z1_LxaWcpLnnU1w(A>rt801f_sloTZ$mMQ zp#eTgJ2O6@|bmU$%rOKC?s$fX}|N67m>tY+YKME z?x!T(|2P(&ZmSsg(R~!N{3$rDcBnDYpGE&5TQ=7T#_2eMTORaTQq7NNocX@ZmD*FV z+S^q%=o}lUw^G}%_q4+7LBP5Cv{(8wf`ejy^cP)sKwT|lh))-y-^1bWDVP~Y#OYa- z&}wQZO?DU}U#Qd^=YFDs7m*0w>9RWjT=@X6U^oji&~KE^m^8N^_PkHU)#bwN)nhZ8 z@abhVQe|K`x!Jn;!coq>Ds1QZWOt<*b0!EWk|=o^csh1@EqqfnA(Yfwp($iCTj~~E zKeVsBOtYnPL3lRJLHqSp&XJy&j2|ligQgLdz@F;9%WYYX%Yx0{k%p_QDqU@_R!+d; z%fDmbe|K&e^J3Tcn{~q|xL{OlJzbY}sCygV!a{}_bMe-#lT8pVICHA|%_3;23{|d@ zF{M~mV&vhDn?VYW59ju#Ah&$*EzA)Gq6zqyIWIXC&;w0}5ev+wx)o7m`Y6Yj&Fn0a z`QbzMMwPB`_dY6Y_vI}Sqy7C8e3--g*iC_#bLKu-z8>HY7XA%jw!5{3dUYEi=43cK~dUH3N zzVBlqu*o1F@TsE(zWLLoaM>(oG09AOvd>kpalAwM=46mI>G08=1pMIxU;YI*?VABu z)5s5>rRwI`{t|}sy;bk>=kR=&;R#>qi_#}bI1hkmq)#E_M(lbow1hIYB}Jk{^5;#j zN_Rdk z((_f{og#T*x#bA<6lf4{d4{JwE`PWtI2MS<62zEUq+{3U@}$@F^9OZJBPZhn z19-NLj9h{WCN+4v2{w~A85o&vz#hr@PIf5$GIRM@m`%MGY(+EyEAWRLhtx2oY8|(f zf>suENySWUG@Mn#Q9O80;tQ9(IhAmq&uKGMk;ZQ-Y56EpQMB=N>NsWYif&bz_rjcv3qT;D>Es&!eSley22C5eveH7*0$HEa5xb#>54|kma*iu2 z#zQz5m{1A7Z;@-Os9JI9if$ZZu4~TX416NRV$+<>$(4ATuHv&nzJPpU$Fu0F?pL~C zb)jaUzyH1OxRinsfjAx1TOGnQU*E`fUm2(jpGv2?>g<+#xvcbAiQLRp)VWA%!JKR; zUajEl@aUeoQ}Yts4dCgv_S4Cw-Iva!28|57_Uf>q%~N>OprxsF!qP7w*Q)Y#L{f4s zZJSn(UF>iOXd!cOy;Oz zIlfiKPRpBU1nS{ES0A{P76I#k3dd&5kDo( zdu=>j(-+ESQ@CAtW)8X^SV`V*xEqSH6DD?OW;N#wuFvq)BPf-&p)K^vbQ&=F0mTdEE$Q`vW9|V6y7qOp73Ta8=&++K{NJ ze@$%3|J@~HeSRbywl7cj6gFX@U6%(&icDo}i$eECvFU3hlPIEP9Xs}SFJ;~_%3LoX zU91wHl$Sbffj3(i>fGW^RCtIZ^*F(*GWHq%%pkUx;T|8d0yhMJDtf%0u!<5j)F>Kw zO7*x-+CW(#G3?tHjsCCy$SCZINT?B!yN9G7H}!k0))fv6J+@VlEkor0v2sc@5CmUV zxaJ6eCXqmcE3RK;bjpk~g{7Nh4G4};bvX4=z6vE?qai}l7nr5FD((9021=<>M5CZl zhW>-X&3JBq7-zP}dqSN}6k-Gwr$tcgvu%MG$g;ag-sw3wCqZrV9P5ZR1vm#e&IHwU zMsXIlJ?eK+k8F0P*2#lAru&#^D|YDdpDW*j3QS&=x4Y4{2HJEkoiE=*JP zk--k-g@cqCzob{)s4JSp2wl+a@$7!uohQA+)g_eaX_(G0j@b&o=GQ)RaY(qyzKYVB 
zaVbZ=`n3wh8Zx3JdNq})5}NA1|IA)1V-|~QHvbl4h4;Mt6{19!Si14uonYjLq?(4V znEuI*MF)N)X-4ql%U!QOS9F9%-WjB5zWDWW1@GLzr}m5)Gh6L2q$()E+B;071xhKx660uvR zy);+Tl-V4Mxv8K#Ebadwn0bSp`~-fyzw&)#AtmdQ%PK;yBhNbuR0(M`CN{U1QVA`x zy~zw1(B^BBdX3&Id{P3yMaGU+TYsG^YwcIex7LbrAtBiCO+cjcnO)y?DUZDKRWAU= z6`lmPic4U6OSt+*ZGwtU{=>M!jm+ly`rKH=teLr}AS%8SBH1r9`{ZL1TUYbbnIgHbmQmDQ1 zoi;2307)vAhu{1wIQ!mNSv~7I+$Ezno_)BytNDf3p7DMvC?jYweYfYb>@L`dUQ#5e zU4MoT6N|!lTh8Id~V0z*p#5b-;Wj0mDG& zzM$*Hcra;u@qpS2?dR5epdnF21)Q5Heh@U?KX^8e9V#O|QNt)J@1S!d5qcOb>4_Fad@>i>ozm19GOv zSXnQ5=B@KIG^R?bs^mkaZ9SR%HzDwr`SX;TS;d;DDRLYDPuuDMrd-e^*N$Ta@@?ra zBgJeX;CR^gJ6^Oh?I@7!eVZF|m*G6P0-~B+bIp-DxsVAK2*E1RQKpGm`fRKdT-5ct zK~SqPkCEuq4AOKREdtC&jlH{kFZFjdkmMD1VUnZYw7>MKwa`bxn&96nR~J^wK~m@y zb^j@y$3%zI1;mdFO%==D@;y)}G#;nskZtM`p3+JCoAzy{d?*w->QDq(0WYwtksL3m zxaLWL11mzM&64NVezUKsYG%ZHSI?*mD3e?k67x-R3^S>dFk0>vzL+(rU0|JwmVHd( zVKRGu9CRJ-6(PzWk{0ajFZc%_XL?BXYAb#9J_}vgmF7BVb`wBGn1{k1(F7j3w1Ka*Jhi=JXOE{*cOK))EuFkcFnqSGbS`{Pf({>2C4d(y zYhj7H;ZoF*c8RldiC24POG@|W0Ch!rYq2!?*B33MI#q5@QlZ_z=8XoJFwDQ_dcaA~? 
zs&6q59B#!0-sAB&>|Z}B|Kgbs+sQ*1f%kmDhPoB@m}*A|$I4F86!XPtO&Ys=n*>Ql~>e%J5!MBF6xgo<*pnsVn>A z%13SD2Q_r_t3KTN9oOOY;C`Nj9lCZcuxwxWgj z3J|uz)$UX~gDAxcfpL~@uQf{tiIl(prs-#q2!YMFWo`u)fS}P_CaN}|NeGGo-A=|A zqOCjA<{veD7>bmFb9E4?t=i7V_P1h365babwnF7I=A`gMy=e z@a%rhQZv>`usV8J#iLW*oW}azHH{ZN*X3&|ahi%jM7?!EO;!)CDX~I7#z^to+v~e9 z6OyV{DrvXtP~6|3Lq#0cz$02F{AI>(?5Gxx@oa?YGK(otmsjuj-!%>I z-{FA+V;*=)5qAk8uTCw24Nmqf**{ka(>cDn3zA#S)rF!B$srglw3Jpw)O;ctJH>=G zRy=ZHSOgL!1Spda4ww$?hr6v_4D*SeXY0xSg-9KDAQVC+*w;23CHWvj9#>N$g37@ zpD}dl?C6@o#b4ahvmvt~N=>^QUX2{j4yPL*MLU)zsVXiV)F>H}fTa~L(yy>QHk`eq zA7N#^d-IK3+Ed-3yh#&%fFgJW(PX@p5UC$U;d{;iX@gcdo!)69X&`Ysspg5sZV)qn-my8eS$;t z22`f7cAoZ@t_)p!J-S$(m;CKtOHV9vBgzdhjT=dbT^uc~dxhpksU-1JqR(ZlbsPZl z&VJSJMbeA=Tz|Jiu*XGHw;fnE9SWX;bm{ZkcxO_?-`2E>Oe_>tN3Zf_!>EB8d38N< zW_6YugQ3Rx>DrI13LXpzn~>6y|oCr>tyi>I4s z*w==udsY6etg_u}Bxti{UJOrnfao|@`nk+<{!`~RMF*xAQ9gBEF{b_vg#)E}_!Nzz zzflOw2Tw!c%O;Fi2ts}7Dw6^b?j^`7e{UyKT8jk?T=xG=QR4ro<2pa5^#~aGBx{@V znFTw>Noj;O*@`h(AqeMxz1zb_joOoOk0oLZ>Pg#!ksj5l|ny@V-}j+ecgR$u0Tx5fMEnLRtvxkCWzw4cwrgr4tg8 z_|*K>Y=+=wl7{iFI!f%y6P&&uBQz7bk1LLRo5+6m49%9&WQnpouyeMNzRQ{|nQ}*B zV0-%t|8+%h$xMUb7w~F5FK(}r#iCckt*qcqfI(Wpo_?j?+D3&{2~P2|ZFjSyfNb0( z9d^-G!5|z70ff@GR`ait)eG%h9^wpT&%Ql@@jYGtQPMJMHYrV}y)fyXKbog3^(V#8 zXlbkB2H!>bkwHv6sTqU8rTXPYVw2ApOiFd~@~rk|)ILGcp-ya>I8Oo|Wi&ZMzG;TY zZp5LPb8Y8D_hZ7tHO=eA#@~IP)G7ccWDdh01LcvW6f#<7B=@y&-CzQzf8F>1sddM( zvsyX&B#gz98ZA@8OwQy4x#zpANK?;meJ|s=} zt_Y-j1}T}`N*ptOmrw3_L8D#~!MhQ_*Kk?w8PoBml?`E_2^d*u-Q@``Rq^nBIwMlnujT?iwDgJ~ZS zf*Wt&Umg2oJrY3WaofR)@p$RiXaJH9Czba26aVNq_2Lab$^{-dgST&*9?K*mVu*@t%h<&`iws-W_pMCXfdVvXSX&bA1z){HcJfWi~pcAr6 z(?B}(TCxG|_jErN`vo`I%~qoUmpsxv^7Ennxi2EUW1lJH`DVq2Ud!EpjdX=xlh=jh z<kMeI7$P;1D;4e-No|`e1sx!(_zg z{%VVMzP#%j1B-0P7juRLBcc9`0lc_lEsJiPk1=izOB0}V;bkKO76sU4O*t0Rk`Zk1=D(h{f2$Pna8Hk}7uCu*jeBw<_E_NJ1HSi}T=-zPy&lL#A5f(~6RQWHhlvUl2Y0LRv% zWS}my3+{)IXZ5&;`hFRptfogOK19*hvv7-;6SJUgp?928uS1_6<=@A($L(4+0>?yl zNXxwQk(xb$#}e?G+$q3z1QMawKDS+xrhGr?%KKUp0?#^TK}l5OV3loaoh8bC!~7gJ 
zrCM;*l*<20)lMNllVl7`y2_5Cqz7-6uODb`aQSlWN?eDUo@pjccA<(g2omML!q=L$ z+3k9TXhm;r3>tu%N3YCYBcbVYk0%uw6(jc16S6UP+@4IU-)D-$_c9Gkczk*@E3jDs z8Z1ubsa+S7`6yzbo1v)&bTf=d(Xn`D^r;e(!McJjl_c0f@ex~=3qfG)GTGjj#$L8m z1aXq^FXU^BujwrRBqBzhp}GVqn|t@_MVB1;B}Js@ZFsQrc!_{A2HV+WqW_2wqnBw? z!{bmcB&n!^i@k8Aje-;;zrXR5weqJp86WLs-mj||i15KXNXCx#XWwTYANK?HK(U)C zR++r}TJYG(i159**F8)Hz+O~Z>JvLI3jb z;Rt`olE4CH5NtTEEKxHzEI1>ZinHdZew+hm7Ew8y1gdQlo|X=I&Oc&fUEiL} z4y-=NQ8j$7Dc|%@3vI=W5(l063~VKW!L(#d;WC`AN^dw8Sy3RctH6zq#PZ?+e#6;$ zACC1+393pW)T^dUSA)hG)LCUAk)k$x`d-CYMO&pWG({qy=1XU2I}dQhqX0b(Q=LS_ z8|?#~7<@q#PbM#O4kdLdX>-Qv)K<_Di3-g=5ovT1TyFM5&i$a67;`GMGGNaunj9b=iiieH z#U9{>@bBYpG7q(qTu`t*{L2SL;bGv*r&W)W|MI&bp&}6Q?n{~={`&xd2irv|9w^OQ zW&hiJ3$y_ec-#5NPPBg&%0u5bg|$sr16htXpZ@(F#ft7BkN}IzOGR}_E#sF`|2|e= zZdj**w_hKgn)$au5M>0T_4)1i%fF7nJPnxk_PK?$e|xFlAjR<~6E3X3j#fRS?oTV; z`RlZ=1G;D}Mnc7xf14)R_Fz8E)4cZj>n!;Msryl+Z~*uA??3&2hVpN-?f+sZJEK(0 zg@AU`2HfEsZ{`$~LF*?wJj6>55s!g3ZeJI_W#7vJ0PJQ*T}D|6QbLfcZ)Hj;()#|{ zq_XnqsDPu_+^|t1zLCdDN%MLBVpz@9{A_z7FrohN4<>su#Hc*qwpkeL2%4x|!O>gz zVn&B@A6(>JCi9dBYoYcXWytx~3p7M&+@pJ|5C3nXchC}uM!A>04D`)^1kgFSQ(KC! 
z>#&mv15Mkd4|qvDdwZO>HKDoHhwZVQ6E*>xHB5rK3;z)@Lv7DHpx^x9`@1z>XV9}+ z0Q8wc%+)>EHj&Krg#8~UpTYESbv5oGJ>eu|wtfb*!W;mJ@MlDYTiLD!%H%p=*Qm&f zcgO;G31qn4x4{lcIrya@7(+-QSTwnacL;jMQvzNz>JsAKaLx##i~PQcfCdkxAPO<0~# zX5c@}g^(FsF0>P})&@E)Bs%^|(4guJS*-e$P%-)cu9W!cy?kTTydE#l-{HNVy*MT9 z@*gjNZ%aUk%C9=3XZy(+wCrChSNiVT3iQ)9n|UfG7{~#+w&yiuXsrSd-LRo6_3O0> z$NWW5boj*q`}VAlQfMI6Nh!A+>eI`IoaR5zzIgtUZ?J*D6a-~u#XY~m0C($fFOP3W zBUZwk}+`_~a zO#GHeFf387M+lOT!?-H|*$_}YL64+9=P;mxjN*goe)`@VD($`e)nGJP|C{e8_Uj zNvKRx&OSN%mf^rS@N^TkAD@Z}AYlUk$`~|Ug1xHdl=A-=06d$_mQcO}<>@eY-=H#C zGUT|=KV&}ox!`Q%aI#PMEjz-`pwLpRN8*RzZ@4o6g%B;9eGBDlWX=fH#js6Ac~KZUv>;~-l2$&lpZ*5$^^yaFYew3DRicNe zxcFGb^4)aw?|U!INN;qMEvR9~P+ZY?OFfvSS&&kG(#6NM2TM91TKf-Y2@H9T`oAG~ zU_c(;nf4q*yWp~3b72Hom5GnfvDT62|8kE}75oE;i99Z{o;=B5NPAr1WO6&&KzP)8 zJzY790ow;X)}uSXR$mS>7MMN$4RoFE35e;6zHCD~AS}VvzT?Hj8^*vAs!jzX+)Zgx zU)QbrF|V0g?7o@MajGLAy5InY=A^skBpX7APIPNw$P6`z*fVOnmz z{hCrffHM0QB9?}?Qqjc$YM3$WG@?{7pl6w4F#$aMg=v)h0>0pCu_7SoxpFmaf}&T9 zHu(7$2{v7(Pk;ZF>baky_Wha7}*BfK>RmK9;;%R$8XW-V09s zACpXFZ-CmdGF=f^a@+w+G4veFok>59%E}hwOw07Fk_e{j9wc7x5Kw%z1c!!8ilCfC=ZjIuSbd_Dui;t**id-+S+$+~Y;u5)vc=Tt{~em5O-Bu3Kog9Q zI9bGrBt_^dJDSyZ@6S_bTMN_CV@WmRK29b#-4Hsrtvx^7P7`A)Ns$yCK&PeI%h*lJ zY2pG*=@rYG06H4rL>Jybd0Q4-^9=I|p#7yJ9WGK#I#^v<2sVO-J%qo;V@ZNTNbrw| z$1jq3UNjg5DH0u4=d;4Z2erd4U&upkfaH6`9dHIz271ySz$+u`e-ZZ9QB{5I+CL)F z-JMc`q;yJ4cPO1AUD6<_!lqm4l#uR{+;o?8w}5oRJNNTD&-p#$ea|@G{}`}*T;w{;MO@z>;*|g|7kEJZkGGd2Vf~J`NdLxc3!6Xg6o-W`G zLtFjMzZyY<6!wL*HnHt0&L?N|9cWM28)`Sg!N6gv2eu`3IjL)s68s5hxmP>be^)PK z+FVx+H7=0q^VEQ*l|&AXg;)O%_D+{scQr`8aNEQW;0&xM9@G*_=GKduI;XDM_#9TU z6*x$s=Zfe&^KnSW(^3__ffg-E-}6_N5vd(Z<;~*A=@3Hpk`>~l$!Cgb@`^3=P*q)C zI>$C-RFjpH0rusueb+GcYhHO$(BKzoy-^leYFZxGXa2J{IZEFO!%7TCaJON$50xUD zmoVOTL!v`7uvXqz%=vH?5aO$|mWNkiL_ zSLWgzxZn|$viNP)&1n>n>!!6f*`Z>v95h)0NOZEB>B*d4kjO8SD^9`p+<|gP& zWc_U&ZZbOjq*#4MI4^QH%f0*@I*`x2_SqXpA0a=xB{`}g&@?}%>7&hiHCcf;4BEEQ zuw8mY1F8byLIlolE}KrUyBVZ}*TtqD{2Glmzou(@1hzTO)Usxf1d*q;#G^dSm8$hH 
zjw&zDvn%54TkRE(>?j|=zmpQ3t}`olzuoaun6TR7kSce7ioJ=qGH)9cLGm5$<<_q- z@1#xT2`PHAB#gD4BUwgUEWf+DtWnr2ai!cqM_l;A)q~xG2$nc<;+=I@xr&O2*C|%L5`G1##O5x9je0OtZJz6yZW(`Vf?V|!172oJW0J97Q)pcQ`g zy>bLYR^JE@7fQSgLcB6^Bu739y>l8Z%VXmA=7G%mhp{L>&HP)KkI_$1X>LBi>{O~( zL&*|v>)*(j^s#1q=$)4|rym@CrHJta8V;6qYrRKWO?1AKsU~hRTKr5{Q8NK#?{i;c zlUy{s7ry>cCJIo6qs24cdgx~Rhu;WpSG)-F(2 zv8%dI#H9z}LO&Tl2_dpa=!pHM%#&zvO&zD~k{kCCcS)gY=G~32o86oFa6I@V`2#AGLWC>e@-!rjFXT#|21`@3je9(<3p>f- z1-#l2`2l7?bt0aJ=`9FvCRK;t4R8N*Z19dY(&kF%Ak$!H=?ar!5406#)7}& z37*Vw!hC_u#v49=>QzCOlhKL^N#KxBlb8|@t`Ss?#>S4;DARdr^FBm8;ez(E*eGKE zs2$}GsCzBG50rc>M=dkF{{^_3ynVMwjYyw$C7KM3zn~1HGfD0kqzDWvVfc{aMxKM; z`g#swiR>oQlWgfii07?qK3iQ@Z^t|Idrk=S#1AOV!$n<@D|a>M{uUzkyTQD=u(+SH z#9N%TNSgHBv}JJ=8>(cmFPy3;ZKM6tS-$}uBc5V8f}#CK!GUNwWbo&r%TB@i4eBC9 zP7Bj`nHkVOKIm@Pxm$>eZegsg?Hth#T)0_^W`y+jemAQ)x-Y1qtu4Vly}VA=1{7pc zg1~D^3xT-PxJ2v7o+&0Xz?Ey?q7ll4-+1Jm=73uDR=k3)xEbATMZI?p^O;J8N{Sx4ro^U zywUnd+%);%yRl^1RQ(+yAcdG2+r`+I8_s%;?5+i5C6c>)^uHMJ^^y#|tP6!#^hHO9 z?zgsQrVet-`-z@fl~0UFP;Qd#V{Zk#=DaJ%?aZXsaoq?i2M$`aoR0RW#8jEpbU%9c z@?Sw{!x2TN73;yViV(`g)kH7ePHylTJ?%yq>r#Shb=S4eyu(L%N_U3Cx}UwKGW^;} z!Ga;2J*PL%!g=?IJE%89aPxFLH51Tt2W*KE-gy?pt>X~;aM;8T8_%;nTceC?ut3B| zb(cV%-?i;g{0)O-B-!6)j2Q1&Jbz7rEU8n*er~HRe8RCXw+o3N#<%7Ct9zdO`P0Cmv6@w{*T21z%lA6oA17?3! 
z>EoF#*u6!4G(hcSiB-|(iEG*L0D1<>y02w-3VOq8_NVn-hQ{HTON;G0D+P}-Ow2=L zV;v{eWb=qxcBH2U1b^kUtck~Ul;F*#X`^)2qtfUvdZhxJo)tto31}a^qlMJh zBS+}{CC^ZUM5Mr6uI9@Kyl-%^K0%IvjZ7fd=}}F;vG%(3fv+M3zvPw z7sjRUU{Gz)G^~$+Z|h7EIFl>_JraJrW8iofDDAgPzK#fKEPfLs-Rj7#P2|YaUV?Vm zUS)ibZIhfiC@DvRf%jDZ@XgcC7HssMh7YnLi26<{CDc1B<*n%9Ql{dn1leXolH|j2 zqNpWu8Lpb<&xRgt8Q;Zp8+b6Pf^#dH5^%Ve=jJHtVTwqLO0;AlD5^V>nfd$`O4&dh z_npyv7^`J^W_12z00D!1)Q7fMUVSKs5F7qZ5L2aGI3}s~Li`L2o~~5>>SkMX*=5JL zNr>v*segsJ5fTFYb{_xGJ_}e}~S$ zScu#VtX0LGF*vFiqi(C~zYUb;Iq-AiB~%%k7UCh5veag~Wv-Mk)=isL^9}o(7+sbn zMS&yFJ^Tyo8GaaQKjgE0ne8FZjKV(9Ie12Cn*bLUri+#&O1W8mvsawdPcZE+v(;_u z55LVcAROHDYz|JCxfzHyURce`lDB$K*R(sg+RGSqx!h(na_Gy?4E3V2bCgHw-McNE zq{LspZN9!-GXEmsbv?*FL;REOPsQ$Y?1NbD%KR#PhK~+6!Ne@M9V-Nv$76_Ubs`rP zQ7=cMG!Fq|%=QVYF~|VCmb65R2!#8lF9G)pcFfs7%JBDT^e2HPiGG zNHV#Y$`D$sM@^v^0ESF|0^iyF_0;eQ2;Ygu-okTy9oWW)#Wdf6fnSB@_>`DN$9#1f zqNh9c$utb{t12IMFB#X`kKVwJPVf_&_vSKE8sapuBcha2oPnb!AQ6qiY$ZTwh{*3YHe zB*=@(B2C#NcQ*b!ws5bio9A}T8dIwnxjmgg zB+_@?f>%0om({T$p$$@l+m{V^5sTxrp^kO*M!y3En=`{gR z^8mZM7~QCY3?bqt;!$oH)K%+`o>qL9FJs7OMk?XV%vT-Qv{=_%DiA(ZQR(;U<`s9S(1PH-w^6-3J2|GV zH845v^ff)(0i_%50hyWCG}oYHYv=|?*dw!sxk-w?!OXKw!u=tR2-4Tg)9Yg?IqfEh zI-&vsW_^nc+++ zIL2&qE{|9V1vh2!Q-!gT;>A0=X=9D8Xj+v@liHs0Xal*ox%v#L-l7%sq3G=_W;StQ z)z0(HI}sc8qVdD=2V7!dGakVYVAP!t2(b}3hkT54rqvnNUPRk+Lu)-Dfs-Q6ZQhG- z*B=>#Cf|1tj$kVX)22AYpA>2EZh4YQOKTfmcEX3CpF-;R#?_D_?AmIBk zm7j?J!g-2f$-QCkLyIXPvnMFHNUTURNi4NXGBYx1CraIGm8^9N41&}6f`utVDVWw% zB5dAA!D)Y~K5bAJR-p)&W~Hb|FY$JFk>*&&a8pDe(0grqR;RmExdqp6jV{L7SmaIR zF>bP4I99+Tq7og&QWVBIN7x?~-SoV4fZ6xH+!FKBxvP`-B?Lp&JTui0G$#k1W2@4b zmzAbX4Q%QywutZog3~o#Db5Mr@MXCKR#4DgBr?e1m1*+`ZloHaoZKe19A~5*Eqg6G zZN7qSJ@|7TlxYwZlBQ0+Y1U%1geD`=&3KJ|3+yjRBP#{A-z4mHd%x+g(quBX_Q1@> zzyT!vL0^*&FrK4!k4Ram2HM}XMh0o^!!*R4FrS8m{96-a(&Hk;948ED0q3m`vFG1- zunk%Y9eoM^_fs;`g4L=vHW6pA$tzdS#zWwLEth!I1U8V5S%)&^ zaR;SM^TI}Zwd53khU|}m>~}?3l=!FN?+$3qGA)Y1Xknv?rr4eoYardNep|QU8G=XX zvHAhu#%YPKWJ9+AQpfyv2Y1W5=J zPGN!BeUzLCR5hL~@T}Pu)DL-M%axd~Jzl)un>mRGQ&NtKjaPbR5-l2t5ttYM+OGIF 
zKib=*cLvSUmmZ(2qy;~Kr2mZQ$FCg9_R4fWq^!aboS7!OBymwnS_^2R$F}(HAo@@J zCAUG&9!N?;LfP0L-w0`zXXdZZaM+#r9hvZ5!VwJ?zcOf9(0OiOT1C&aT-Q_Rw5{GG z@%;IQ{%2QqjF0gZN!8Fxi#3;FVQ{)=X_%t;Ty|Z}oMtB14}F%QvV$Yg1ks&1DmDs? zF1C^9l%d(Zin>d=g3v8tnj{qyitYdKYZwL`%AxCt9ubJCgBLHMB#92jE<8-Xb^|INdq>=vZ36!# zn#j|N7J%psJi*-MB126oa;xI}q9WNM2bRCMc|D}|)ak#6B(vOa_~`Nyq;j^4hqXJ^${N}5V%-j-8mCnz!-14PKIB$ubo~UV>tM!<2 zh#T{2YP5YzYiyY7vk#-{S^dI+mGv}hPn6F6mrfc1iS?P64(+hw{MK~GvGUX=r+eO4 z2RDS54S|Gy)t`Ovf?xbUp8t!6lsEvBIM92>p zhu9p~!03y-;k*V-eXTqV}rm{{OKedW)nZxGHEuD=5S6-vRGIJR6h{XPZiVJzazBzil0{h@V2{JALIgm}{A0zJ!0`gP-Au?&kXL z^Lca0oVl_b4t4@(uw*E$9pB+!3zLc|#vZaDy>lsi6GK3OH7?P1WGtye+ka101k&1Iu*Cf_8{a~sig zXFXz3Hdm_CZ}@(Dns11nMIHE6P=O1=f?Og=y#IdnrhJ&f#$c)7Mz?j#JZ+rkq_ZhV zn%wSf+UTpbitbOWmhc<BN}}JUr0hK5|P*hYr%o2Ga+I>hUKdE zvMgdTaxIFF7@1wQnl<*sWJd4N{=*>6r!hQV7KyYDE@#0%LIk_@X~Y43VI``n#}t*d z)*8r3RxS93XKK2+X-@En!o{VH9~z!Nb$e2ET!@9t)NY{D{MEIoC&nV=+=D&jSwAZz z3c01(;H=+(_<{+`CVx^TCV}OL*EzFUsJYfE`0^_Kx^!PcG>65=teCB|=Q(gYFii!` z@}j;v?_Y+%*}|B(b5-jFJgTR82+cBj!#r4jy$~%qhL+GFt%K{bk+e6!8w2>bN3r>~GR= z?AQGe*0xat-Y=Lo4?Y}X;G;!d^g)Tny&R+R3*yMUx{mlG*i-z3C=-kjs0;f#rpl*A3`v@s zSKhoF5BqMx2hT`WU{)Qy1^gVaoGCIXiW^vAo#V zMHHzW-1Tl<0-h0iT$x+2-%w1Qe|8FoaMDpC5P~Ctel4s=^ty?ueX7V+LosRXhZfWG zkxO+V4#bXbtEq1H!#DZXEvDmSmZ|DVR8em-m0*gLTmfI}bMOG=F9EOBJlWKRix^`D zd37w#4L%zEk;G$uC^y8qV_wSsdCGw5Z6DvMD`TaYMOX>YRQ}6R+=6n}^j@Fn}s@<8xFj%U|w)va!g1K;UOmS zJz+3|vTHdBF)$5ZrdofCK@SDdk;;=$p2~0fXwFLYtos(iZKmVM*WmO|9zD%}0$42H zpPd?*^ofymay;!ajrDhV262JH^nx&VFajmk+}+K5a>13 z@NxG>f{;(BY=1&#OUdIKxXVDMUX<&=?ae2T5#Pm^mfORq9+aLq5L_iP2IKM>J^i`c z*tibUjtwrDd6Km$k_9U(nf!JyVm62ZX3|8EZ97H%p=1XA1zz7%PyZW9svFgw`6tlk z!G5R+_C66WbH!;Z)xKg^BT)=eU7Ren{*=>aLvDwY3QH{<0*TF{@#iCMtxKf)4N#$k z9w)JcQe87%->H}EKF%{uO7~PZ51eTJ0JGgGu^2e_2`}-NY5TEDwtNL8PI+e0J!lQ7 z(PT~famUw*ES0tnDIwA3SRYhomnQT@S%AZhw(CMKte3X07lqKWjOPGe-OxK>oggxV zD609OQ#?x0t+E5uq8^`M8I^grtQWgoAxB1jc%YB(A20FyFWw{Yd@rwJ81Of}G6D)3 zH~9MgER9G4f+xit`EK4P?Qh+vM?-1R!k<|RZLR_@JZ6?f$Mu<7+I=XQ!yn`-M2gg4 
z+G1toLE92w*!+C8={v^~iJpGk%#a(mWf^b)9A6}K0^{JJljcDMlwQTju!HGA#A%L_ z3T?YdHN@ap1QR6qdo{EM=WL~!Ku6=1H$Bm9;=F`ncDSmFp|s(GNMaLejM>#4f>e1J zrK+lM24GpTpuFY_3uD!%>+UE;4&2k%--7Sh6V$%Cm63bB;w%1M6~hN>GNC=j8+!Ta z9l`?J8R{w`@=Wa5u?&-%mD(~(S|NW@lf3yBbt%a=2~m$MjMt+#xr`##imvFx!0E2e zSsR%#MQL;c*L1L(@&|o%*~HhZC-NPQBq5$gj6}g>tvO%*5IDXhUPe6NLu4jKqCw^x z^nr)?rr-$o+Onow{&xScg_klbFgRZ*D6QQZin}aWWpR3roDva4gbaVqS?YIEBjS=Y zhCEcf%z1E$2gtekNy=S&JlEDc#>t2n7c3UM zG!v3bXO@6IY9J}(`l0Om4HP#GB?w2)dqYm5o9@UoV`@MmAS1p8L?N0$d%iI_VA!_< zF6!PfT!HFyv*s;l$&+hvFwH=NXQ}?=GDYqSr(c=PHZ$p@fK)yBMepu?Z?>am91?2e zSiNGn*UV3wRZTItG;)qGB48(Ya_|kKq3GG8CNenFz(ew{xTz&W5chOMn5veaie`+o z*8+UTVTs5Q3HvV6Vasas@U&{94zLwUGz8d)my!01DsIckU7J$@g z0)VCONJBYJLoQR=yrH(O`IkQ-uOuHg)2eC-^n|&p^k61I!xK6QTWNUR!{(rMD3)Oo zx6FRghquglECc#098dCWCh3?k)UPufGhH z@Yfr*=e@{Uwk_$sHvgsJL{(>o0?Zejq}i@yK7GkfIl2dD(sm6|ZJ$*c&xSF2-lL^V zbq`YLN2nWda3c|QAt3ziRY%v5T>*zUEEo~lKXWO%IjkahrkM=XCS)ZYiENx>xzRCp z_oaReSLYoC3PYTz)&)zkx>4kb;te<{A+Hu5K8u5jwj!D_xSbVV4=N3VyO=ZcjPOxA z^GTe2>du-_eBxjVqk#w3H8n`d)zUioW_r^DyULXc{DdOVOTQu#SExGzD}_~Jw(M(x z;24B+B4|S|IURaYFRH@}{hs=0Q6x8}11betspXoz^RKgd#GAOpm(fIts#z4BAuPU& z(KXW~Sn))de;wor^fnkB6u+H(ZVN3uZwOJ>(2oM!ZjeGig?`}$FBqX2zbZ(*hMCjb zF;4#4N<&;vYCC>3pOz&B&@s5Z=A*Q=U*CF7Evu-H# zK7~HW*%P=RvKT}~WYdOQy>9E|?t&D0wE8IcMmdYd%U=uN^XpUf7v1T%Qk8sLz}j*; zy{~6rb#95gu zT$6ajQZdqxpZ!EoJMur3C!8u(jUvSuan1X7V({5EhJeP29jtLecBAdEpb!&s83E(R z*Z0c2%x;p2yE5fSHDf}?p82L2u78{pxS`F>`0TSAdgsv^rDow_(-(!+C10oBa{(a`Q4{s-uK;jNyUq1gDh@t$iZ{u0+h#zCnsy zqn%+~oi`J|o|%(yK<(FySdu3uYQyJD*cA(%YnXM3p%*AVy9!3~XHQIdJr)9Z6ED8k z`oLJ(&ZiEiBwk9sSpj(5Yw@_o3~=|V9a0OsD&W{54_oGsWdl6z(XtLzP;D63kcfg5 zIcq`+=Yj%CH^gfpS?ks{_jLwlP|yyURHJ=CBwWAwGzK74!keHm8=w5h*6DA&m3-f< zRWQ(UmHCkOJI$Fv_^g+`4OA8yL^hVv_ZA+WQQchEV~$wm_XylT>UWGlYSWtP0U_To z%AniLfYd^UZHe(jhxZ!q0{{R5aNy(FLzChMM8am!(GoA zTrw=WNzR2Td~1A`j3k`RaR~jwsDhWmj404c@!c}Z86z&Ph;6(9%QrKcW$Th`3#qUxfw^r-mgv> zHrL1E52xc+&H1BR&y|y+EHi)9W80PVu6w;$U|KA>$jTyu6<-HIe{;%h5c}KcFnbXy$Xc``1qJ 
zsm88*);B^BUu=);UM@LfV)#~#&e87+HrDe_NyC@Q{-hS?9<83RmfF4o-}~wWU6JC2 zue`E|oC20zT1=w8YgzD$l_9=1dzj5C7< z%7!NvQe+QZjG|)?+nI0?K<2L~_+*ZDk7+v85@XX)hI_paJ(Q<6C7S&ukj*|dlAnjx z^ty2p>jE_|Vr#8Vm!Rhxo`+oK$|Am#y8qVH!=iA=sPw}%iR9A$G;;`9f=nw`hoBk) z?bZhP*Z}S->dr;$IH8=r;ey@&$;&)omll*ijlJGR|E3A@L*Nc?9gn*Mb60PW=y5ex z3g}k)@pXP6u%oC+)o6S9H4r-@iVlHX?PuRf!O$H6Vg*O7uvrW@P z9Tr&OAEJ7?<-R*QQUxFFDX=xoM~M%45HY> zh>@y)|N4IrB!i}~zN6FX&HMl1hyQV%oN+wN7JhS_`{~U0HGh+NG-A(f8H=e%*79 zesDJt&buk@|8!wE40Y-b+0Xe0E+#;Y0Yn>2Gl~89#}5ZL*%x6eMI(6jQdczQA0B&j z+Yi{|+@pX0@Q;rZ78#I8SQ1754^K%3hQg__zdrq^Z~;qZ*yF4=_GSI2r?dxqoL(s3 z{Qv1f1=!=vd$w=(kB<`uZ%rzU-u+J#(SO`vfSx>fCs<$VQYrjT6U%=L7lpy`z`l416u#n1qQGsRA>9nDIf?+({j0g? zHGU{L3w$;NtcOztV6?g%@B--1U9_=RyaKvP_q8K{{&$H-`TyT}2&6mU54wUlsa${{ zv5QPxtq~(-YiJV2n;nu+kfN zI&tIp8-Kb_6RgA12vBayl<@MaAGpa02(~O%78N(D6H>;%kNPg$zo8D#%Nw&X!fKH` zqq{YtU9byQ0M_`hT%&|>02c7$Cr~|J_3Q!>iUL@o%t2Jb&ro@re1nBX+VK9zf5n9T zS7buQzy0_dG(~NJSN-nnh~EXoMBI=aEahBfp#5am2<~g3*$((;Sp&0wmS<3f^@eqZzbeUl(nR$Vb%d>!6t98jn0LcCi*PE zvi$7oS3|Au&u3JCyvD8&{pUJ%SXU6HAQOep-*z{({7qtc{Z@kx3AC5-Y=8%^UI|1~ zRxq4v-UY1EU8Tu95QL9^WlaCgw+r)(+y$``EbCdLfKpjb{TD>pj2SWj1R|=?Spb2I zgCyWN?uP~E*e7EZJ(TZWcp-Ui!T1b{%|j4$kXqWfS9DTYH7rjzcEI=&v$0PJz#xfYkyFhLdvC*?AyQ9zFC)7jb?*j`3<@#$-3(%M| zz`~g+-T^R4mc9<`(*+G&hIp)NrnFgZl~p|ZHToYe0Rcnbs|C3w6T9e(-US)x5(qHpeCbd+8OH?L(X-((klJ?zNL;7R(OV zf^$F=Wzzj&II7Bun*=-Dwujs}NtG6)wYeVTdw?ml0QV2eIu1mjS&Mal}8#e&# zoc$cc7S6^t0Cpx&YLu+%uoBMq44Z8p1bXIyUKfMhBdn)Y;3dkddld7wf_GQrIe(1H zlA^`!!%je@mrU_`1cN06U0N<1-GcRu;e;VXYV7^AIEnh3m-hXkI z+c!dMvwo6){B6<=z@y*M3uKkb!pB|xy>l1(Rq3T@WQeL|9^EXca|C_kbf2;_coBm< zlzA6jC)T1RF;SpHnxu%oA$raopB})znjI#k)Q=4~;Aa8q_KyjiDQQ*HR(hc(L91my zyOiZ&=Lg~7Wv}~isPsbBdqTEAh4Gb|HcnqU?7wg1j@_F9@;Q7y__ShWa4HZW>Wgzl@V91Fs4qyy&?c} z_j_@qa*^X|MKBH08zzi@!)#!{p3S|!lOm8$$Z*lnaw=My66NKW?e=v5R+NnQE)NL% zP(hmYzTjb3Vb<4hUgyxFSZ4=PNDUq%M^NkiunJz{k z7k=4I9`W1qNQ?s(h1s7EB-Of49OlmjmJymIQRKZ5htKfh#V~1u<;97~(!sPW-ORNE z<3V)oA{_6)hlK|<57z89Ta1?|`D@y({XIeNA$7HD^Oqiv`LRA2B|M2k8;m9lgKDfF 
z)lHj0tH|o}`uhUppH0Jp`}n|7cqC9q!0w@gnjK-Ca7cU}#PrHB0HTmuBwJKe8l#pPdKe8NLmN-6nPr`0kjjZDV2`7y-B zveoq`H_QqJT27~uk2^V#q3)p^Zv)PE8&MWleOzuH&uMll1gH;J%OwhWI8dMO|+-jJyyz zEB0P{P?R^*s!4me0r59M2dEi&RUP6Pe!zATgY=pbt4ID4a@HnJE_Q>?#Pp&E)-BzQQEHh)^&nslUyj3 zi0KHXcSg%2QY^Tw7JjvP2_Q*BCL?D)j!#FXz1jK#vT1D%=qRdFdXd&%Z3;)fpW(~o z)s50h8%)&vNrzP{jk&YFd7lmI?fbD~M409RoCgqReA1o!nbpGDGp7H>BvxLSZXe6H zb5)N?kZ`xUh>&kCC2Kk>#c#&NI&Bek!X3!&_SLnySaOxngs;O`b)C&&clK5h*3MK` z^ofV7)0f>xb`T!lMZSBS1GIBN!o8=M)Ya#}1gb37dd&=a--Jfy@I`!eeo`jxl6T~i zV}$P9XOcJNBHk*Y)jPR0$@>;CP5AxJOBqehJZe+W4Sc>yf*XCPI&4ixD$3`p?~!3s zkV~ucW3=yKjqvTj3&Pwjw1Eix(OtIEyYal*n62~vR7X^H$E<^NZ7}5y3Hs>MpR7)= zzh%UsPr_KQp5zE`@~%I+yUJmAe=jfXsiYmN(kWyAWNC~uYuHG^avc3=&9JWCIAJ0V zf&`WEym~0>{0#5C`H^31*>u9e4_^W^?8!8?-eoArRFXF0US3v;z@zQQVd{w#MlTu2Cwk$hV#iSgZJ$hylx z6Q*R=#g81m_HpTC1+uQHsdeF_NY3Pf&aUlo@rLMnOfDs-%X@(FFKksw2Q7>$kXDb3!QvQN+i;Fe(jUr+R2ybw3TFYYGlriI-$({c-UyjgEsNdDiaPm9mh1dcZHZO3Nf zkFZev8(wo9Bv*vp=3L#0O2C=XcSWY@ND^M!USbOwb5fT3HhgSG&(O%rqLd#;*!jH6 zUKq!~`%#Xh^2?5~ThbeSWnv?BnF^YLC_C$?E+?QM;3ucLq;EKz6BU35^0)I(qHXcd zmgF+H&E4LJH3(%qvbenrSG#?&)iAAnDnfte#=8{0%~^OhH%;|KY*^eV9xt0)SHM*9 zcrAj(%{Nd?X1R=U^xTvD+*AFr_#2eM zMPmdrFEjapjV;kEXFXE(-)blA!<-UtPV;Mv2I3m*cx~5}%_j5@IwI#zcy=SJ1Z`q^uibKsV8*SWUHR}nD?*V9-Dgxni+_yF+sWhw9ImMR^gEOGI`@^5RW-}7Y-CGVr5LCu&)oZ z7eCuO35DJPgMLdUk#{M6=SP+H%dU=h!0&Vy zLr?qY-tRH(ej>Ki_O2+pzNO~Y)~ofiFQv&V`R{|rb~o)OB3aKVuw5+MI%isMxvGy& z-y65-K~Ih%;vh-7$8WCY8e2W!G>5RXKg_y0`?7uJ_Q=PKA?zU|+n%jBeczicQhryk z_r)qM{Nu_zxsftYZ_*~)p0==4&PcF-fAkb0?rM9h1~o!Z<`Q)F-U69oV|#;WIY!r9ry($gdrDMW(DV4ZZhiT@)cslK6m0ieF+@_yM27#hQ0&ps zk=>&(*_ib>>|=k@SqUjDy&m!6I>2-OE|y9|*vW|Ln<}EIlu7hj+0eu99kZie+GTPp zThI+)pM8n$#QS5B>wRZ`V%i^c`ZHwH!yk@Iu?Lbch8vcEs=?~}=bN)jsCztXFxB&oT%G!I z_Vg7jZ-@MyTATHIEG+!I2LmGkNy4lYb+-(=_^7m`lVh&oyAxYIzXs7GdGVBx9+6P1 zIk*9$j13FjW4mzwB@RdV6NE-mo4b@b;z}l+IkpbnF~4Ig6WjZ$E_um)7Gm+={gRPU z`z0ft{J-2Sf=-5KL^A4XM=s03CdsbSXjd^Kcw5+)8qY*qW7ehrzB^0TcV0nooI6y! 
zApmEV+j`%8Wmf4_=qS3}`!fzEzuz;a+j=nwoRior!}3|D-!r-ci^Y}SWT7p5)tgR~ z7>aM9*X_vEs1!q|{5R0`KvtrAackr=_#Mih8K;6N z{B!Bz?6mx&$N8@02PjR#-Cn`u2O=Y)b?-F;{YeZDGvlw_Mo9@D?7B(i(kIlq`@>bV z4WMf+xjr64YKKF7MZ`YNaY7n9oBd+L#oHw8s}$~jde`q+Q7ATAT>BX1)2S5MXM_0L&7zL`(A`K-35 z_g2iW!tSE4-tp~CxgS%+BdgMXH5rbRvPwDp;0>T;>mw1R9IQ|F+0~DFNu^=)Ace_J&-TvT);~7ycSlQK5$-1+;7RkMnriQ4?p*Y< zAR-w^WOsiGy&no=99aHA`DZD*2Bb&QS4~b&(24SId+uULx)n1ul2$g;yuf58-hO#L z-$M+YSje^WI|yili-~LG@Ts8L2W^9&{Q}x^#&;Z@aPrn&l=W9PhA0OZF``k^$bUJ! zi>nNthR7&$%<1_;B3v5GY@0C3Jul{vFYRVJjJb+_H!wBD8~?6(HCe_M+r(G41I12` zV+&=SVLI>I+Hkm?Mhj&FV1hzRwnnvcGuMQ)4v@si0 zgw>j_c)J`f6)H6-({j%bJcg|UTBlB8)njjLBG;R&CZ?k;4tTCFBFXJCj`Be<3L7xQ z7!llf%c;LVP{)%S4n`AP;w3`sMvgy?jSub>7)4k@9Pf<(qVkcG_{?@t8z$f`ziY-# zI8`rYigd|b zjLE{PD~{jkVk2*a)iykAxaHn4E>#2^mz&q3faO!Ef9^`TDL*P|=14f8)VAqD%W}cq zx%xDcbMVRYIQmcPp9HLLKQ6R#UqbtM=>d|zBTfYg!ZM?Dr5$WnEbMI{o`I0k&?kJ( ztNgk9|J7qnnjiX4TBF$3BQ}*j)E+HuIWS9Ve^1>$qnh6C@;Y>HUZ((~=GQQjyGUx$ zPbvt7%mHy3H_6nJ>Gq)Sg=w!TPuL=UMj-(yap~Xu8rLq=^lFdwi63~c{Yxjq$PO{e ze4C6~3lP?k`cazWj~Qgz`(isLot8K>w`&dvGB>J}@0wiZO4aG4$mBI%h!iLO6|cFw zA&{~=**6l84CH1sd_qO$tH>!Y8F_`o#AfAB`HtBN_O z>^|&N=P-%0~$K&|7(1bF$z0BW2)HefIF0 ztZu>TPPm%Wyqm>1;=7SRze~mkf1mm9rg-C&jm(d~rC@i*TI&9sM zp|Sj^&~{bBY0k-(f2iI=qA~B$)lYw++=VmTUq9SFQZ}dn)O`;2=XQk1|23HW8vk=J zZPm}7c?H1|c|H(*+d95(8cR1p=73i5rl|Cz7D&2cRJ5}29M)E__T;CsrBKZajmk}E z2k;NJ4Bm47dkXR_qv;nzXoT=d*?t83BJ-Llf5*D~+wk*WONQb`!6sBfQXeAiCYu*O z=_Ka5YS^y(+-04T6r7p%o@D*qNp+F9lWMA9kd`?l&#}E^AIw*k#c}A(rp@j8+!cb#64C6zW^G8w<4y9NGGSS)1xVn@5|UFqq9s69N1THZ9BDZSlTm2DnB1xFw?1FSc> zIF+Z}m%%a5j;yOn-!)!Uff7+raClWG>Se!fI7VWWU|we4v0I=9D3^EzG8Ol`@MV=o z>UDgkTg-j+uk#{Fau_yiME(zZZyi-twDt>&pwb;%LPBW-Hb^%}ODiE=0@4lA-Jx`Y zfQTS1<)&fNA>Aq6-EikV?>+b4@s9J|bH4xY7<(}IW3yOmt~sAMpXd37Yc8Y$hBR8z zTQD}m|E&)lrYv`ekyJ8@Dxw_Z8Y2k%7 zuuWBW&x<;P#*_*4ddOvd(KMLlaTs6uSzY1FE((#KyWWyKrtj&w=R+k9$0^!`x598V z$jAs82s~Jq-+gwmnbDsx)0)=0JehB8mmduCQF$Ctlf^`6P79b8dII|K}8n19L_2A0JU>vo52oiJ9J}KR=Cw zRYX9>=@=!5qkf+4FnUJ&D`L(-1e)vV*UGsZJ+0~dM|C((+?&vQD-q_j9oqV; 
zmP)2Op;YIha^>{*GXszcMNL0Ta?~Pb3+ZeeTEDED4EME^u^_KBKN4fb8}umdqE5&4 zKWE*zh$C0+O|Zf%&%TJNirJd;1v}WKbsQ# ztOTb*D2+HnB3P_MHWf&Se7<}DAbP9_mmdM02l*rt#6HMQR{Md(_;W_w zlPu$uzQ64r4=+8AGHU`?-Y_bLbf@9G8(*7~5iAYt&)(}?6935pjy+O!HQeDgnj^_y zL`e7I4Th=){9Vsgd>lFUZc^>b6_GZh9<>#BeI@tOoZuYbeLh>FG0+=9uYpYLe-0Zv zuP%z-u>x_zTzxODzDmSv&Z-R_BL0_lQ}2HrSOdkYD-M`{0_piZ{h_@KiNp%( z2%;ePx4n$m1L(fi@W^@EVoM~wQ=Hfw8M0?6H?wxH^vocCj79HEm9SS{Kz;cS9yczb z0Po^VvKqy0Z?nsK?mGF?xZ>i1y=AL+^z(J$LjiQcs;~FF!t#FEvpWB_=`&g%TX#Ym z9ZTUg1LqEv{|W-4YFd%g8kb$Dg=LP3oF3=u6JxzDFyWBri`Tj$QnDHSaEfw=*m6oQ zZZC<9$y#>d#=Yd#>e7E6HfBGL4#O6}8YU9ALldKnf2}zd+-$zb>Gjck|6>=ceg!Kcd#Y@gNBX%^(|p zI`z8@f)#d?41kS)_pGd#I}Gdl)2a?O<#kQU>cS#WN8U9o%5 zLp{AUzeg=N&nlXt>^T#f@*95^mBWuA;MD!wa23uJrxnj?<=JPl$J|%|7C*z_3IoWA zWrK~S6`@I$PE8=u}~0y{2Q7)E=YJ#&CA0@yF}amLh90>qVF8mP(W; zuu6rw9u4wOKgo?H;)SlArDUd^1Tqu0{(Ce)vUF4(P(OC2?-i)w9#DE(A@#=x91XP4 z*h$X$#XV{&YK+Mp`=E)QAG5&w?rM07ypilBL-Cu_= znV|C1@z~WTBE?@qgGh>}92}1(YQWP5RZ|G2P)9uJD>b3nXJ~79GQu9rqVK9ni{E>> zo~4g%@$G;%%Xm*rFF>CzPgct>(*G9 z^+oKVZX{QLG;82`+r_mPEUB-$@+&Q^TZblT=2-3SDunP^mC(%}eTDba6^A)2bp;5= z3KjL4f2Ol0#BlXz<#^Gz#ZFbXhbFDjy5Yerdvi6$nAK{_87g+TN6Q-MS_}qFS^I)G zjW16Ik#C9MS@j~!Wb@RfoAvzP+ucbgY;B#a-*2u^Xq&e=-jZ53M1>e2Dai^vbe4O# z5OQQQUW>Di9x3TnYc5h^CbF9|Hu&}}#{R*whNFG43HLw3MCX4G6Mxn<=N3~6*pB>- z=9)ilp;M zVe{y7Pi~iZ^LZo^eV#nZ;G{N4_RVE58(xdjv{o07I{wz+7Y;KTGA#2`jUM7=SvZCh z4Y$MV3_*!6iE)~bjagT@wRzt$>w32NLDH>v>kQpn%JuX8#UnYaNb~W@oT28pB-61% z5q-mkhv*UylZLaqnU+-rir ztXP#?{kafGUSk2FJ=~bjyP9h}Z!!{y6dqrq zK{KJn-}3cM7w!IjmR2|I3c+Rv%>1H{$rt@W7I*z z8I$PKiEm2FbvZF#V>)tLGC6x_^kjdJVri8hMOC~O>ugRQ1c-85U!0stm>Uio@9;YN z<81HGiDP5liK=d?Iwi1}%!yc2tev$X4i z{7Oh0C3P!u&^oSSykOPipM{q8>X^px@&=*(NGw(W9*U(Nh#JY;mG5Uh=9|bAMqS)l zzm{ul;*|cTw|L`+-7Ju*tbXYBc>NW#gz!_0l?}Nx#ikuB`p6Lblf*nkO^=)l(WB?- z*VA4P5luRT-^`93^%kBA6%uLbJ^AyFC!N#8U5Im&1a=NYJm__b1f4cTpL zi8puMkg&@%YqUB2{A$GouH~0bB-D){rG14>O;^qVwMDjh@Ph>58f0B>zt2MCFKN;I z5!Uz8#y;Y!XN~INcWI%IRJ)QSx^Lt4)C@s9(h@w$!J(KOj8Vl?i%)oI1&o7NA{2P$ 
zUU}Yw{;~27z-lddZ0YkcvI3bjB^LgYSI>y@Iq0p}7T&#!Wu-NJ#N2~qH`(DezpZzU zzQAX;8I{g&@~8TUnkf@d5JGlM=_yOzkqQv!8wU*T0|n8ZuR2=z!@v63!g|ZpBnzEz zI`i)mdT%3C3&~s(vN z`PBPowoM}PfS*@>>n{c(oDLv&%TYFWzgQo?kB|#*;`21S7JEYR*3D(KaF|mh-8DCg z8-4ReC;=X|%s1$+r~-ec>mL37C|yI`~Gx9l2P zE0>mzgYimyQ>dK^G)?(S#cnq%lv({`rwL}U4$pNjpT%!wZZ!_ECWH^g=A}mx<>S3d zfxQ(ES?}H=+mvCe%Y0zn3<$8~-UqXZ@b+YdD;vqiW>+XtA9tZ|$Wt^Noa^6uFDuN}ap!Eo9{0B9)O zT8X7;(4SDB(CWDb%de=H1IQDhsT)sI(N!Zb^CZqk5;(!gr!JTlX48idAV8%3)yA_= ztX&I}*$~)C$(nbhC=tWZZ)a}UMv>i|wXNvPEd0`QkGEsM60KjM|7DBNRFqp!u;YU} zAauT0@-65g8poJiXuH^64zmS36gBDRGnL(reGm>}I1$1mzKxfAe%*&N#z43Q@;s6I znEuKS+!l-Q+a|MEQMHUiEmn%Y@DRbj)P`zx@3|uHZ|^St*E$VGbi#K}?}7gO_`uwr zU5Ef-zJDV?3UVE#jAfSNG9=g=&dc*npgHB z{<`vO>C;2&%ueA<`qQQ#_Q!9sd)8SQvSyuq)k6HQsQ)-7WBw%+B21OHbZJn{K0Kf-87 zDA!-66!(-@(Pz4S6`5;xwf#k#MW-;o;bJ3vNOk0|@dkDk?gZ0;dDi~$yey-)KDM0T z10vfbL9=ZD;XtbsO>L6le!qIIO0=`l{9rNeVXJ*&LH*#x4d_(5zxZpmb?MJm^qdjr zXqKCtVb$_n=67pPF#qAwXI^Y22LKM+sEjn#NlJsLc)JuU$!nX<7~3jlMen=WS8y=D z^Fe#p89r|jW@dc8B2ws)f5;SQXwhRfv+gmu{)dzFn?m)9mOb(L+$`rVHZxS!Xwz}O zeG#JlLu9bX^kBKwv3COLI{Jy08b$V;AKocEx>89!sp6xTX8G~xsa9L&@6r`Hv!JcU zm4gWD60uujE8G~ei+)W*LT~Gmrne)LmEE4y^i_@!%NfuEpZ0g7{rT%@pKj{B;jAaU z926aCO}7|R35ey-M2-Lg+Tm%aUL1esU9!FE$l>8-;>v#YpfM!^61b|_mIxPeeQL`R zC7_q;YWV3{E9y5B&D$r&=x`E7A)e0yLjj$~Qfo9Pj?OAns1cTUW>z;w6Dgw}$sW3& zSNlKaR|pkTF$ioebJx@<9B9_f?b@uz_4T=`zb;$$6I?j?#)IYFaGbHPGrJ+hL+bIY zw?$P2TQdUTRZzUKxn(MNhABnD@C2j9lH7J)U~W^f!Q(U$$3}&rR8r%zqq>vT~!vzP3&@YSicjFe7>f3J@GzMhXP!<%WGHs#%q4L+*7?6=iR6P^!a^&QiVd}`6VNia&;8Gm&tgg6h> zWH}6Jy%t4*OJ^6|!_#JBDa2t%_=Mr*_gt6EKcdG*4hhX-2)ln>}VH`YMB)p9%-|4^e%3H5# zs2=`{54-(C4ig3PI@Vfp)J6Bzg*a8#DV9ATXd3j6u()hlh?)*k$L=Bv?htX45~9{e zZnay*|KdX6>g;HUYx!4{l(V44AC|X89F){9hwt3(1N5}yMK|5jb;ZN4X++?PevJ6a zX@Zg$^;M{$zW?HbDJvRWOx?1{j`-*Q=HqvWA4_cs^K^Fkci%+(V`{XnSgsem6mUyemF=_18X$0qoSf?TcHKd=XSZZ^PhZK0`anC?D*dxruV=37{E}NB*p>mLtvhH^FO*ejUBQd zFk)GYVgb1dhA;-{@*Q!{=ZTFKPJro&xw(YCNYj+of&~1)z0-i+@+EAQV3w%!}afu zk~)A%2Z9V&4bjdF<~u~#lP@EPP~4LS0Y-q!^>mGp)3pzgvfdnoh_KkWRHhNdc+9SR 
zVD&l9%t4SmmWXXUSIBtgdl0PmWx#R4+`L=&xhtpzr<@75zC8Tcni%hs+>R(4r`%<& z!B6V;z1s5*s!7jgoEg#(ezYB}t>X3z*Yh>CGjn^~)s~y>w|JW2yLLWKQX(VW^jSJL zfUm{EJ!{u?z_a!Aon9_JxnN_j+s6Z$TYSGuy6k$9h1G)Dh~_+0_L6(x=A>4Y;-SE# zR3DAn$LXhXy})`E>%Nu;i||G5BlZKCntn$IiM|f7XQAytxVS8rxHecG2GR=}=DiuH zX!yDZ{|4~Whax~VG@L+@H|GN3t8jB#>Vp;Jdvy9a#?$1mEJUc7b&z1;9O3Z9i^$na zJy)4K09US(sR_re896+PXP)=(<#vcW zFV@{L>%Lp3KzMG9aD8pZOB)qWz&|6e%>eAUZx@$?K6=GKh^E6a^Eh33{W1RN4KMJ& z3P;#xwaEC2IgCes;nJeCAQ@yj@2|WHpuEYhMfCM9pIr@w5CevxY&np{2yPmcl(`=* z5a!z92SB#)Cx91GXread0$SRHh)Qbh83H2@TYSWpEGK*g+dyQw%VId6lzCTyV2+v< z;P*ro+ylx#UQa?GVv0n78Sv~^XLfsyixsdOf0+r)7skKovkIi(BH$}xe}>jF-C5a- zspQKkY}&HrNw0)8{ZkUkA`L*b1w0t|eEwSnBSd0l9^B zBH*fa9RbW3JUS4+{o~jS@fq^|gU!Xb4dEGP^8)jZZ9wGyX7NRfW@Lr;jxi+}xz?S> zuQl67-RGzG*FN{Qbk`R^@3=NQ!shDc+H8@gJk#d6?04`M ziQ(u6#xK&1lF6umBb0nOoYc|;2SJ3SfIl16UK1Ymf~!<%Dcss_19>a?#x_;yo?1R+!2dWRr8 z#&04Do&Nh@x^d0b))M2Yy&jrItXRj*9!9&e09H=HHjD1xzXu>S2u$?eY_>e^dvo!V^nATRmpc4e;cJ+*$2NIb;mYoC`SQW7S z*l{Jj1L;2#q(!C*&OdeC1iPYZ0Xa#`L1^uuEC-zJWxTsJ-6@edf0g0 zz&1RJPtTnn!O z-?szQvf{ZOr#lVHb^yL$hJYXgt@*+}sB@T%Tu?A%KyZ@PaIUX7K3#2}9A+f+Gc?c0 z?d16lUn43a=;ROw|3gb)cgLo#WtyOaZ{6hgKngj22!R`9rYlqEeP?t}m@{4q zqp?A)2EcZ#n9j?MKxpOZAgbXxDCJZ<%#U@(WjKdWDXJ~^h)WuFOalIsy1|@Oj%6_3 zQCP(;h`}eKiDq?1?4Q5-X#*|R2zszGkS{YQD8Y;pY`6{F*Iu|%|FedaosNiJ8ZwCi z)Da09#nt6RnzMp~OqtS8Xj)6&o+H<4ahqtFw;-%rmKqcNhixDd#E&kT zfVoKr-~>Z@@M#@c`xxLz^( zgl)Twf@J5yDPS56Ik~nHi9MFd_^><3cGhJOJXNN7#!X@_v_n~mV6*x&eM1pHMT!XvX)v* zBMMtVaWsw0()!|E(8JTf0Jo7rKOy@Mojn00+Z>|EmK zBP8YTdFdfJn*ICNOg)4J&=EHFees&+kAhdc@MZOC1_qU8`78RNUwyyN&|=*Cj9V)e zE;(%a(QX6#8yqYWXUEp%Tz zEo}?m3I){cYKOqD`$9)u&nw|&OvqO>4~^-XVW_v*cnYwt*j0BgO@`AOM$#UCCu za~Kl9UM=S*yOFGTwRk1tD*DR)=!U6d_TIz1QolxWgi2t*LDPm5^Gzy>X)F$2IXR{l zo5xhIcrE@Ud)K#2!|yGaN_UU?R+w zB}eDyY!z_Ap7z`|-ckrPU(1C+QM-NO(q>=Gq6GgyxXLXFJl+B^#w%Oj#1wS}ms8RB z!6E$1B@J|L7N0-fY#!wa`W4#?(EaCoo3ugc;vE@XEGWh9vIsL=9pEPSfhtZXQ|N|G z2*gTpkovKj3ctw`-LtX^cNJ0`{^EjCA0L(RXWQOr1soarZ2Ng5za*bed$ec<<OGdnNJhb 
zZY1m(VqW(&SI~E@wI!^nGIgt--eMkRhgJ@um4EIXXt@}U)CUF>S7sOqAK~sE(ycJk ziW|2_^c3mNZ(v8Ar8{M}o_B!x^!+P(^W)VRlh_B2gz;UdQuK6RAQSz@!lqqEx0W`- zet(w0UiFc8AE?VNEdyyn{fo@2kP*ri)_DBl9a>mUe}YMu9A4U>wVNtu$ZOQ|O%1}y z*)Lw;Ze73ZYaX3Ozk4#?Xdjs;gv80q-f4JB=t#0{<^Zq)PZh}Iw5QdRMUL*Q1KGPF zEMeq$5&Jl4Yk7y|S95Q&sAvbr3d8S#%u2djq2`WspGhJNr;~%Ul{iV6uIYf zpmiMgC_Q{SO8R_%b!Q)XnsHu>o7#;r(XtT`@{5E}&xxNqx!Mb~Y&PA?1p=%AcI5a5 zc<7cgaRaf9>>JY=kO2`8wtCt=*DI9&bgzTk+M>>?s>m>|PJt z+zw@FWSx5=+KW)C44gN;F_9UGSL3SNpP zl}t4sJa`~4q2U85lP+|~+8w1e-5}NJLo=bR2>q%(>d2fvW_!C7bYQ)xd1@~VpZ$p3?KkMY4(S(tMmd-~xK`4Kw0xjq|LJy5>c18Y zdr8$L)gaqoc^(Cb-)&Df9skTw@dY$atf;U?wkZq=Xw}a1uR`k;#NT)Fa|y%=u6G>s?QTG2%BHA*_>cz5hnEWndl?Bo*4Me11-?k5&W#C_)d@XTFIrBCk3{jfT} z`aI*>!uH>oC4pd0!qfTW*ZDaPRBvt6CWEk6ML11V%9VSlNpzi5F8*enr!5_fex9fUAUU$gwJ926FKV*tp= zTkPwnSivGBmtrlSf2>r8_Z6HkK(AEcsPpb1m9qIY!(8$sY7b+=bn(M8ZySxHA`RXz zzV#DT07?62yeucX{9Q2Mi2&HglYE=4uYd7qVLmo8Jbeu+--s2%&w25}J0bunPCouR zR73FcT=QZ@bpfiW^_}I6^Qpy=L1$SPr}Z#*Q{H-p%^NGnY{id4E31BjwZYtK&nl|=*rNLS3?t;>G<%+c z`(a=55;HmBZA292&Ie3C8i9!r_Dh3~mjNw}66mj|wr>kGhK;*&*`zez$^Dj~ENgj8 z#zLy;+K+n6Z_JuEfEM6F5?ltSMhzjls&nTyaeq=AkJnS#9cAg2He&K-^F6mbtx(ZC zLK295yvpMvpo!XaK>fTEE+rNFMk-1ggh6ra2hlTLBxtnZ4XcED^p>X$eLLrBL|&wF z@5`SDjm!ll~ikk#nUz-@*tC$T{Zrp=?vHbf!$=~G%0p$|) z`}&T->?LjKQ|c|b1*Gr9diiK+*fEjTYWu8V_F}=shB5-&*urW>=;RnE2Lq(~LszaI= zR*@Rh4<4}55x#*&68hR(9|!md&Zg*KGpj>)++cjDue(Q}Hi6~F8e^t&>#!k(KaPC{rd;po8?ox~cq2Xzyk48v7{hMc* z2dKfCECccHth$W+#r67*Gb;?<;p%`K#rAEcP&{&^jdPLq6s;W&)Ib?MO}mJ#Rx7-hW?2Y z%hI}C33i_cw7q6wa^A3>_`$)7LSYW{|-+lz(b7Mw1^tImSbKW9k8Bi zIgDiYGShly{mHQPF&T5+VYOkbxSQ%gOwP!F(tHTi#6JBe_mThwNdT3g;^*27G_q4R zTu2RdM1=CX>TYe8V0kNE0LJbEWJ;JByDtsjs`Dh@%Ka5UpBbA%WlJQ7@$7G z-J8V=8}gxvEL2`L9I6nG+dY{op7u{+M4FnHGB?vgGGm5pDw#Vv1{gQd40=eU8QDDp zMrxutFGatb@rJm!K%dZ~JIXZ|K3rwVv3S~Mu<R?l- z!PrIcyjXMa=X(=Re+|0%8-^J-B@)9<*w>et((MyN`&0DImEi<5yU%aEVWbfP2CU+w zSM1X+b#{3F)B;dk8#%f6q_5)h5<%Aq>_XzTlDVRO;IZ~s=A&eyev3VjRzi6c+dsh< z%+V@~AHa2N&lX>>Np6nQS2$jq~X(4cu-61?mUy`MvZOh{uIuh)o}lm+N&MA2S!+{&w@yT1$V-8>4?2 
zWD{Wj{WZ#mAm1p7Uo@8La(Q0D5Q636(Bw8|4Cot5rI`<(tY}~jM1Dm8_gKl3VPjv}YWgN~R@ZDn^0rc1e&z;0ax%TG>T(4y>un~`XOl94U^T|#7ZeW`eg`39gu5J^*b{_^h;Kl4ljHO4-L zCB5opoA2NAGzWTaMYr|Tw{Bu&C!BVb3B5X{*6z^HToOE;f27MRlDN6|vTm5{t{AU8 z*zEZR(x@YBFJ>M0)4Z>OxVdfcZ@hMp7D1ZC}Ssl9#H3w!Vb+mJ9e@77;Xnj`M79G+~KXHSaGdOPL*b3;d0hQ2pJ;y(MI7!n|9F_A8fWkx17&hzHHH z7G7WWI*%*H4lZBVh3pnUK!bm{o zewWc%B5;e7-(-Gtxxq0uqCOwI)V7%Vu#7sfFvw&DL{Z2*a7sjkqo1|ZhPochz$LN^ z`&Nps%y{<*WITpt>4P?SJWGq7Z`iTRlvYoFe39@1^;H5EwlA-L3M;*Rcv-h}^pOey zEP%c~V0Lo2G_{$%+fAS5VWoKUS`~*$zg!q*63z%&!5Tr|VQ3fphh3x;uejS`sk@Pt zIrg_asE4Fg*Kz`s_b(O8z(E9b$dX;EWAo##Lr&CYy=+8F6!+)5m-!1)gMOUPIKr2F zz#8C04q+rsqNLW?$z zG&0#QQ0m)&HfLidPj=axx#AQ(q;MV*Jzd6b#=oi-?hMK{$q6J9Z6H5Ae>=QWt#V{J zFQ1<7YOqvCfuXlsE%eT1KDL92fwRO2VmxGY!^@WQaQ9Z4o2uL)T*G84@433$)y>o| zuP4h6ADcA{rSau|)-3>?m0_ZP$d(*>k(=FNA4XIg`LeGP z0~>N~m)sq8TIemZB1O)wS%}+tPe&p3Xj=90oKBKu74*}UW#6)^2=xJT-!g2uBP%?3 zabyYR51kiayu7_?O!lShY?qVYB8C3Nqx|Z0sP$N){e3N?{CdL5YqRVq=x6F)nZA4a z%$iY>$dM|azF(@=PhG>!n2pu;&6m&?eFM)~9Fu1GJU@BaN@vU3@2#t(VmGJcjYAGE*8%cP359E$0NSXVt3N$UyT!07w6B%c?QesR#NH~3fMK#iO4FgmC_ zlfgBz8qqxjC=15nl?KHO>Z1OHmq*Ad8GWK1_8&?^Pa}o-u@s4{_1*;p6y`@zNWF~V zy_noORypYh#h`K+R(=}Vf$v7pfleUNi`SSO;XwmgHQTPtZ=xgU65@tQW_uBm6W)?|UUjKI1xhpqWF8i zv%~2;?2KP{rKe(tviaLLU*ARAwu78JRgfhBah3;y?GL%`24vW zSX*iu^w>lv)i0h0ei-1JQ z(+Hoo&Dy0Ivt+Vbc$tl_P4X&~7+pAF8mNeOCWOCovo`KJkP`JN_8J6&3U5|nsgCG; z0tH?icGW%V1v2!vwMUjkUk=z`vC;Ns-JfLc9)9HKbXYa?LPr2E$jaX6FfPd9;JkI1cKI8_EH72>y%4XQN*0c!2vav4Ig8fu_BC1<3 z|9t2=di=vJU((Z-n>q`STJ-<)2)QF0c*d*=X`78k#4PcUtE}ZXiQa6Pd0!uR9R83w zd+zMKdPP`yzTknOogue695Vc(nm?S__9B?l7%da+#aa@7LA9<@wycE(!Qi*<)F9FD zwxDa5=ccpZgZG?M9K-q=$Z82B}S8nd=iNo z6Rbtic-^6LAN=ClI;G>>N^=@gy%pk*G|K!MOP~`egI0aPEUkp z3Gw{%MvX|BJX6(=ta@km-(6}8*aFqdReE;6^q7? 
ze;|k{yx&%nQ}8CpV5ol%H>5@Qi(3tD2AO7Sg*mh&%cIyQmic*S|H7H;wk@xn&=%iz zOk&ACWqIl9ZAH0VS+znB4JCJ)k z{V;;O^^{#xal2vJdfzp3#QEhhkfPEi{;-ptC|qnUT#!;yXa)0QI#2)Xqm9=;OR;W1 zV4K}qo_@zg>n)k&)2Yt%6Evpt&?W@|Z&fyK|NIl^5|bMwx0OF=Me*XTml?sDF^UDD za-c)}`PZHUMfHez!-?#Eld&HS4qe+n70#_KZt3e8zotM3*G?tcy4pcIRf=OUC;mLy zbRHbg5|L)0UOrc}_ReerXA`ElvdLuPJQpN_sbBGEY2?b|K~Yt>@x|juMvS?!#{`;` z!JW+K({4v`G3nw1M>5}~)nBv($jQ<7o5{g5^hIzB)k-FR6jcYaBx+52{Ca~Fm6PBR zEz}BjdE1N4E$He(TL(Hc-i#4yOLOKfk;o`L8U6L#5XWP(`C|^Z;WaNe@x8DB;d=!( zug;=AQiZMW?o?OZE*cW!W#{7s)EJTv1JnaA{sIckZR`1{h`xvlOPRx;kh?><4M1|# z?_^J0ZPlnC9Tq3G|l=-*LDb`dg8Kwsz+=;C&gEW^1Sa$qsBmPYM?g<;?bDQ8ipzO3k!!OpA*#I zGrnjGNU4q$v$P=>7jL@iC8Oz=iLL)I$KAuS4%Ky?eQ|av^TgSUYoE&al+S1-UFYsb zQ)t3=JzBS9CM`aZc-8MyuO{QE=XLpapO|kxF7*c#V|azVzv5k-9#&cN(m;cMYDaY> zz>xLSx}Z%)1OZ)uTO|1DL(=8bRKoS*;5G~2g!8~pePK9kou2)rvHcV)H8X-1gti}i zguMm{%$bfay1oe`dEpIwd5x>-Joav-t!RPRrCu@QEv?=+{nQ?K(j&clK{iI{txECr z;pdNA^8zA0g0u;g)&zdS0H!-e-@lnrG!VNQwfCC1ZQiJqy^MTa&ukQ~XSi-496c9s zSRncMK=F&-{cN7dFGuWaJ}3xd)3!jM`_cR$;~{|!-F!7ZZGwx=j52?&m7%!btA^KO z9?qS5oYqWIJ+YFH%1!^)MUN2L{k10tv*l5RAnuYQf?Ktqb`NF}-8}+4Q~PQkuk8zY zga%3#mx(?0akxacub6)eX6p~ZG<|Ar_3G%n z{}PnH_qommFNqwiHB9WWnj)Xc^I^SXO@FrO)kSkZ{~6>fKt4e~m<3j-{GCRD&73}J z3~6gf%Piv;!^u;rU<#K>zq}0ar;$N~MVxyhiaCMOi0%R1neBV@t#M&5`^ZUrtI(54WGjGXcp9FT>KK&{n@j zY_s@D(Jrq`;m_5h9v#zePiWbm8P=UQT+}bO!CAS0qJyQ?(ig*Id|Q;$A}>`7EoihK zdDsF)s$8jcKXU~{LaR$2o;E+#e{p$7*2$%<`%)qi@BMUEBaT5nmRv#l1@u#i@KpI) za_7EbB(2Lcue&7CxaHZkffHv0_Nx*uK{j!H$>c>_RI!N>_SguU>b~LXbm3-lFr+zV zWYbjyaIgg%!+A0<>_hW8^Bt^SmEU?^oiiT^Yivt+l6tYV)($&y72^I7KC!X6X`5V7 zUqT2Vm&}`D=x0if_$-aS&db!K7uPCxAoYsI?ER5YDbqYn{uB6o3|nLs@JkRoK52bG zv%zf@6rV9C0C&~>Y`T7s6{+DeYv0LUZSwo&tHVz^>4oNMowOh~u}6k2W*+Fs^zgR- z4%7VUtAfG@jI08ZoGzeCKEjR91Nwj-DC1^=ydfTXXX{PJv*I?;Zly}#UB`UepyRd_ zgv{&)s*#554|-!@(BVXLTrSo*oqvZ0Y=dZ zN5pnHilDL7(h(tNK$nS$iAPt#D@Ro~L>nVtK#ixPN#%6&mqEpmWi47c&3Cfixn4g`+&BZ*pA(V>;{$KlBjV36H zyhiAoXOoCp1YH%1%DY~AEpldoWo)0W!7iv)YIL~b$f#k=-**4ar%)~ZdT5rnJ-?Oj 
znj7E=ayPNMa&T;I`MxzSZdfjX^}_N~_?jA#jJ)%_1TRH6TzYHl;l?XSO&X`Y?)N_z z)4jW{YEwfYM!41MLGRXy`5vo7en5DpuT5^`yp6llQypHbI@(dTbd-LtUIBQD5jsEw zsFKgl+Mak?0V@gIc(D-#yNLAxWNQQ>7Q>1nlKPJ%{Maypx5XJ&86VVPswknie2Kd6 zYvPB_Z$@llmtokxcH!JvqU`b=<3%kX)Nc=S?H$K);Ul9+a_uvT;`JKLMgqRL|0_Ma zKoloW1`bqc2)nGPy)S+&Z=6%8(a} zFLG5*UgpDZdfgAU=s@1(E54v~9?`PaNV!ykc=ESfvVW5E*R-ghC@) zpz~#JbHi)e0$mT>Cv;tDEi5h6G~Q=fMWQu?*v|==c@eucF%B5gL4G|oo?m5kWd!2x zErinK+s3LZF9g)H0#H)p6Z6i(*$L4hnaS5W00Z|w+B@s0s=jvLOGty#Al)E{gl5ll<1C+WFGB-^H{jW~&hnxeEH$qcpDs$D zvtU6&PGd-66- z*#DW&wfd((I-5lZTUsO4jBxt!l$p&Vg;s73)Ae>%S`E^NfjW{zkDU(lqr>2G@C=Mp1feUMs34a; zA=$!+R@Ir>zW>y&%CP&;&vYI55fG2?0t*2;c03`lk069b%r0#U*7j>&*S@ZtxU>55 z5+>9^aAzV!@CqrQ!`4}!wmOfzJ^>08if0#Ox+HC6<@_|K0&COelgrbkv2Hv@|UKb36cEZ-dbw{gh=PGVaSDL{ZmHg8=`i!E$M5-9#u3bcsN^OEZ@qG6eOL) z>V_arIeQl}yIJ3=iQUJ9l1&mnIr~A<v5o|%bu_<%m{naJs*3Yab)XW8foGy*nUe~{c%-i+1%lb$m{gbWy%1; ztH&ejpK1xiY$oYeSD_CN>;;Y z4o>HP_+Z5y=aYbVK>OX?;}gi0Y-cfoNU)Fl>5L!$2;fJ#{!NqEU^WhHDg%`prn@8| z+;)Mj5n~e(pGWUI#MPDIM2?UJfAD%LLJ+*T*PNF*czG+o$YRq?FA=Bh3_S>WEx-Bh zvh`Pkd}pFfarmt&mhkffdpR~mR^cCa`Z;oJ^3IE}^M_K|d-D)iysR2UH@{z>+h{vA zXmMa<{H}H-Y)lRILlj6Drv9Yw-ZZd?V$nxW3C&mKiI5lRf8ZuJ^{Pedlx#3vZpAqD zCrz6@1*@Q7VwwPs<%S|2gU{H}J##{N{Slv|a3qyExK^p4UZ=R`Um4`gO4YlhIK1)T z8V1k1j+0%CxxX&Ipl<@_zt zdX@bk&su>^?~u0bS{P6K*RdjbnP3vn(F*+t7ni9dy?&j4B*e)${`kx-WM4zYFd^4+ z_WkJt8B zs_xHQ5kR_5*ylBs|Lj`10)0DlX{}{2dub#W(B>P8dm{qdPZOKvsb;U#Ru4dnDh4nPoAWOl6T`P|7^$p1MUW#tXVO)bDx+`TYh3l>pP&-yySb+qugQ3VdUl zAN6$p2{-3nqkPE`0dr^qu4k*%Hc=}X3f)ZVH0~eMhmtgsd7#>W!oJ-*ppq2a$2V+| zrII06Bc&SCM>00{VLKBk3pXf zFkfkv4HuknadYVj9~s0$VzwB=#mWB8hQo7J>|NndeY@7~(Xl~EepNrS%sX6dSo4x= zIUCL7#0}@>A(0+g*9Me~tRgCbJNDGu39V?XQF-VosGIq4Gh*lw_ zX5m=)Sj(0j>5$-3Lc({M#!wBaZc-1M>Hz~%I`}Ns z5N}tOLq>?1?J&AU@i;tv1b~zwN%!hr&ah^Bo#wMhH;1QU-YS!Npu#A+GWk07^>Sj> zx9*F-(IrT81=#k*%d02IeWCYyUwe3yl*#Tknsv5%%n&3-#9|Ws#TxtGVO(&D6rF|Y zM$9b6j+h!a;p~UJJg5>p`6+?U+##0?H8#&Ma9HXaF`ZQZ*K~5cH(-nhyveul*5mK( zUoWQse!CnpZ4|AS-zgM)pu*fv!{nL#>Mzt)?-_E@rtxR?r;L?Kx@3{>kVSFn+8<}A 
z;|r%?bOgn*P|JOSe^+bz6uhi09sLeh2p6v|_g_P)i2bYB%uH)~E;?k6+tn1TcJ)C_ z*+tB!X8JkyzrSn$^>yh~z6gA4=@tc^81bSZyj(UAUkIp{{y@jCf3)GB{BR}l8SF{V zkhb*fzs8vLCJ@_XmQ0y99J0u)+z4}<1L`US`PU(DV;}uc8V0=bFwlyiFqgS<4vu*M zgSX5Pw&*#Y3`+-`I~*zanF_CvFQqt)HgHo?xVByc6pwNtLxR#`dH+}e_X@f3wR>Vt zmL-7o{)vVKzsf}e1bH;O(B>Y&eN=>7&r%q5SY^P`)JpZtx3BLIx8P7><;&#n9a zv5Uo6B-GD1{#GB;BCF7r{&>lzQd7*y_BXnj7P%6T-~}C7DSziK8l^`pjJvn%{?5Vw zbK?KgOaIdq|5v-BLg5#}?z9XDOcC+b4TqXQ3K6(T@B^=dj~$`5o-2Q;dHqFnbGY4h z@-@8$4y4Vj2o;)T#Qwy);*98^f0$^#vmAtb-((@=WoBv8WBUAkwmPjyPlfw=O_S2? zS^j#=e!lvp`{zpI5G-JuYk;$S@FIY5J3zppZ3SXU`GD~~-QK!ZkQ7GnlfjB!N70qE zw+bM+hd^4uFHGBnTJPF8fd@|8Il= z=*=&5QH;($Al)fMhM{sP2z+pY5s~E8~zKpgtLPpzXwx8dcV?08N6xSM6Wipf)Rw_(jB?WWMv#NBaA$u1<`lQ z=^ybPD=9I!G;V7L17Fqrvj$;y-RqgjAZ*T>^qPYo*uemvoEWb-itiFOvBKl3Oc)uA z#6W=P9AUyo-CXLopms`Q?yRc%3H5;wIuT;@gMp>1h>)`Yz>I@O6@y>Lub6ed>x~DGFkt{e}w4VX3FNkwaq5ZWl`dq^wA6?4LH^ zwd)9i+5qf?Y&(V`Y*c>@tpZru#t!US&3kQCm?CR;i8KQppC2~~O{6KzSjN4Hm%o6+ zZ1-}QOM~H%z}W9sdXC6h{ozv0b%Q@ykk+vgc%y`n(MHZKz|%zx0lKX9Z#NFBJmTt| zk#7Jw$yk)V=@>x(0uGM%$OwbmGVmWU!f5cxM-)8%bk~{DRP`j2C`5k9g#?kGZeucM z`K?-q^>GJCzHw6eBfF*mKHD)UXAXLq^a9lP1p)4y-%9XwOeGi>GS#wQ+@yleg}n zAPEL#KA+U$b*tuo;t5c9>`qcypO8Dk`uALcuMf$i z69yK^=Fg{1)AW3X);^5N^F1N!n*;ZdZcKZ)9m#G8MDK!f^6{Tdt-j>`A5uk@BtrBZ0*h1=@%7<}9C zggE$Hpf4R^{F^ItJV6}B^Lk*%TUZ?-zQ|{fDBj1~?l$hWXwN~2z3@G3QCaFy^s@Jz z(S0YUG|!rK>|Mh=@6Dll-1;s4`{^r z7Y8jUg{ufHjkaEiPTc;4cV)f@luPXrU-5xE&Vj#NZ)h3$3bOh_IwOlC&W{V?jRgK@ z5Ox;m=Qr2gSQE(~C>8h-I$QQK-y2Plv*5VYCntu?{1gao9>)oVA%SgF_q0HRxPGs$ zkZE+|6Go{;Z%L{#n|t^V z6Xy;#WIKJ#k#sRocF*RB(fPczKVIT>+Fo3U4c2`9qP`lRC^WOOvuNOKIEPF&QWem&t)7a3Qw-pA5QE?57ZtoEU zXKd$H3SuOJN8p7w1&+p3U6R)U(|VKaZ5wzFHDUO^wajAmve$hM2cX`;eF^L+Q`XR& z#i1>1)v}}SpkElr+Pg1P+a^b-aXxUD#@)*1XF6 zF-Xg+!}1!VL&ERD2%S=|)crFw)1R(;6f1IS?5R4Xn64vm#ms&xr?Izcm$3>Q!Dp6j zR`{$b&kBz+Mq2#+v3*Q#$x!xkf`oozM^|gzhBPH=|oM?ViBw|8Lru21_ zWp~+a)p*HY>i$~M&JeIlV5#qRAtYaR^mTqmKZ=?0T@&IU@wysK%z#hCH-HuQ2Ovef7VDJ65C~!XKuwsOr)8(O7 z68|AL8|S$400_bej}gZx$s0^sT6X_nm4zxXvl+f0Jykx<`AxP<>T8G3x=5$|LWPa# 
z$>c4#Z`5+W#EO#@?~p%Uv?WfpRpA8g8^hGX+t4Abjda+r%hKb4I=aH}hS9<4Tll`I z7OTXvYO`_oBMmS^HyC3q1v%A@e8|AjAo2oW{mHSqx^et~)jC&D3$;cNv-xC9Ki$o25D`IYlihVQs^jWNFa1t#udL4C{UEQu$k6 z7oHR_)!P4HSki8RlcxZ$u&;9YJJ=<5%ekLw_E-92^JR+%_XOJtS^VYTn-@|gHV27+5w@Ir&b_D2p;@XH`!>jgz!9# zX_ALQ9U~xtEuS#cle=7h=nitpQrHYh7m4X*`bWhb$H&uWgaYAlxzS>J=WkFxqtL{2 zo||F6GCw*z3LYcqlSlg-AShiaNS)r&p&(pkjxaVW*osePxL?cJhU@QmVi`5fjwnwW$**lUVXa7rf6mj?P7b7hxBvAlk8^4HkJu zN{9Bu@l|iqyt<8^(ve?AZ-orZ6xrxU&^Ogbvm_oaJoidr`HH=dAyZZPPvJD6B9Mbn zF9$w;T{FMrBJ2uTk&6ZeLZxg`+%IP>`i)XT!md?9A4YLm8@4 zhxqcOtM-DMBdRj&vFAOG7$KUFrYD{GoGk17EFErPN`Gb(XlABb1Dqc()_xZY6sIIu z;dayJY>0k&N!=cy>R%*ikhM!OIfag{VHv*SHfJUJZd2!t#sd-m_A&WQ@@W<(pNrhl?|hOmXfP`TQsiGd1dsSRIPq#_&vhivxu}RsWRH zAZ9MU(c1pSrJu{S>>1h7R40v&8FQ`h2s9y&ZbPr*!5iWb@uGWN4^+6>dqkbQsx;xRVtQZdmnU&DpMnTwyI(4I)3nuN6cvEA=~xyzc!ztGuU)A?$Jpu(3T z@>f(Kww*X;)US_$BCHtnfq_WR#l@L6C6wM8>V&l7O5$q}c@hz;9zA@E`nLaL5WRuc z!}j~HJZp4sue1CGqZ4X3=6E;eYI{nS77}=66^BYpbL$HW3QvUJ|2$qMkIS(X{gL5C z3@!dXag86EdjtDjPH#C?F5TAz922U)Ua|`B1~}Q#v%?4jd4Dfk&G!&N>slthy$?PY zAM#0ke`=|!VUgFEGRjM7>Ugfrk4H&?)baGxHgv|Jwl0dcrP`cLzn1983HIca_{Oz1 zkKTP;k5;nKHlv8l?6c4l@5`PH(%Gy6^5jfVViN)h9=@5)C^-8cE^;pcf-vDj6_9Ey$EfRU7YvT*hx{f;AVA=6%6SG*AWb1_!Io1+E z<97mzUeywMQ+0>7jvGgm&)6dv+^+{AK+6qh8Yf21Y?)t8fC)6{dU!(Js<2YF$!FesAA5F_;IiM9L9 zVV8&Ecip#P=iLn9Cp#t4xKa#)=k!Y~y_>4MN=}XC*MIVRONI9#aSR$plB9~tW|6e_ zkT7s0#NZ#wKNLQCXO=|7ZET6Et;o54ll=92pz#-9T_-C!CtPu1DH_idKH^C~Aj0;( z-=gGHp9o5@aMk$de9_xu?HlewdOlrfh0o&$d)=L8w>uwU&(FL{hVSRRFKmalVrB1j z;qj=@X|C1vMFh4V7bzR{7$@#O#h!f;enHvk5g1g|lS(=i%!Hbo6W47HAVftLTYOXyYP^BF#|iRe4eo*b+l^5EoMSEL}Tp;H>S)J_^h)rVH%C$?A@}Z=&)@Dl$iEfTA#5**k}eU6ff zTV1K5O7U$IBaw!(7TF@nePK?$C|Yq6&o?@{Yf_^7IDfoACW_;=vP22FX8Fvqr7E=Z zqUo_D92MH+JzHfR>y&YIXgKEY9`x71hz9&MFsQihe@<}WN)zbX?w#X@>NtKqQE4iq z43=GCR2@QDE=9>Vu&qqQ)F|7sj6w?=?ZuQ-Pk8OooTYaSgw1BySHK|Iyvj6HJ!LJF zeUV4t2fya4A(_Lr%Y(Rzwl(uzt*bGeQryIs&%!sQG#3X8h!%hE&Q@zs{iwo!NLFQ| z1^s3xrQ&`(z!$v1&^c}`7yxU-XJs!xp$aRWec>{YedJQz`|)#?xwo6i_d)5%HKJK) 
z)%^J|nd0IHSoCwR=xxgq%kjKae8emWMhDD-vTg;K7yQK*QtFpDt>p`&bXUsHG{%TDD5U+66NaAnGtH?p9NEl*zM5sn|2YdDVChVyD zjuzzi6ZdQsZ0;vT<&L;hAJ)|Q(^9VlMcOkEPz3X{h6Z73w71_ZU#Sv9m1kU6HqW0= zE8|Yf$#8B)t2IQ`pD1;4GTWVBs~52kOdTu|V8xE=df&!A$usmM5EbD>S2L2V{=}VD zcS^S75wVfuDSG2Crh#EbqE6I4!Io#>mV{=(6LoZ+XS3eCzn@577+P-G8L+4ui~rK- zMR)&bciL$q=2zUq>7_*6O*^IK$Y{3m04)!C)0qB(4rIAiwS6~8IZaHYXRGA!rb*jI3!OQb#fJ288hFt9j5ReCKU zstlDoHOEBNBWQ7}fM1ifJXuw!&+Y`4Icqwz=)4uGHN*7tnTW;tn^njgWX{4u6{8-H zw&NkuG+Z&5oKhvnIcCyo+;9ZXI#X5>sg5gMz19n3ClPl}(fzJMc- zd&ui1j}yk7MI?65Ka=XzOinW&$U)j5)lW%y+4eQwX9_9>o!XYEDr=`q;ml<|aoeli zh+05bKF9c~o4%izHx+PV-4tSIM3|W;;IMvC+yr+&JHdG@zjo8a`9oZVUP*%wBJnh# zOl`0tf3Tm6z45SIjI?<$&Rs_j_^yQmD4<$);j(t#IsKd(Rfu(tjBB_o`25|{$DQUh-?0+NR zT{wf5#lqH?lvoZn_&z;{91HOF{D`Ap`wahWsP zi_jGmB-r)9fYsE6*f5caJJn zc{x<)c)eTALJnb@#wmK{vNzXY!)aK~wF0&u3Z6R-`))PA&d+DeC}x*FM!O6B)Kcn{ zb*0|O-t)EPI&$qtoj5#ZrMQTLrfvTWv;AzU0V@ZQe?Oup*s)${5ZG4{l`?F)*waW= ztr!kHuyC%Z^%tXnrGYty;kWJC&NP%(%ORi0IC0B1fz-E7Vj9o4jL_^6O=u&R)`wnH zPhncafO2|SG&{)X;|&>%%yTAi-HNB8aFKa!}MdPh?U2H~~L*Dd;;8Q(U zLz=yP{{=?(<{PwUBpSTu)%{G4;gKP6uXx+XA1I}%IMJBS&jIB=nF?0p?@SCyV5u~w zxDC|-69DvQ`U)^@3QmRHkm363Z%dsTalUd`D!59W$q>oA3VU;OMEd|a`|#m`U25?} zwpjy^<&Cbz@BdtW!DxPYw8mQAsUq2-Hlt)@uC=ow>ShtCT=uwUG-yGR$!+?es%Gn5 zmAKvkI zffCe8$wgHd)Q#~Gh%KB_GOD5C?-jX8zT8Yk{S}tWiut++Y#O47#=#|WS0+so@5 zTSYY+Tim=UxKgm>%DnzSozs) zvcq`AuN@b!LYkYFq2t`=y9@Bm_j|Krcy32EiS8oS{>8R?(5-nTN+xfA@aEo4y}j8a zK}yQe7#jzRYKi(@5H(o+8T*a-}r_k0!(4Np{V ze;Q=H^;f#Cte)c^WzFAP@=|S5A z(p`mE9G+g*KAvvD>GahC6D3}K8+(UqHO_4EVWP

m^m=cWXDZ7b!E{*~tMYDl7vA zp)oGU6v$w9Z$V{jIP|TD6C7+Ca(<2(_>rh2rv7~Skjy$<28jsPEV4j=7B4!5S@AU~ zu+`%>-qnV(rC?NV51GZvzyA{&owCV=fF_(G5<&z8v$cmE?)@P37Rmc8VWGj^9!|F0 zY^;sY2;z-Jr%7!>hyxoo@4!n(dWia3%L;c|aj~Ts+2tfAs@fgv!!ZFh^w6$yeIO4_c#AMjQR%c~H1iEKRxYS*Rs{?WZCX2%f|$tK3|u=GUoFoon&EjekxZA- z-g(7!TLm1|F^!@;dc0(NOeUUVc9DsAcF!%|zFawJdpgI08wM$=k!B`d#Jc8xPi_H%E(1y{o* zuVT}@+lXLzFlQZW!U5}kQ1N~FzzEnxZ;H6u$n)?QXH(g>dTyGw6x`*G0~9tj`X%L} zhwX&s^L0+OujNMr{(ueB>>Ad+=c=qITEZ`_vF)17+c7ipz~=!}NcqD?)r9 z69Y9Ix5VR9)<4S7Z(w7@n^2%OzY&A)zxZNr}VZQ%)?3-c-;thvZzx(n5WhE%6C#m@ZwpVvOIeIvm zRKeC#Z!WLdNIyHt+^VJ6{s^y=m=Zxoq|Q2dwy+;pDQl0ozJ$FBWqZBClpZg0MJ?@; zrpCm@P{nwj`jN5LnUBs$BxkVWmiE%liWf1;S5kdy+2j$=Omew%m)~#I%le7MVc-af zY>hPT3>ll-KVhBp9Vr~El#McFQ?Q6djI0@&vcR6Yil~acI83_lP&kji?UCesRdIJv zSy|$zFIhF@N1woJ8-@&#U_uv&N&lX#g`Yz^B~9g|>gICyO+3hR0FEBHg~3{rd6R0p z*z|&if*3lZHxVz;QWf?E8SB2zSc@Zw?t4sJa_oK9H;)&ztaFIj)mLwb`Q1TG>!xjd zgru;bbU37S$CM%1>F<$a<=jj(v%UR|+fNUiqPP(rpCL)oEWe=xX5jwJaPLikU;a0j28*UFhKP(EKY5YRd%{W zp_YA#g?(nG(sC5MsN?QSyPKlP7vs#-*cnFgSTg76_Z6o%S`fzeUx`wA9<=iQJ~MTM z633|=3T)39H47Z;MD{(L9bTiTILjFZ{(;VCbW=$I}ov)ai&)3{X) zy*Bff*3eiLc0z6o3(xg39<++~=_}hZQVGH=DNC*`+2i(%wmVI6SxD%Y)XTvsmw}k^ zwjT4(3#*>4<~u74#Tsb^g{s98&ALA%S68EMv$52Ud~W5-PXk^AVxp)pcua9HvXw^Yx`P49gRA5ty%$GF>DmVp&og zl@nihK#WYdkWP{MojydBD=&&V+>U&0dAwNgi8S#(y@zQ{;B?Sn)8+(zX(cxk8! zIy6U&YVsJV=Z=F&5>gz8qipyY64h|mt*4`tfW~c+ra(>-R0Od|6{fxC3-cFi4l07J z``6-Qh=Ig_&plK`K=mJgq+$kn-L_bXVekisgkq7;z1Fqx|BGY6fKVY3L7j}I@Q^Te z;!u^0cH_nea;z8J@-dqG7C>e!#bQs6&d+&}(-SY!w9f4cgAJOFXVpEAg-<$};BL|Ff2 z7ySG8NRkZw`;Y(Sv0^Nk>D^mKl}fDt&6WP!l@LLNQU19#|EaktjG#lK$6sdr_m>LJ o2s#hve>(3!+?fB*citbA9?S~|bmwJBx3rQj2ZvX%Q literal 0 HcmV?d00001 diff --git a/docs/source/index.rst b/docs/source/index.rst index b0103a336..34eb23b28 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -43,27 +43,13 @@ Example .. 
ipython:: python - import datafusion - from datafusion import col - import pyarrow + from datafusion import SessionContext - # create a context - ctx = datafusion.SessionContext() + ctx = SessionContext() - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]], name="batch_array") + df = ctx.read_csv("pokemon.csv") - # create a new statement - df = df.select( - col("a") + col("b"), - col("a") - col("b"), - ) - - df + df.show() .. _toc.links: @@ -85,9 +71,10 @@ Example user-guide/introduction user-guide/basics - user-guide/configuration + user-guide/data-sources user-guide/common-operations/index user-guide/io/index + user-guide/configuration user-guide/sql diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index 3c97d1ef9..f37378a41 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -20,72 +20,76 @@ Concepts ======== -In this section, we will cover a basic example to introduce a few key concepts. +In this section, we will cover a basic example to introduce a few key concepts. We will use the same +source file as described in the :ref:`Introduction `, the Pokemon data set. -.. code-block:: python +.. 
ipython:: python - import datafusion - from datafusion import col - import pyarrow + from datafusion import SessionContext, col, lit, functions as f - # create a context - ctx = datafusion.SessionContext() + ctx = SessionContext() - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) + df = ctx.read_parquet("yellow_tripdata_2021-01.parquet") - # create a new statement df = df.select( - col("a") + col("b"), - col("a") - col("b"), + "trip_distance", + col("total_amount").alias("total"), + (f.round(lit(100.0) * col("tip_amount") / col("total_amount"), lit(1))).alias("tip_percent"), ) - # execute and collect the first (and only) batch - result = df.collect()[0] + df.show() -The first statement group: +Session Context +--------------- + +The first statement group creates a :py:class:`~datafusion.context.SessionContext`. .. code-block:: python # create a context ctx = datafusion.SessionContext() -creates a :py:class:`~datafusion.context.SessionContext`, that is, the main interface for executing queries with DataFusion. It maintains the state -of the connection between a user and an instance of the DataFusion engine. Additionally it provides the following functionality: +A Session Context is the main interface for executing queries with DataFusion. It maintains the state +of the connection between a user and an instance of the DataFusion engine. Additionally it provides +the following functionality: -- Create a DataFrame from a CSV or Parquet data source. -- Register a CSV or Parquet data source as a table that can be referenced from a SQL query. -- Register a custom data source that can be referenced from a SQL query. +- Create a DataFrame from a data source. +- Register a data source as a table that can be referenced from a SQL query. 
- Execute a SQL query +DataFrame +--------- + The second statement group creates a :code:`DataFrame`, .. code-block:: python - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) + # Create a DataFrame from a file + df = ctx.read_parquet("yellow_tripdata_2021-01.parquet") A DataFrame refers to a (logical) set of rows that share the same column names, similar to a `Pandas DataFrame `_. DataFrames are typically created by calling a method on :py:class:`~datafusion.context.SessionContext`, such as :code:`read_csv`, and can then be modified by calling the transformation methods, such as :py:func:`~datafusion.dataframe.DataFrame.filter`, :py:func:`~datafusion.dataframe.DataFrame.select`, :py:func:`~datafusion.dataframe.DataFrame.aggregate`, and :py:func:`~datafusion.dataframe.DataFrame.limit` to build up a query definition. -The third statement uses :code:`Expressions` to build up a query definition. +Expressions +----------- + +The third statement uses :code:`Expressions` to build up a query definition. You can find +explanations for what the functions below do in the user documentation for +:py:func:`~datafusion.col`, :py:func:`~datafusion.lit`, :py:func:`~datafusion.functions.round`, +and :py:func:`~datafusion.expr.Expr.alias`. .. code-block:: python df = df.select( - col("a") + col("b"), - col("a") - col("b"), + "trip_distance", + col("total_amount").alias("total"), + (f.round(lit(100.0) * col("tip_amount") / col("total_amount"), lit(1))).alias("tip_percent"), ) -Finally the :py:func:`~datafusion.dataframe.DataFrame.collect` method converts the logical plan represented by the DataFrame into a physical plan and execute it, -collecting all results into a list of `RecordBatch `_. 
+Finally the :py:func:`~datafusion.dataframe.DataFrame.show` method converts the logical plan +represented by the DataFrame into a physical plan and executes it, collecting all results and +displaying them to the user. It is important to note that DataFusion performs lazy evaluation +of the DataFrame. Until you call a method such as :py:func:`~datafusion.dataframe.DataFrame.show` +or :py:func:`~datafusion.dataframe.DataFrame.collect`, DataFusion will not perform the query. diff --git a/docs/source/user-guide/common-operations/aggregations.rst b/docs/source/user-guide/common-operations/aggregations.rst index 8fee26a15..e458e5fcb 100644 --- a/docs/source/user-guide/common-operations/aggregations.rst +++ b/docs/source/user-guide/common-operations/aggregations.rst @@ -26,15 +26,7 @@ to form a single summary value. For performing an aggregation, DataFusion provid .. ipython:: python - import urllib.request - from datafusion import SessionContext - from datafusion import col, lit - from datafusion import functions as f - - urllib.request.urlretrieve( - "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv", - "pokemon.csv", - ) + from datafusion import SessionContext, col, lit, functions as f ctx = SessionContext() df = ctx.read_csv("pokemon.csv") diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index a0b95c908..8d6a80855 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -25,14 +25,8 @@ We'll use the pokemon dataset in the following examples. .. 
ipython:: python - import urllib.request from datafusion import SessionContext - urllib.request.urlretrieve( - "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv", - "pokemon.csv", - ) - ctx = SessionContext() ctx.register_csv("pokemon", "pokemon.csv") df = ctx.table("pokemon") diff --git a/docs/source/user-guide/common-operations/index.rst b/docs/source/user-guide/common-operations/index.rst index b15b04c62..d7c708c21 100644 --- a/docs/source/user-guide/common-operations/index.rst +++ b/docs/source/user-guide/common-operations/index.rst @@ -18,6 +18,8 @@ Common Operations ================= +The contents of this section are designed to guide a new user through how to use DataFusion. + .. toctree:: :maxdepth: 2 diff --git a/docs/source/user-guide/common-operations/select-and-filter.rst b/docs/source/user-guide/common-operations/select-and-filter.rst index 075909129..083bcbbd2 100644 --- a/docs/source/user-guide/common-operations/select-and-filter.rst +++ b/docs/source/user-guide/common-operations/select-and-filter.rst @@ -21,18 +21,15 @@ Column Selections Use :py:func:`~datafusion.dataframe.DataFrame.select` for basic column selection. DataFusion can work with several file types, to start simple we can use a subset of the -`TLC Trip Record Data `_ +`TLC Trip Record Data `_, +which you can download `here `_. .. 
ipython:: python - - import urllib.request - from datafusion import SessionContext - urllib.request.urlretrieve("https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet", - "yellow_trip_data.parquet") + from datafusion import SessionContext ctx = SessionContext() - df = ctx.read_parquet("yellow_trip_data.parquet") + df = ctx.read_parquet("yellow_tripdata_2021-01.parquet") df.select("trip_distance", "passenger_count") For mathematical or logical operations use :py:func:`~datafusion.col` to select columns, and give meaningful names to the resulting diff --git a/docs/source/user-guide/common-operations/windows.rst b/docs/source/user-guide/common-operations/windows.rst index 609176897..8225d125a 100644 --- a/docs/source/user-guide/common-operations/windows.rst +++ b/docs/source/user-guide/common-operations/windows.rst @@ -30,16 +30,10 @@ We'll use the pokemon dataset (from Ritchie Vink) in the following examples. .. ipython:: python - import urllib.request from datafusion import SessionContext from datafusion import col from datafusion import functions as f - urllib.request.urlretrieve( - "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv", - "pokemon.csv", - ) - ctx = SessionContext() df = ctx.read_csv("pokemon.csv") diff --git a/docs/source/user-guide/data-sources.rst b/docs/source/user-guide/data-sources.rst new file mode 100644 index 000000000..ba5967c97 --- /dev/null +++ b/docs/source/user-guide/data-sources.rst @@ -0,0 +1,187 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _user_guide_data_sources: + +Data Sources +============ + +DataFusion provides a wide variety of ways to get data into a DataFrame to perform operations. + +Local file +---------- + +DataFusion has the ability to read from a variety of popular file formats, such as :ref:`Parquet <io_parquet>`, +:ref:`CSV <io_csv>`, :ref:`JSON <io_json>`, and :ref:`AVRO <io_avro>`. + +.. ipython:: python + + from datafusion import SessionContext + ctx = SessionContext() + df = ctx.read_csv("pokemon.csv") + df.show() + +Create in-memory +---------------- + +Sometimes it can be convenient to create a small DataFrame from a Python list or dictionary object. +To do this in DataFusion, you can use one of the three functions +:py:func:`~datafusion.context.SessionContext.from_pydict`, +:py:func:`~datafusion.context.SessionContext.from_pylist`, or +:py:func:`~datafusion.context.SessionContext.create_dataframe`. + +As their names suggest, ``from_pydict`` and ``from_pylist`` will create DataFrames from Python +dictionary and list objects, respectively. ``create_dataframe`` assumes you will pass in a list +of lists of `PyArrow Record Batches `_. + +The following three examples all will create identical DataFrames: + +.. 
ipython:: python + + import pyarrow as pa + + ctx.from_pylist([ + { "a": 1, "b": 10.0, "c": "alpha" }, + { "a": 2, "b": 20.0, "c": "beta" }, + { "a": 3, "b": 30.0, "c": "gamma" }, + ]).show() + + ctx.from_pydict({ + "a": [1, 2, 3], + "b": [10.0, 20.0, 30.0], + "c": ["alpha", "beta", "gamma"], + }).show() + + batch = pa.RecordBatch.from_arrays( + [ + pa.array([1, 2, 3]), + pa.array([10.0, 20.0, 30.0]), + pa.array(["alpha", "beta", "gamma"]), + ], + names=["a", "b", "c"], + ) + + ctx.create_dataframe([[batch]]).show() + + +Object Store +------------ + +DataFusion has support for multiple storage options in addition to local files. +The example below requires an appropriate S3 account with access credentials. + +Supported Object Stores are + +- :py:class:`~datafusion.object_store.AmazonS3` +- :py:class:`~datafusion.object_store.GoogleCloud` +- :py:class:`~datafusion.object_store.Http` +- :py:class:`~datafusion.object_store.LocalFileSystem` +- :py:class:`~datafusion.object_store.MicrosoftAzure` + +.. code-block:: python + + from datafusion.object_store import AmazonS3 + + region = "us-east-1" + bucket_name = "yellow-trips" + + s3 = AmazonS3( + bucket_name=bucket_name, + region=region, + access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + ) + + path = f"s3://{bucket_name}/" + ctx.register_object_store("s3://", s3, None) + + ctx.register_parquet("trips", path) + + ctx.table("trips").show() + +Other DataFrame Libraries +------------------------- + +DataFusion can import DataFrames directly from other libraries, such as +`Polars `_ and `Pandas `_. +Since DataFusion version 42.0.0, any DataFrame library that supports the Arrow FFI PyCapsule +interface can be imported to DataFusion using the +:py:func:`~datafusion.context.SessionContext.from_arrow` function. Older versions of Polars may +not support the arrow interface. 
In those cases, you can still import via the +:py:func:`~datafusion.context.SessionContext.from_polars` function. + +.. code-block:: python + + import pandas as pd + + data = { "a": [1, 2, 3], "b": [10.0, 20.0, 30.0], "c": ["alpha", "beta", "gamma"] } + pandas_df = pd.DataFrame(data) + + datafusion_df = ctx.from_arrow(pandas_df) + datafusion_df.show() + +.. code-block:: python + + import polars as pl + polars_df = pl.DataFrame(data) + + datafusion_df = ctx.from_arrow(polars_df) + datafusion_df.show() + +Delta Lake +---------- + +DataFusion 43.0.0 and later support the ability to register table providers from sources such +as Delta Lake. This will require a recent version of +`deltalake `_ to provide the required interfaces. + +.. code-block:: python + + from deltalake import DeltaTable + + delta_table = DeltaTable("path_to_table") + ctx.register_table_provider("my_delta_table", delta_table) + df = ctx.table("my_delta_table") + df.show() + +On older versions of ``deltalake`` (prior to 0.22) you can use the +`Arrow DataSet `_ +interface to import to DataFusion, but this does not support features such as filter push down +which can lead to a significant performance difference. + +.. code-block:: python + + from deltalake import DeltaTable + + delta_table = DeltaTable("path_to_table") + ctx.register_dataset("my_delta_table", delta_table.to_pyarrow_dataset()) + df = ctx.table("my_delta_table") + df.show() + +Iceberg +------- + +Coming soon! + +Custom Table Provider +--------------------- + +You can implement a custom Data Provider in Rust and expose it to DataFusion through the +interface as described in the :ref:`Custom Table Provider <io_custom_table_provider>` +section. This is an advanced topic, but a +`user example `_ +is provided in the DataFusion repository. 
diff --git a/docs/source/user-guide/introduction.rst b/docs/source/user-guide/introduction.rst index 8abb9113e..7b30ef2b2 100644 --- a/docs/source/user-guide/introduction.rst +++ b/docs/source/user-guide/introduction.rst @@ -39,5 +39,39 @@ You can verify the installation by running: import datafusion datafusion.__version__ +In this documentation we will also show some examples for how DataFusion integrates +with Jupyter notebooks. To install and start a JupyterLab session use +.. code-block:: shell + + pip install jupyterlab + jupyter lab + +To demonstrate working with DataFusion, we need a data source. Later in the tutorial we will show +options for data sources. For our first example, we demonstrate using a Pokemon dataset that you +can download +`here `_. + +With that file in place you can use the following python example to view the DataFrame in +DataFusion. + +.. ipython:: python + + from datafusion import SessionContext + + ctx = SessionContext() + + df = ctx.read_csv("pokemon.csv") + + df.show() + +If you are working in a Jupyter notebook, you can also use the following to give you a table +display that may be easier to read. + +.. code-block:: python + + display(df) +.. image:: ../images/jupyter_lab_df_view.png + :width: 800 + :alt: Rendered table showing Pokemon DataFrame diff --git a/docs/source/user-guide/io/avro.rst b/docs/source/user-guide/io/avro.rst index 5f1ff728e..66398ac7f 100644 --- a/docs/source/user-guide/io/avro.rst +++ b/docs/source/user-guide/io/avro.rst @@ -15,6 +15,8 @@ .. specific language governing permissions and limitations .. under the License. +.. 
_io_csv: + CSV === diff --git a/docs/source/user-guide/io/json.rst b/docs/source/user-guide/io/json.rst index f9da3755a..39030db7f 100644 --- a/docs/source/user-guide/io/json.rst +++ b/docs/source/user-guide/io/json.rst @@ -15,6 +15,8 @@ .. specific language governing permissions and limitations .. under the License. +.. _io_json: + JSON ==== `JSON `_ (JavaScript Object Notation) is a lightweight data-interchange format. diff --git a/docs/source/user-guide/io/parquet.rst b/docs/source/user-guide/io/parquet.rst index 75bc981cc..c5b9ca3d4 100644 --- a/docs/source/user-guide/io/parquet.rst +++ b/docs/source/user-guide/io/parquet.rst @@ -15,6 +15,8 @@ .. specific language governing permissions and limitations .. under the License. +.. _io_parquet: + Parquet ======= @@ -22,7 +24,6 @@ It is quite simple to read a parquet file using the :py:func:`~datafusion.contex .. code-block:: python - from datafusion import SessionContext ctx = SessionContext() diff --git a/docs/source/user-guide/io/table_provider.rst b/docs/source/user-guide/io/table_provider.rst index 2ff9ae46f..bd1d6b80f 100644 --- a/docs/source/user-guide/io/table_provider.rst +++ b/docs/source/user-guide/io/table_provider.rst @@ -15,6 +15,8 @@ .. specific language governing permissions and limitations .. under the License. +.. _io_custom_table_provider: + Custom Table Provider ===================== diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index efd4038ae..e283f590e 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -473,7 +473,7 @@ def join_on( *on_exprs: Expr, how: Literal["inner", "left", "right", "full", "semi", "anti"] = "inner", ) -> DataFrame: - """Join two :py:class:`DataFrame`using the specified expressions. + """Join two :py:class:`DataFrame` using the specified expressions. On expressions are used to support in-equality predicates. 
Equality predicates are correctly optimized diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index 3836edec6..a71965f41 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -42,7 +42,7 @@ class LogicalPlan: (table) with a potentially different schema. Plans form a dataflow tree where data flows from leaves up to the root to produce the query result. - `LogicalPlan`s can be created by the SQL query planner, the DataFrame API, + A `LogicalPlan` can be created by the SQL query planner, the DataFrame API, or programmatically (for example custom query languages). """ @@ -107,7 +107,7 @@ def __init__(self, plan: df_internal.ExecutionPlan) -> None: self._raw_plan = plan def children(self) -> List[ExecutionPlan]: - """Get a list of children `ExecutionPlan`s that act as inputs to this plan. + """Get a list of children `ExecutionPlan` that act as inputs to this plan. The returned list will be empty for leaf nodes such as scans, will contain a single value for unary nodes, or two values for binary nodes (such as joins). 
From 2690e61b360a6224f1cd5b5bf29d8c082b87991d Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 30 Nov 2024 23:25:04 +0800 Subject: [PATCH 026/248] Add datafusion.extract (#959) * feat: add extract function as an alias for date_part * docs: update user guide to include examples for date_part and extract functions * fix: update examples in user guide to use f.to_timestamp for date extraction --- docs/source/user-guide/common-operations/functions.rst | 9 +++++++++ python/datafusion/functions.py | 9 +++++++++ python/tests/test_functions.py | 2 ++ 3 files changed, 20 insertions(+) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index 8d6a80855..ad71c72ac 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -72,6 +72,15 @@ Convert to timestamps using :py:func:`~datafusion.functions.to_timestamp` df.select(f.to_timestamp(col('"Total"')).alias("timestamp")) +Extracting parts of a date using :py:func:`~datafusion.functions.date_part` (alias :py:func:`~datafusion.functions.extract`) + +.. ipython:: python + + df.select( + f.date_part(literal("month"), f.to_timestamp(col('"Total"'))).alias("month"), + f.extract(literal("day"), f.to_timestamp(col('"Total"'))).alias("day") + ) + String ------ diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 6ad4c50c2..15ad8822f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -128,6 +128,7 @@ "empty", "encode", "ends_with", + "extract", "exp", "factorial", "find_in_set", @@ -994,6 +995,14 @@ def date_part(part: Expr, date: Expr) -> Expr: return Expr(f.date_part(part.expr, date.expr)) +def extract(part: Expr, date: Expr) -> Expr: + """Extracts a subfield from the date. + + This is an alias for :py:func:`date_part`. 
+ """ + return date_part(part, date) + + def date_trunc(part: Expr, date: Expr) -> Expr: """Truncates the date to a specified level of precision.""" return Expr(f.date_trunc(part.expr, date.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 0d40032bb..0d2fa8f94 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -866,6 +866,7 @@ def test_temporal_functions(df): f.to_timestamp_seconds(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), + f.extract(literal("day"), column("d")), ) result = df.collect() assert len(result) == 1 @@ -903,6 +904,7 @@ def test_temporal_functions(df): assert result.column(9) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) + assert result.column(10) == pa.array([31, 26, 2], type=pa.float64()) def test_case(df): From 5c834934dec89bd96ff70df3b278e9d6fe78f7ec Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 2 Dec 2024 08:05:52 -0700 Subject: [PATCH 027/248] chore: Prepare 43.0.0 Release (#960) * Generate changelog * cargo update --- CHANGELOG.md | 695 +---------------------------------- Cargo.lock | 410 +++++++++++---------- Cargo.toml | 2 +- dev/changelog/43.0.0.md | 73 ++++ dev/changelog/pre-43.0.0.md | 715 ++++++++++++++++++++++++++++++++++++ 5 files changed, 1014 insertions(+), 881 deletions(-) create mode 100644 dev/changelog/43.0.0.md create mode 100644 dev/changelog/pre-43.0.0.md diff --git a/CHANGELOG.md b/CHANGELOG.md index ae3a2348a..ae40911d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,697 +19,4 @@ # DataFusion Python Changelog -## [42.0.0](https://github.com/apache/datafusion-python/tree/42.0.0) (2024-10-06) - -This release consists of 20 commits from 6 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: expose between [#868](https://github.com/apache/datafusion-python/pull/868) (mesejo) -- feat: make register_csv accept a list of paths [#883](https://github.com/apache/datafusion-python/pull/883) (mesejo) -- feat: expose http object store [#885](https://github.com/apache/datafusion-python/pull/885) (mesejo) - -**Fixed bugs:** - -- fix: Calling `count` on a pyarrow dataset results in an error [#843](https://github.com/apache/datafusion-python/pull/843) (Michael-J-Ward) - -**Other:** - -- Upgrade datafusion [#867](https://github.com/apache/datafusion-python/pull/867) (emgeee) -- Feature/aggregates as windows [#871](https://github.com/apache/datafusion-python/pull/871) (timsaucer) -- Fix regression on register_udaf [#878](https://github.com/apache/datafusion-python/pull/878) (timsaucer) -- build(deps): upgrade setup-protoc action and protoc version number [#873](https://github.com/apache/datafusion-python/pull/873) (Michael-J-Ward) -- build(deps): bump prost-types from 0.13.2 to 0.13.3 [#881](https://github.com/apache/datafusion-python/pull/881) (dependabot[bot]) -- build(deps): bump prost from 0.13.2 to 0.13.3 [#882](https://github.com/apache/datafusion-python/pull/882) (dependabot[bot]) -- chore: remove XFAIL from passing tests [#884](https://github.com/apache/datafusion-python/pull/884) (Michael-J-Ward) -- Add user defined window function support [#880](https://github.com/apache/datafusion-python/pull/880) (timsaucer) -- build(deps): bump syn from 2.0.77 to 2.0.79 [#886](https://github.com/apache/datafusion-python/pull/886) (dependabot[bot]) -- fix example of reading parquet from s3 [#896](https://github.com/apache/datafusion-python/pull/896) (sir-sigurd) -- release-testing [#889](https://github.com/apache/datafusion-python/pull/889) (Michael-J-Ward) -- chore(bench): fix create_tables.sql for tpch benchmark [#897](https://github.com/apache/datafusion-python/pull/897) (Michael-J-Ward) -- Add physical and logical plan 
conversion to and from protobuf [#892](https://github.com/apache/datafusion-python/pull/892) (timsaucer) -- Feature/instance udfs [#890](https://github.com/apache/datafusion-python/pull/890) (timsaucer) -- chore(ci): remove Mambaforge variant from CI [#894](https://github.com/apache/datafusion-python/pull/894) (Michael-J-Ward) -- Use OnceLock to store TokioRuntime [#895](https://github.com/apache/datafusion-python/pull/895) (Michael-J-Ward) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 7 Michael J Ward - 5 Tim Saucer - 3 Daniel Mesejo - 3 dependabot[bot] - 1 Matt Green - 1 Sergey Fedoseev -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - -## [41.0.0](https://github.com/apache/datafusion-python/tree/41.0.0) (2024-09-09) - -This release consists of 19 commits from 6 contributors. See credits at the end of this changelog for more information. 
- -**Implemented enhancements:** - -- feat: enable list of paths for read_csv [#824](https://github.com/apache/datafusion-python/pull/824) (mesejo) -- feat: better exception and message for table not found [#851](https://github.com/apache/datafusion-python/pull/851) (mesejo) -- feat: make cast accept built-in Python types [#858](https://github.com/apache/datafusion-python/pull/858) (mesejo) - -**Other:** - -- chore: Prepare for 40.0.0 release [#801](https://github.com/apache/datafusion-python/pull/801) (andygrove) -- Add typing-extensions dependency to pyproject [#805](https://github.com/apache/datafusion-python/pull/805) (timsaucer) -- Upgrade deps to datafusion 41 [#802](https://github.com/apache/datafusion-python/pull/802) (Michael-J-Ward) -- Fix SessionContext init with only SessionConfig [#827](https://github.com/apache/datafusion-python/pull/827) (jcrist) -- build(deps): upgrade actions/{upload,download}-artifact@v3 to v4 [#829](https://github.com/apache/datafusion-python/pull/829) (Michael-J-Ward) -- Run ruff format in CI [#837](https://github.com/apache/datafusion-python/pull/837) (timsaucer) -- Add PyCapsule support for Arrow import and export [#825](https://github.com/apache/datafusion-python/pull/825) (timsaucer) -- Feature/expose when function [#836](https://github.com/apache/datafusion-python/pull/836) (timsaucer) -- Add Window Functions for use with function builder [#808](https://github.com/apache/datafusion-python/pull/808) (timsaucer) -- chore: fix typos [#844](https://github.com/apache/datafusion-python/pull/844) (mesejo) -- build(ci): use proper mac runners [#841](https://github.com/apache/datafusion-python/pull/841) (Michael-J-Ward) -- Set of small features [#839](https://github.com/apache/datafusion-python/pull/839) (timsaucer) -- chore: fix docstrings, typos [#852](https://github.com/apache/datafusion-python/pull/852) (mesejo) -- chore: Use datafusion re-exported dependencies [#856](https://github.com/apache/datafusion-python/pull/856) 
(emgeee) -- add guidelines on separating python and rust code [#860](https://github.com/apache/datafusion-python/pull/860) (Michael-J-Ward) -- Update Aggregate functions to take builder parameters [#859](https://github.com/apache/datafusion-python/pull/859) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 7 Tim Saucer - 5 Daniel Mesejo - 4 Michael J Ward - 1 Andy Grove - 1 Jim Crist-Harif - 1 Matt Green -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. - -## [40.0.0](https://github.com/apache/datafusion-python/tree/40.0.0) (2024-08-09) - -This release consists of 18 commits from 4 contributors. See credits at the end of this changelog for more information. - -- Update changelog for 39.0.0 [#742](https://github.com/apache/datafusion-python/pull/742) (andygrove) -- build(deps): bump uuid from 1.8.0 to 1.9.1 [#744](https://github.com/apache/datafusion-python/pull/744) (dependabot[bot]) -- build(deps): bump mimalloc from 0.1.42 to 0.1.43 [#745](https://github.com/apache/datafusion-python/pull/745) (dependabot[bot]) -- build(deps): bump syn from 2.0.67 to 2.0.68 [#746](https://github.com/apache/datafusion-python/pull/746) (dependabot[bot]) -- Tsaucer/find window fn [#747](https://github.com/apache/datafusion-python/pull/747) (timsaucer) -- Python wrapper classes for all user interfaces [#750](https://github.com/apache/datafusion-python/pull/750) (timsaucer) -- Expose array sort [#764](https://github.com/apache/datafusion-python/pull/764) (timsaucer) -- Upgrade protobuf and remove GH Action googletest-installer [#773](https://github.com/apache/datafusion-python/pull/773) (Michael-J-Ward) -- Upgrade Datafusion 40 [#771](https://github.com/apache/datafusion-python/pull/771) (Michael-J-Ward) -- Bugfix: Calling count with None arguments 
[#768](https://github.com/apache/datafusion-python/pull/768) (timsaucer) -- Add in user example that compares a two different approaches to UDFs [#770](https://github.com/apache/datafusion-python/pull/770) (timsaucer) -- Add missing exports for wrapper modules [#782](https://github.com/apache/datafusion-python/pull/782) (timsaucer) -- Add PyExpr to_variant conversions [#793](https://github.com/apache/datafusion-python/pull/793) (Michael-J-Ward) -- Add missing expressions to wrapper export [#795](https://github.com/apache/datafusion-python/pull/795) (timsaucer) -- Doc/cross reference [#791](https://github.com/apache/datafusion-python/pull/791) (timsaucer) -- Re-Enable `num_centroids` to `approx_percentile_cont` [#798](https://github.com/apache/datafusion-python/pull/798) (Michael-J-Ward) -- UDAF process all state variables [#799](https://github.com/apache/datafusion-python/pull/799) (timsaucer) - -## Credits - -Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. - -``` - 9 Tim Saucer - 4 Michael J Ward - 3 dependabot[bot] - 2 Andy Grove -``` - -Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
- -## [39.0.0](https://github.com/apache/datafusion-python/tree/39.0.0) (2024-06-25) - -**Merged pull requests:** - -- ci: add substrait feature to linux builds [#720](https://github.com/apache/datafusion-python/pull/720) (Michael-J-Ward) -- Docs deploy action [#721](https://github.com/apache/datafusion-python/pull/721) (Michael-J-Ward) -- update deps [#723](https://github.com/apache/datafusion-python/pull/723) (Michael-J-Ward) -- Upgrade maturin [#725](https://github.com/apache/datafusion-python/pull/725) (Michael-J-Ward) -- Upgrade datafusion 39 [#728](https://github.com/apache/datafusion-python/pull/728) (Michael-J-Ward) -- use ScalarValue::to_pyarrow to convert to python object [#731](https://github.com/apache/datafusion-python/pull/731) (Michael-J-Ward) -- Pyo3 `Bound<'py, T>` api [#734](https://github.com/apache/datafusion-python/pull/734) (Michael-J-Ward) -- github test action: drop python 3.7, add python 3.12 [#736](https://github.com/apache/datafusion-python/pull/736) (Michael-J-Ward) -- Pyarrow filter pushdowns [#735](https://github.com/apache/datafusion-python/pull/735) (Michael-J-Ward) -- build(deps): bump syn from 2.0.66 to 2.0.67 [#738](https://github.com/apache/datafusion-python/pull/738) (dependabot[bot]) -- Pyo3 refactorings [#740](https://github.com/apache/datafusion-python/pull/740) (Michael-J-Ward) -- UDAF `sum` workaround [#741](https://github.com/apache/datafusion-python/pull/741) (Michael-J-Ward) - -## [38.0.1](https://github.com/apache/datafusion-python/tree/38.0.1) (2024-05-25) - -**Implemented enhancements:** - -- feat: add python bindings for ends_with function [#693](https://github.com/apache/datafusion-python/pull/693) (richtia) -- feat: expose `named_struct` in python [#700](https://github.com/apache/datafusion-python/pull/700) (Michael-J-Ward) - -**Merged pull requests:** - -- Add document about basics of working with expressions [#668](https://github.com/apache/datafusion-python/pull/668) (timsaucer) -- chore: Update Python release 
process now that DataFusion is TLP [#674](https://github.com/apache/datafusion-python/pull/674) (andygrove) -- Fix Docs [#676](https://github.com/apache/datafusion-python/pull/676) (Michael-J-Ward) -- Add examples from TPC-H [#666](https://github.com/apache/datafusion-python/pull/666) (timsaucer) -- fix conda nightly builds, attempt 2 [#689](https://github.com/apache/datafusion-python/pull/689) (Michael-J-Ward) -- Upgrade to datafusion 38 [#691](https://github.com/apache/datafusion-python/pull/691) (Michael-J-Ward) -- chore: update to maturin's recommended project layout for rust/python… [#695](https://github.com/apache/datafusion-python/pull/695) (Michael-J-Ward) -- chore: update cargo deps [#698](https://github.com/apache/datafusion-python/pull/698) (Michael-J-Ward) -- feat: add python bindings for ends_with function [#693](https://github.com/apache/datafusion-python/pull/693) (richtia) -- feat: expose `named_struct` in python [#700](https://github.com/apache/datafusion-python/pull/700) (Michael-J-Ward) -- Website fixes [#702](https://github.com/apache/datafusion-python/pull/702) (Michael-J-Ward) - -## [37.1.0](https://github.com/apache/datafusion-python/tree/37.1.0) (2024-05-08) - -**Implemented enhancements:** - -- feat: add execute_stream and execute_stream_partitioned [#610](https://github.com/apache/datafusion-python/pull/610) (mesejo) - -**Documentation updates:** - -- docs: update docs CI to install python-311 requirements [#661](https://github.com/apache/datafusion-python/pull/661) (Michael-J-Ward) - -**Merged pull requests:** - -- Switch to Ruff for Python linting [#529](https://github.com/apache/datafusion-python/pull/529) (andygrove) -- Remove sql-on-pandas/polars/cudf examples [#602](https://github.com/apache/datafusion-python/pull/602) (andygrove) -- build(deps): bump object_store from 0.9.0 to 0.9.1 [#611](https://github.com/apache/datafusion-python/pull/611) (dependabot[bot]) -- More missing array funcs 
[#605](https://github.com/apache/datafusion-python/pull/605) (judahrand) -- feat: add execute_stream and execute_stream_partitioned [#610](https://github.com/apache/datafusion-python/pull/610) (mesejo) -- build(deps): bump uuid from 1.7.0 to 1.8.0 [#615](https://github.com/apache/datafusion-python/pull/615) (dependabot[bot]) -- Bind SQLOptions and relative ctx method #567 [#588](https://github.com/apache/datafusion-python/pull/588) (giacomorebecchi) -- bugfix: no panic on empty table [#613](https://github.com/apache/datafusion-python/pull/613) (mesejo) -- Expose `register_listing_table` [#618](https://github.com/apache/datafusion-python/pull/618) (henrifroese) -- Expose unnest feature [#641](https://github.com/apache/datafusion-python/pull/641) (timsaucer) -- Update domain names and paths in asf yaml [#643](https://github.com/apache/datafusion-python/pull/643) (andygrove) -- use python 3.11 to publish docs [#645](https://github.com/apache/datafusion-python/pull/645) (andygrove) -- docs: update docs CI to install python-311 requirements [#661](https://github.com/apache/datafusion-python/pull/661) (Michael-J-Ward) -- Upgrade Datafusion to v37.1.0 [#669](https://github.com/apache/datafusion-python/pull/669) (Michael-J-Ward) - -## [36.0.0](https://github.com/apache/datafusion-python/tree/36.0.0) (2024-03-02) - -**Implemented enhancements:** - -- feat: Add `flatten` array function [#562](https://github.com/apache/datafusion-python/pull/562) (mobley-trent) - -**Documentation updates:** - -- docs: Add ASF attribution [#580](https://github.com/apache/datafusion-python/pull/580) (simicd) - -**Merged pull requests:** - -- Allow PyDataFrame to be used from other projects [#582](https://github.com/apache/datafusion-python/pull/582) (andygrove) -- docs: Add ASF attribution [#580](https://github.com/apache/datafusion-python/pull/580) (simicd) -- Add array functions [#560](https://github.com/apache/datafusion-python/pull/560) (ongchi) -- feat: Add `flatten` array function 
[#562](https://github.com/apache/datafusion-python/pull/562) (mobley-trent) - -## [35.0.0](https://github.com/apache/datafusion-python/tree/35.0.0) (2024-01-20) - -**Merged pull requests:** - -- build(deps): bump syn from 2.0.41 to 2.0.43 [#559](https://github.com/apache/datafusion-python/pull/559) (dependabot[bot]) -- build(deps): bump tokio from 1.35.0 to 1.35.1 [#558](https://github.com/apache/datafusion-python/pull/558) (dependabot[bot]) -- build(deps): bump async-trait from 0.1.74 to 0.1.77 [#556](https://github.com/apache/datafusion-python/pull/556) (dependabot[bot]) -- build(deps): bump pyo3 from 0.20.0 to 0.20.2 [#557](https://github.com/apache/datafusion-python/pull/557) (dependabot[bot]) - -## [34.0.0](https://github.com/apache/datafusion-python/tree/34.0.0) (2023-12-28) - -**Merged pull requests:** - -- Adjust visibility of crate private members & Functions [#537](https://github.com/apache/datafusion-python/pull/537) (jdye64) -- Update json.rst [#538](https://github.com/apache/datafusion-python/pull/538) (ray-andrew) -- Enable mimalloc local_dynamic_tls feature [#540](https://github.com/apache/datafusion-python/pull/540) (jdye64) -- Enable substrait feature to be built by default in CI, for nightlies … [#544](https://github.com/apache/datafusion-python/pull/544) (jdye64) - -## [33.0.0](https://github.com/apache/datafusion-python/tree/33.0.0) (2023-11-16) - -**Merged pull requests:** - -- First pass at getting architectured builds working [#350](https://github.com/apache/datafusion-python/pull/350) (charlesbluca) -- Remove libprotobuf dep [#527](https://github.com/apache/datafusion-python/pull/527) (jdye64) - -## [32.0.0](https://github.com/apache/datafusion-python/tree/32.0.0) (2023-10-21) - -**Implemented enhancements:** - -- feat: expose PyWindowFrame [#509](https://github.com/apache/datafusion-python/pull/509) (dlovell) -- add Binary String Functions;encode,decode [#494](https://github.com/apache/datafusion-python/pull/494) (jiangzhx) -- add 
bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/datafusion-python/pull/496) (jiangzhx) -- add first_value last_value [#498](https://github.com/apache/datafusion-python/pull/498) (jiangzhx) -- add regr\_\* functions [#499](https://github.com/apache/datafusion-python/pull/499) (jiangzhx) -- Add random missing bindings [#522](https://github.com/apache/datafusion-python/pull/522) (jdye64) -- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/datafusion-python/pull/519) (jdye64) -- Add support for window function bindings [#521](https://github.com/apache/datafusion-python/pull/521) (jdye64) - -**Merged pull requests:** - -- Prepare 31.0.0 release [#500](https://github.com/apache/datafusion-python/pull/500) (andygrove) -- Improve release process documentation [#505](https://github.com/apache/datafusion-python/pull/505) (andygrove) -- add Binary String Functions;encode,decode [#494](https://github.com/apache/datafusion-python/pull/494) (jiangzhx) -- build(deps): bump mimalloc from 0.1.38 to 0.1.39 [#502](https://github.com/apache/datafusion-python/pull/502) (dependabot[bot]) -- build(deps): bump syn from 2.0.32 to 2.0.35 [#503](https://github.com/apache/datafusion-python/pull/503) (dependabot[bot]) -- build(deps): bump syn from 2.0.35 to 2.0.37 [#506](https://github.com/apache/datafusion-python/pull/506) (dependabot[bot]) -- Use latest DataFusion [#511](https://github.com/apache/datafusion-python/pull/511) (andygrove) -- add bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/datafusion-python/pull/496) (jiangzhx) -- use DataFusion 32 [#515](https://github.com/apache/datafusion-python/pull/515) (andygrove) -- add first_value last_value [#498](https://github.com/apache/datafusion-python/pull/498) (jiangzhx) -- build(deps): bump regex-syntax from 0.7.5 to 0.8.1 [#517](https://github.com/apache/datafusion-python/pull/517) (dependabot[bot]) -- build(deps): bump pyo3-build-config from 
0.19.2 to 0.20.0 [#516](https://github.com/apache/datafusion-python/pull/516) (dependabot[bot]) -- add regr\_\* functions [#499](https://github.com/apache/datafusion-python/pull/499) (jiangzhx) -- Add random missing bindings [#522](https://github.com/apache/datafusion-python/pull/522) (jdye64) -- build(deps): bump rustix from 0.38.18 to 0.38.19 [#523](https://github.com/apache/datafusion-python/pull/523) (dependabot[bot]) -- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/datafusion-python/pull/519) (jdye64) -- Add support for window function bindings [#521](https://github.com/apache/datafusion-python/pull/521) (jdye64) -- Small clippy fix [#524](https://github.com/apache/datafusion-python/pull/524) (andygrove) - -## [31.0.0](https://github.com/apache/datafusion-python/tree/31.0.0) (2023-09-12) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/28.0.0...31.0.0) - -**Implemented enhancements:** - -- feat: add case function (#447) [#448](https://github.com/apache/datafusion-python/pull/448) (mesejo) -- feat: add compression options [#456](https://github.com/apache/datafusion-python/pull/456) (mesejo) -- feat: add register_json [#458](https://github.com/apache/datafusion-python/pull/458) (mesejo) -- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/datafusion-python/pull/459) (mesejo) -- feat: add example of reading parquet from s3 [#460](https://github.com/apache/datafusion-python/pull/460) (mesejo) -- feat: add register_avro and read_table [#461](https://github.com/apache/datafusion-python/pull/461) (mesejo) -- feat: add missing scalar math functions [#465](https://github.com/apache/datafusion-python/pull/465) (mesejo) - -**Documentation updates:** - -- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/datafusion-python/pull/455) (mesejo) - -**Merged pull requests:** - -- Build Linux aarch64 wheel 
[#443](https://github.com/apache/datafusion-python/pull/443) (gokselk) -- feat: add case function (#447) [#448](https://github.com/apache/datafusion-python/pull/448) (mesejo) -- enhancement(docs): Add user guide (#432) [#445](https://github.com/apache/datafusion-python/pull/445) (mesejo) -- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/datafusion-python/pull/455) (mesejo) -- feat: add compression options [#456](https://github.com/apache/datafusion-python/pull/456) (mesejo) -- Upgrade to DF 28.0.0-rc1 [#457](https://github.com/apache/datafusion-python/pull/457) (andygrove) -- feat: add register_json [#458](https://github.com/apache/datafusion-python/pull/458) (mesejo) -- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/datafusion-python/pull/459) (mesejo) -- feat: add example of reading parquet from s3 [#460](https://github.com/apache/datafusion-python/pull/460) (mesejo) -- feat: add register_avro and read_table [#461](https://github.com/apache/datafusion-python/pull/461) (mesejo) -- feat: add missing scalar math functions [#465](https://github.com/apache/datafusion-python/pull/465) (mesejo) -- build(deps): bump arduino/setup-protoc from 1 to 2 [#452](https://github.com/apache/datafusion-python/pull/452) (dependabot[bot]) -- Revert "build(deps): bump arduino/setup-protoc from 1 to 2 (#452)" [#474](https://github.com/apache/datafusion-python/pull/474) (viirya) -- Minor: fix wrongly copied function description [#497](https://github.com/apache/datafusion-python/pull/497) (viirya) -- Upgrade to Datafusion 31.0.0 [#491](https://github.com/apache/datafusion-python/pull/491) (judahrand) -- Add `isnan` and `iszero` [#495](https://github.com/apache/datafusion-python/pull/495) (judahrand) - -## 30.0.0 - -- Skipped due to a breaking change in DataFusion - -## 29.0.0 - -- Skipped - -## [28.0.0](https://github.com/apache/datafusion-python/tree/28.0.0) (2023-07-25) - -**Implemented 
enhancements:** - -- feat: expose offset in python API [#437](https://github.com/apache/datafusion-python/pull/437) (cpcloud) - -**Merged pull requests:** - -- File based input utils [#433](https://github.com/apache/datafusion-python/pull/433) (jdye64) -- Upgrade to 28.0.0-rc1 [#434](https://github.com/apache/datafusion-python/pull/434) (andygrove) -- Introduces utility for obtaining SqlTable information from a file like location [#398](https://github.com/apache/datafusion-python/pull/398) (jdye64) -- feat: expose offset in python API [#437](https://github.com/apache/datafusion-python/pull/437) (cpcloud) -- Use DataFusion 28 [#439](https://github.com/apache/datafusion-python/pull/439) (andygrove) - -## [27.0.0](https://github.com/apache/datafusion-python/tree/27.0.0) (2023-07-03) - -**Merged pull requests:** - -- LogicalPlan.to_variant() make public [#412](https://github.com/apache/datafusion-python/pull/412) (jdye64) -- Prepare 27.0.0 release [#423](https://github.com/apache/datafusion-python/pull/423) (andygrove) - -## [26.0.0](https://github.com/apache/datafusion-python/tree/26.0.0) (2023-06-11) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/25.0.0...26.0.0) - -**Merged pull requests:** - -- Add Expr::Case when_then_else support to rex_call_operands function [#388](https://github.com/apache/datafusion-python/pull/388) (jdye64) -- Introduce BaseSessionContext abstract class [#390](https://github.com/apache/datafusion-python/pull/390) (jdye64) -- CRUD Schema support for `BaseSessionContext` [#392](https://github.com/apache/datafusion-python/pull/392) (jdye64) -- CRUD Table support for `BaseSessionContext` [#394](https://github.com/apache/datafusion-python/pull/394) (jdye64) - -## [25.0.0](https://github.com/apache/datafusion-python/tree/25.0.0) (2023-05-23) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/24.0.0...25.0.0) - -**Merged pull requests:** - -- Prepare 24.0.0 Release 
[#376](https://github.com/apache/datafusion-python/pull/376) (andygrove) -- build(deps): bump uuid from 1.3.1 to 1.3.2 [#359](https://github.com/apache/datafusion-python/pull/359) (dependabot[bot]) -- build(deps): bump mimalloc from 0.1.36 to 0.1.37 [#361](https://github.com/apache/datafusion-python/pull/361) (dependabot[bot]) -- build(deps): bump regex-syntax from 0.6.29 to 0.7.1 [#334](https://github.com/apache/datafusion-python/pull/334) (dependabot[bot]) -- upgrade maturin to 0.15.1 [#379](https://github.com/apache/datafusion-python/pull/379) (Jimexist) -- Expand Expr to include RexType basic support [#378](https://github.com/apache/datafusion-python/pull/378) (jdye64) -- Add Python script for generating changelog [#383](https://github.com/apache/datafusion-python/pull/383) (andygrove) - -## [24.0.0](https://github.com/apache/datafusion-python/tree/24.0.0) (2023-05-09) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/23.0.0...24.0.0) - -**Documentation updates:** - -- Fix link to user guide [#354](https://github.com/apache/datafusion-python/pull/354) (andygrove) - -**Merged pull requests:** - -- Add interface to serialize Substrait plans to Python Bytes. [#344](https://github.com/apache/datafusion-python/pull/344) (kylebrooks-8451) -- Add partition_count property to ExecutionPlan. [#346](https://github.com/apache/datafusion-python/pull/346) (kylebrooks-8451) -- Remove unsendable from all Rust pyclass types. [#348](https://github.com/apache/datafusion-python/pull/348) (kylebrooks-8451) -- Fix link to user guide [#354](https://github.com/apache/datafusion-python/pull/354) (andygrove) -- Fix SessionContext execute. 
[#353](https://github.com/apache/datafusion-python/pull/353) (kylebrooks-8451) -- Pub mod expr in lib.rs [#357](https://github.com/apache/datafusion-python/pull/357) (jdye64) -- Add benchmark derived from TPC-H [#355](https://github.com/apache/datafusion-python/pull/355) (andygrove) -- Add db-benchmark [#365](https://github.com/apache/datafusion-python/pull/365) (andygrove) -- First pass of documentation in mdBook [#364](https://github.com/apache/datafusion-python/pull/364) (MrPowers) -- Add 'pub' and '#[pyo3(get, set)]' to DataTypeMap [#371](https://github.com/apache/datafusion-python/pull/371) (jdye64) -- Fix db-benchmark [#369](https://github.com/apache/datafusion-python/pull/369) (andygrove) -- Docs explaining how to view query plans [#373](https://github.com/apache/datafusion-python/pull/373) (andygrove) -- Improve db-benchmark [#372](https://github.com/apache/datafusion-python/pull/372) (andygrove) -- Make expr member of PyExpr public [#375](https://github.com/apache/datafusion-python/pull/375) (jdye64) - -## [23.0.0](https://github.com/apache/datafusion-python/tree/23.0.0) (2023-04-23) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/22.0.0...23.0.0) - -**Merged pull requests:** - -- Improve API docs, README, and examples for configuring context [#321](https://github.com/apache/datafusion-python/pull/321) (andygrove) -- Osx build linker args [#330](https://github.com/apache/datafusion-python/pull/330) (jdye64) -- Add requirements file for python 3.11 [#332](https://github.com/apache/datafusion-python/pull/332) (r4ntix) -- mac arm64 build [#338](https://github.com/apache/datafusion-python/pull/338) (andygrove) -- Add conda.yaml baseline workflow file [#281](https://github.com/apache/datafusion-python/pull/281) (jdye64) -- Prepare for 23.0.0 release [#335](https://github.com/apache/datafusion-python/pull/335) (andygrove) -- Reuse the Tokio Runtime [#341](https://github.com/apache/datafusion-python/pull/341) (kylebrooks-8451) - -## 
[22.0.0](https://github.com/apache/datafusion-python/tree/22.0.0) (2023-04-10) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/21.0.0...22.0.0) - -**Merged pull requests:** - -- Fix invalid build yaml [#308](https://github.com/apache/datafusion-python/pull/308) (andygrove) -- Try fix release build [#309](https://github.com/apache/datafusion-python/pull/309) (andygrove) -- Fix release build [#310](https://github.com/apache/datafusion-python/pull/310) (andygrove) -- Enable datafusion-substrait protoc feature, to remove compile-time dependency on protoc [#312](https://github.com/apache/datafusion-python/pull/312) (andygrove) -- Fix Mac/Win release builds in CI [#313](https://github.com/apache/datafusion-python/pull/313) (andygrove) -- install protoc in docs workflow [#314](https://github.com/apache/datafusion-python/pull/314) (andygrove) -- Fix documentation generation in CI [#315](https://github.com/apache/datafusion-python/pull/315) (andygrove) -- Source wheel fix [#319](https://github.com/apache/datafusion-python/pull/319) (andygrove) - -## [21.0.0](https://github.com/apache/datafusion-python/tree/21.0.0) (2023-03-30) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/20.0.0...21.0.0) - -**Merged pull requests:** - -- minor: Fix minor warning on unused import [#289](https://github.com/apache/datafusion-python/pull/289) (viirya) -- feature: Implement `describe()` method [#293](https://github.com/apache/datafusion-python/pull/293) (simicd) -- fix: Printed results not visible in debugger & notebooks [#296](https://github.com/apache/datafusion-python/pull/296) (simicd) -- add package.include and remove wildcard dependency [#295](https://github.com/apache/datafusion-python/pull/295) (andygrove) -- Update main branch name in docs workflow [#303](https://github.com/apache/datafusion-python/pull/303) (andygrove) -- Upgrade to DF 21 [#301](https://github.com/apache/datafusion-python/pull/301) (andygrove) - -## 
[20.0.0](https://github.com/apache/datafusion-python/tree/20.0.0) (2023-03-17) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.8.0...20.0.0) - -**Implemented enhancements:** - -- Empty relation bindings [#208](https://github.com/apache/datafusion-python/pull/208) (jdye64) -- wrap display_name and canonical_name functions [#214](https://github.com/apache/datafusion-python/pull/214) (jdye64) -- Add PyAlias bindings [#216](https://github.com/apache/datafusion-python/pull/216) (jdye64) -- Add bindings for scalar_variable [#218](https://github.com/apache/datafusion-python/pull/218) (jdye64) -- Bindings for LIKE type expressions [#220](https://github.com/apache/datafusion-python/pull/220) (jdye64) -- Bool expr bindings [#223](https://github.com/apache/datafusion-python/pull/223) (jdye64) -- Between bindings [#229](https://github.com/apache/datafusion-python/pull/229) (jdye64) -- Add bindings for GetIndexedField [#227](https://github.com/apache/datafusion-python/pull/227) (jdye64) -- Add bindings for case, cast, and trycast [#232](https://github.com/apache/datafusion-python/pull/232) (jdye64) -- add remaining expr bindings [#233](https://github.com/apache/datafusion-python/pull/233) (jdye64) -- feature: Additional export methods [#236](https://github.com/apache/datafusion-python/pull/236) (simicd) -- Add Python wrapper for LogicalPlan::Union [#240](https://github.com/apache/datafusion-python/pull/240) (iajoiner) -- feature: Create dataframe from pandas, polars, dictionary, list or pyarrow Table [#242](https://github.com/apache/datafusion-python/pull/242) (simicd) -- Add Python wrappers for `LogicalPlan::Join` and `LogicalPlan::CrossJoin` [#246](https://github.com/apache/datafusion-python/pull/246) (iajoiner) -- feature: Set table name from ctx functions [#260](https://github.com/apache/datafusion-python/pull/260) (simicd) -- Explain bindings [#264](https://github.com/apache/datafusion-python/pull/264) (jdye64) -- Extension bindings 
[#266](https://github.com/apache/datafusion-python/pull/266) (jdye64) -- Subquery alias bindings [#269](https://github.com/apache/datafusion-python/pull/269) (jdye64) -- Create memory table [#271](https://github.com/apache/datafusion-python/pull/271) (jdye64) -- Create view bindings [#273](https://github.com/apache/datafusion-python/pull/273) (jdye64) -- Re-export Datafusion dependencies [#277](https://github.com/apache/datafusion-python/pull/277) (jdye64) -- Distinct bindings [#275](https://github.com/apache/datafusion-python/pull/275) (jdye64) -- Drop table bindings [#283](https://github.com/apache/datafusion-python/pull/283) (jdye64) -- Bindings for LogicalPlan::Repartition [#285](https://github.com/apache/datafusion-python/pull/285) (jdye64) -- Expand Rust return type support for Arrow DataTypes in ScalarValue [#287](https://github.com/apache/datafusion-python/pull/287) (jdye64) - -**Documentation updates:** - -- docs: Example of calling Python UDF & UDAF in SQL [#258](https://github.com/apache/datafusion-python/pull/258) (simicd) - -**Merged pull requests:** - -- Minor docs updates [#210](https://github.com/apache/datafusion-python/pull/210) (andygrove) -- Empty relation bindings [#208](https://github.com/apache/datafusion-python/pull/208) (jdye64) -- wrap display_name and canonical_name functions [#214](https://github.com/apache/datafusion-python/pull/214) (jdye64) -- Add PyAlias bindings [#216](https://github.com/apache/datafusion-python/pull/216) (jdye64) -- Add bindings for scalar_variable [#218](https://github.com/apache/datafusion-python/pull/218) (jdye64) -- Bindings for LIKE type expressions [#220](https://github.com/apache/datafusion-python/pull/220) (jdye64) -- Bool expr bindings [#223](https://github.com/apache/datafusion-python/pull/223) (jdye64) -- Between bindings [#229](https://github.com/apache/datafusion-python/pull/229) (jdye64) -- Add bindings for GetIndexedField [#227](https://github.com/apache/datafusion-python/pull/227) (jdye64) -- Add 
bindings for case, cast, and trycast [#232](https://github.com/apache/datafusion-python/pull/232) (jdye64) -- add remaining expr bindings [#233](https://github.com/apache/datafusion-python/pull/233) (jdye64) -- Pre-commit hooks [#228](https://github.com/apache/datafusion-python/pull/228) (jdye64) -- Implement new release process [#149](https://github.com/apache/datafusion-python/pull/149) (andygrove) -- feature: Additional export methods [#236](https://github.com/apache/datafusion-python/pull/236) (simicd) -- Add Python wrapper for LogicalPlan::Union [#240](https://github.com/apache/datafusion-python/pull/240) (iajoiner) -- feature: Create dataframe from pandas, polars, dictionary, list or pyarrow Table [#242](https://github.com/apache/datafusion-python/pull/242) (simicd) -- Fix release instructions [#238](https://github.com/apache/datafusion-python/pull/238) (andygrove) -- Add Python wrappers for `LogicalPlan::Join` and `LogicalPlan::CrossJoin` [#246](https://github.com/apache/datafusion-python/pull/246) (iajoiner) -- docs: Example of calling Python UDF & UDAF in SQL [#258](https://github.com/apache/datafusion-python/pull/258) (simicd) -- feature: Set table name from ctx functions [#260](https://github.com/apache/datafusion-python/pull/260) (simicd) -- Upgrade to DataFusion 19 [#262](https://github.com/apache/datafusion-python/pull/262) (andygrove) -- Explain bindings [#264](https://github.com/apache/datafusion-python/pull/264) (jdye64) -- Extension bindings [#266](https://github.com/apache/datafusion-python/pull/266) (jdye64) -- Subquery alias bindings [#269](https://github.com/apache/datafusion-python/pull/269) (jdye64) -- Create memory table [#271](https://github.com/apache/datafusion-python/pull/271) (jdye64) -- Create view bindings [#273](https://github.com/apache/datafusion-python/pull/273) (jdye64) -- Re-export Datafusion dependencies [#277](https://github.com/apache/datafusion-python/pull/277) (jdye64) -- Distinct bindings 
[#275](https://github.com/apache/datafusion-python/pull/275) (jdye64) -- build(deps): bump actions/checkout from 2 to 3 [#244](https://github.com/apache/datafusion-python/pull/244) (dependabot[bot]) -- build(deps): bump actions/upload-artifact from 2 to 3 [#245](https://github.com/apache/datafusion-python/pull/245) (dependabot[bot]) -- build(deps): bump actions/download-artifact from 2 to 3 [#243](https://github.com/apache/datafusion-python/pull/243) (dependabot[bot]) -- Use DataFusion 20 [#278](https://github.com/apache/datafusion-python/pull/278) (andygrove) -- Drop table bindings [#283](https://github.com/apache/datafusion-python/pull/283) (jdye64) -- Bindings for LogicalPlan::Repartition [#285](https://github.com/apache/datafusion-python/pull/285) (jdye64) -- Expand Rust return type support for Arrow DataTypes in ScalarValue [#287](https://github.com/apache/datafusion-python/pull/287) (jdye64) - -## [0.8.0](https://github.com/apache/datafusion-python/tree/0.8.0) (2023-02-22) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.8.0-rc1...0.8.0) - -**Implemented enhancements:** - -- Add support for cuDF physical execution engine [\#202](https://github.com/apache/datafusion-python/issues/202) -- Make it easier to create a Pandas dataframe from DataFusion query results [\#139](https://github.com/apache/datafusion-python/issues/139) - -**Fixed bugs:** - -- Build error: could not compile `thiserror` due to 2 previous errors [\#69](https://github.com/apache/datafusion-python/issues/69) - -**Closed issues:** - -- Integrate with the new `object_store` crate [\#22](https://github.com/apache/datafusion-python/issues/22) - -**Merged pull requests:** - -- Update README in preparation for 0.8 release [\#206](https://github.com/apache/datafusion-python/pull/206) ([andygrove](https://github.com/andygrove)) -- Add support for cudf as a physical execution engine [\#205](https://github.com/apache/datafusion-python/pull/205) 
([jdye64](https://github.com/jdye64)) -- Run `maturin develop` instead of `cargo build` in verification script [\#200](https://github.com/apache/datafusion-python/pull/200) ([andygrove](https://github.com/andygrove)) -- Add tests for recently added functionality [\#199](https://github.com/apache/datafusion-python/pull/199) ([andygrove](https://github.com/andygrove)) -- Implement `to_pandas()` [\#197](https://github.com/apache/datafusion-python/pull/197) ([simicd](https://github.com/simicd)) -- Add Python wrapper for LogicalPlan::Sort [\#196](https://github.com/apache/datafusion-python/pull/196) ([andygrove](https://github.com/andygrove)) -- Add Python wrapper for LogicalPlan::Aggregate [\#195](https://github.com/apache/datafusion-python/pull/195) ([andygrove](https://github.com/andygrove)) -- Add Python wrapper for LogicalPlan::Limit [\#193](https://github.com/apache/datafusion-python/pull/193) ([andygrove](https://github.com/andygrove)) -- Add Python wrapper for LogicalPlan::Filter [\#192](https://github.com/apache/datafusion-python/pull/192) ([andygrove](https://github.com/andygrove)) -- Add experimental support for executing SQL with Polars and Pandas [\#190](https://github.com/apache/datafusion-python/pull/190) ([andygrove](https://github.com/andygrove)) -- Update changelog for 0.8 release [\#188](https://github.com/apache/datafusion-python/pull/188) ([andygrove](https://github.com/andygrove)) -- Add ability to execute ExecutionPlan and get a stream of RecordBatch [\#186](https://github.com/apache/datafusion-python/pull/186) ([andygrove](https://github.com/andygrove)) -- Dffield bindings [\#185](https://github.com/apache/datafusion-python/pull/185) ([jdye64](https://github.com/jdye64)) -- Add bindings for DFSchema [\#183](https://github.com/apache/datafusion-python/pull/183) ([jdye64](https://github.com/jdye64)) -- test: Window functions [\#182](https://github.com/apache/datafusion-python/pull/182) ([simicd](https://github.com/simicd)) -- Add bindings for 
Projection [\#180](https://github.com/apache/datafusion-python/pull/180) ([jdye64](https://github.com/jdye64)) -- Table scan bindings [\#178](https://github.com/apache/datafusion-python/pull/178) ([jdye64](https://github.com/jdye64)) -- Make session configurable [\#176](https://github.com/apache/datafusion-python/pull/176) ([andygrove](https://github.com/andygrove)) -- Upgrade to DataFusion 18.0.0 [\#175](https://github.com/apache/datafusion-python/pull/175) ([andygrove](https://github.com/andygrove)) -- Use latest DataFusion rev in preparation for DF 18 release [\#174](https://github.com/apache/datafusion-python/pull/174) ([andygrove](https://github.com/andygrove)) -- Arrow type bindings [\#173](https://github.com/apache/datafusion-python/pull/173) ([jdye64](https://github.com/jdye64)) -- Pyo3 bump [\#171](https://github.com/apache/datafusion-python/pull/171) ([jdye64](https://github.com/jdye64)) -- feature: Add additional aggregation functions [\#170](https://github.com/apache/datafusion-python/pull/170) ([simicd](https://github.com/simicd)) -- Make from_substrait_plan return DataFrame instead of LogicalPlan [\#164](https://github.com/apache/datafusion-python/pull/164) ([andygrove](https://github.com/andygrove)) -- feature: Implement count method [\#163](https://github.com/apache/datafusion-python/pull/163) ([simicd](https://github.com/simicd)) -- CI Fixes [\#162](https://github.com/apache/datafusion-python/pull/162) ([jdye64](https://github.com/jdye64)) -- Upgrade to DataFusion 17 [\#160](https://github.com/apache/datafusion-python/pull/160) ([andygrove](https://github.com/andygrove)) -- feature: Improve string representation of datafusion classes [\#159](https://github.com/apache/datafusion-python/pull/159) ([simicd](https://github.com/simicd)) -- Make PyExecutionPlan.plan public [\#156](https://github.com/apache/datafusion-python/pull/156) ([andygrove](https://github.com/andygrove)) -- Expose methods on logical and execution plans 
[\#155](https://github.com/apache/datafusion-python/pull/155) ([andygrove](https://github.com/andygrove)) -- Fix clippy for new Rust version [\#154](https://github.com/apache/datafusion-python/pull/154) ([andygrove](https://github.com/andygrove)) -- Add DataFrame methods for accessing plans [\#153](https://github.com/apache/datafusion-python/pull/153) ([andygrove](https://github.com/andygrove)) -- Use DataFusion rev 5238e8c97f998b4d2cb9fab85fb182f325a1a7fb [\#150](https://github.com/apache/datafusion-python/pull/150) ([andygrove](https://github.com/andygrove)) -- build\(deps\): bump async-trait from 0.1.61 to 0.1.62 [\#148](https://github.com/apache/datafusion-python/pull/148) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Rename default branch from master to main [\#147](https://github.com/apache/datafusion-python/pull/147) ([andygrove](https://github.com/andygrove)) -- Substrait bindings [\#145](https://github.com/apache/datafusion-python/pull/145) ([jdye64](https://github.com/jdye64)) -- build\(deps\): bump uuid from 0.8.2 to 1.2.2 [\#143](https://github.com/apache/datafusion-python/pull/143) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Prepare for 0.8.0 release [\#141](https://github.com/apache/datafusion-python/pull/141) ([andygrove](https://github.com/andygrove)) -- Improve README and add more examples [\#137](https://github.com/apache/datafusion-python/pull/137) ([andygrove](https://github.com/andygrove)) -- test: Expand tests for built-in functions [\#129](https://github.com/apache/datafusion-python/pull/129) ([simicd](https://github.com/simicd)) -- build\(deps\): bump object_store from 0.5.2 to 0.5.3 [\#126](https://github.com/apache/datafusion-python/pull/126) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump mimalloc from 0.1.32 to 0.1.34 [\#125](https://github.com/apache/datafusion-python/pull/125) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Introduce conda directory containing 
datafusion-dev.yaml conda enviro… [\#124](https://github.com/apache/datafusion-python/pull/124) ([jdye64](https://github.com/jdye64)) -- build\(deps\): bump bzip2 from 0.4.3 to 0.4.4 [\#121](https://github.com/apache/datafusion-python/pull/121) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump tokio from 1.23.0 to 1.24.1 [\#119](https://github.com/apache/datafusion-python/pull/119) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump async-trait from 0.1.60 to 0.1.61 [\#118](https://github.com/apache/datafusion-python/pull/118) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Upgrade to DataFusion 16.0.0 [\#115](https://github.com/apache/datafusion-python/pull/115) ([andygrove](https://github.com/andygrove)) -- Bump async-trait from 0.1.57 to 0.1.60 [\#114](https://github.com/apache/datafusion-python/pull/114) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump object_store from 0.5.1 to 0.5.2 [\#112](https://github.com/apache/datafusion-python/pull/112) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump tokio from 1.21.2 to 1.23.0 [\#109](https://github.com/apache/datafusion-python/pull/109) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Add entries for publishing production \(asf-site\) and staging docs [\#107](https://github.com/apache/datafusion-python/pull/107) ([martin-g](https://github.com/martin-g)) -- Add a workflow that builds the docs and deploys them at staged or production [\#104](https://github.com/apache/datafusion-python/pull/104) ([martin-g](https://github.com/martin-g)) -- Upgrade to DataFusion 15.0.0 [\#103](https://github.com/apache/datafusion-python/pull/103) ([andygrove](https://github.com/andygrove)) -- build\(deps\): bump futures from 0.3.24 to 0.3.25 [\#102](https://github.com/apache/datafusion-python/pull/102) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump pyo3 from 0.17.2 to 0.17.3 
[\#101](https://github.com/apache/datafusion-python/pull/101) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump mimalloc from 0.1.30 to 0.1.32 [\#98](https://github.com/apache/datafusion-python/pull/98) ([dependabot[bot]](https://github.com/apps/dependabot)) -- build\(deps\): bump rand from 0.7.3 to 0.8.5 [\#97](https://github.com/apache/datafusion-python/pull/97) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix GitHub actions warnings [\#95](https://github.com/apache/datafusion-python/pull/95) ([martin-g](https://github.com/martin-g)) -- Fixes \#81 - Add CI workflow for source distribution [\#93](https://github.com/apache/datafusion-python/pull/93) ([martin-g](https://github.com/martin-g)) -- post-release updates [\#91](https://github.com/apache/datafusion-python/pull/91) ([andygrove](https://github.com/andygrove)) -- Build for manylinux 2014 [\#88](https://github.com/apache/datafusion-python/pull/88) ([martin-g](https://github.com/martin-g)) -- update release readme tag [\#86](https://github.com/apache/datafusion-python/pull/86) ([Jimexist](https://github.com/Jimexist)) -- Upgrade Maturin to 0.14.2 [\#85](https://github.com/apache/datafusion-python/pull/85) ([martin-g](https://github.com/martin-g)) -- Update release instructions [\#83](https://github.com/apache/datafusion-python/pull/83) ([andygrove](https://github.com/andygrove)) -- \[Functions\] - Add python function binding to `functions` [\#73](https://github.com/apache/datafusion-python/pull/73) ([francis-du](https://github.com/francis-du)) - -## [0.8.0-rc1](https://github.com/apache/datafusion-python/tree/0.8.0-rc1) (2023-02-17) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.7.0-rc2...0.8.0-rc1) - -**Implemented enhancements:** - -- Add bindings for datafusion_common::DFField [\#184](https://github.com/apache/datafusion-python/issues/184) -- Add bindings for DFSchema/DFSchemaRef 
[\#181](https://github.com/apache/datafusion-python/issues/181) -- Add bindings for datafusion_expr Projection [\#179](https://github.com/apache/datafusion-python/issues/179) -- Add bindings for `TableScan` struct from `datafusion_expr::TableScan` [\#177](https://github.com/apache/datafusion-python/issues/177) -- Add a "mapping" struct for types [\#172](https://github.com/apache/datafusion-python/issues/172) -- Improve string representation of datafusion classes \(dataframe, context, expression, ...\) [\#158](https://github.com/apache/datafusion-python/issues/158) -- Add DataFrame count method [\#151](https://github.com/apache/datafusion-python/issues/151) -- \[REQUEST\] Github Actions Improvements [\#146](https://github.com/apache/datafusion-python/issues/146) -- Change default branch name from master to main [\#144](https://github.com/apache/datafusion-python/issues/144) -- Bump pyo3 to 0.18.0 [\#140](https://github.com/apache/datafusion-python/issues/140) -- Add script for Python linting [\#134](https://github.com/apache/datafusion-python/issues/134) -- Add Python bindings for substrait module [\#132](https://github.com/apache/datafusion-python/issues/132) -- Expand unit tests for built-in functions [\#128](https://github.com/apache/datafusion-python/issues/128) -- support creating arrow-datafusion-python conda environment [\#122](https://github.com/apache/datafusion-python/issues/122) -- Build Python source distribution in GitHub workflow [\#81](https://github.com/apache/datafusion-python/issues/81) -- EPIC: Add all functions to python binding `functions` [\#72](https://github.com/apache/datafusion-python/issues/72) - -**Fixed bugs:** - -- Build is broken [\#161](https://github.com/apache/datafusion-python/issues/161) -- Out of memory when sorting [\#157](https://github.com/apache/datafusion-python/issues/157) -- window_lead test appears to be non-deterministic [\#135](https://github.com/apache/datafusion-python/issues/135) -- Reading csv does not work 
[\#130](https://github.com/apache/datafusion-python/issues/130) -- Github actions produce a lot of warnings [\#94](https://github.com/apache/datafusion-python/issues/94) -- ASF source release tarball has wrong directory name [\#90](https://github.com/apache/datafusion-python/issues/90) -- Python Release Build failing after upgrading to maturin 14.2 [\#87](https://github.com/apache/datafusion-python/issues/87) -- Maturin build hangs on Linux ARM64 [\#84](https://github.com/apache/datafusion-python/issues/84) -- Cannot install on Mac M1 from source tarball from testpypi [\#82](https://github.com/apache/datafusion-python/issues/82) -- ImportPathMismatchError when running pytest locally [\#77](https://github.com/apache/datafusion-python/issues/77) - -**Closed issues:** - -- Publish documentation for Python bindings [\#39](https://github.com/apache/datafusion-python/issues/39) -- Add Python binding for `approx_median` [\#32](https://github.com/apache/datafusion-python/issues/32) -- Release version 0.7.0 [\#7](https://github.com/apache/datafusion-python/issues/7) - -## [0.7.0-rc2](https://github.com/apache/datafusion-python/tree/0.7.0-rc2) (2022-11-26) - -[Full Changelog](https://github.com/apache/datafusion-python/compare/0.7.0...0.7.0-rc2) - -## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) - -**Merged pull requests:** - -- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) -- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) -- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) -- update readme and changelog 
[\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) -- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) -- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) -- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) -- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) -- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) -- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) -- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) -- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) -- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) -- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) -- update test workflow to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) -- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) -- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) 
([Jimexist](https://github.com/Jimexist)) -- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) -- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) -- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) -- Ship the Cargo.lock file in the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) -- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) -- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) -- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) - -## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) - -## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) - -## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) - -## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) - -**Closed issues:** - -- Add support for Ballista 
[\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) -- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) - -## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) - -**Closed issues:** - -- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) -- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) -- Why is pandas a requirement? [\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) -- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) -- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) +The changelogs have now moved [here](./dev/changelog). 
diff --git a/Cargo.lock b/Cargo.lock index 7b57b330a..d1f291be9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611cc2ae7d2e242c457e4be7f97036b8ad9ca152b499f53faf99b1ed8fc2553f" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" [[package]] name = "android-tzdata" @@ -157,7 +157,7 @@ dependencies = [ "snap", "strum 0.25.0", "strum_macros 0.25.3", - "thiserror", + "thiserror 1.0.69", "typed-builder", "uuid", "xz2", @@ -178,9 +178,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" dependencies = [ "arrow-arith", "arrow-array", @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" dependencies = [ "ahash", "arrow-buffer", @@ -226,15 +226,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.2", "num", ] [[package]] name = "arrow-buffer" -version = "53.2.0" 
+version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,9 +283,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" dependencies = [ "arrow-buffer", "arrow-schema", @@ -295,9 +295,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" dependencies = [ "arrow-array", "arrow-buffer", @@ -310,9 +310,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" dependencies = [ "arrow-array", "arrow-buffer", @@ -330,9 +330,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" dependencies = [ "arrow-array", "arrow-buffer", @@ -345,9 +345,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" dependencies = [ "ahash", "arrow-array", @@ -359,18 +359,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" dependencies = [ "ahash", "arrow-array", @@ -382,9 +382,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" 
dependencies = [ "arrow-array", "arrow-buffer", @@ -411,9 +411,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.17" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ "bzip2", "flate2", @@ -444,7 +444,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -455,7 +455,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -529,9 +529,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ "arrayref", "arrayvec", @@ -584,9 +584,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "bzip2" @@ -611,9 +611,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.37" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" dependencies = [ "jobserver", "libc", @@ -668,18 +668,18 @@ dependencies = [ [[package]] name = "cmake" 
-version = "0.1.51" +version = "0.1.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" dependencies = [ "cc", ] [[package]] name = "comfy-table" -version = "7.1.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "strum 0.26.3", "strum_macros 0.26.4", @@ -720,9 +720,9 @@ checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "core-foundation" -version = "0.9.4" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" dependencies = [ "core-foundation-sys", "libc", @@ -760,9 +760,9 @@ checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -809,9 +809,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -1288,7 +1288,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "42.0.0" +version = "43.0.0" dependencies = [ 
"arrow", "async-trait", @@ -1364,7 +1364,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -1393,12 +1393,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1425,9 +1425,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.34" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1504,7 +1504,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -1563,8 +1563,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1581,9 +1583,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", @@ -1621,9 +1623,9 @@ dependencies = [ [[package]] name = 
"hashbrown" -version = "0.15.1" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" [[package]] name = "heck" @@ -1697,9 +1699,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" dependencies = [ "bytes", "futures-channel", @@ -1890,7 +1892,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -1921,7 +1923,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.15.2", ] [[package]] @@ -1965,9 +1967,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jobserver" @@ -1980,10 +1982,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -2059,9 +2062,9 @@ dependencies = [ [[package]] name = "libc" -version = 
"0.2.162" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libflate" @@ -2121,9 +2124,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "litemap" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "lock_api" @@ -2212,11 +2215,10 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi", "libc", "wasi", "windows-sys 0.52.0", @@ -2398,9 +2400,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.2.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", "arrow-array", @@ -2417,7 +2419,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.2", "lz4_flex", "num", "num-bigint", @@ -2558,9 +2560,9 @@ checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "portable-atomic" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" +checksum = 
"280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "ppv-lite86" @@ -2578,14 +2580,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -2617,7 +2619,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.87", + "syn 2.0.90", "tempfile", ] @@ -2631,7 +2633,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -2699,7 +2701,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -2712,7 +2714,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -2733,9 +2735,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", @@ -2744,26 +2746,29 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.3", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + 
"getrandom", "rand", "ring", "rustc-hash", "rustls", + "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.3", "tinyvec", "tracing", + "web-time", ] [[package]] @@ -2842,9 +2847,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -2956,9 +2961,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc_version" @@ -2971,9 +2976,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.39" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags 2.6.0", "errno", @@ -2984,9 +2989,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.16" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "once_cell", "ring", @@ -2998,12 +3003,11 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" +checksum = 
"7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" dependencies = [ "openssl-probe", - "rustls-pemfile", "rustls-pki-types", "schannel", "security-framework", @@ -3023,6 +3027,9 @@ name = "rustls-pki-types" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -3058,9 +3065,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -3086,7 +3093,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3097,9 +3104,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "2.11.1" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" dependencies = [ "bitflags 2.6.0", "core-foundation", @@ -3135,22 +3142,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3161,14 +3168,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -3185,7 +3192,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3269,7 +3276,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3280,9 +3287,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -3312,7 +3319,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3352,7 +3359,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3365,7 +3372,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3389,7 +3396,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.87", + "syn 2.0.90", "typify", "walkdir", ] @@ -3413,9 
+3420,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -3424,9 +3431,9 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" dependencies = [ "futures-core", ] @@ -3439,7 +3446,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3463,22 +3470,42 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.68" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.3", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", ] [[package]] name = "thiserror-impl" -version = "1.0.68" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3550,7 +3577,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3585,9 +3612,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -3596,20 +3623,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] @@ -3668,7 +3695,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] @@ -3702,8 +3729,8 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.87", - "thiserror", + "syn 2.0.90", + "thiserror 1.0.69", "unicode-ident", ] @@ -3720,15 +3747,15 @@ 
dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.87", + "syn 2.0.90", "typify-impl", ] [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-segmentation" @@ -3738,9 +3765,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unindent" @@ -3762,9 +3789,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.3" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -3826,9 +3853,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" dependencies = [ "cfg-if", "once_cell", @@ -3837,36 +3864,37 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3874,22 +3902,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" [[package]] name = "wasm-streams" @@ -3906,9 +3934,19 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.74" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", @@ -4089,9 +4127,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" dependencies = [ "serde", "stable_deref_trait", @@ -4101,13 +4139,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", "synstructure", ] @@ -4129,27 +4167,27 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] name = "zerofrom" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", "synstructure", ] @@ -4178,7 +4216,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.90", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 02707b957..703fc5a26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "42.0.0" +version = "43.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/43.0.0.md b/dev/changelog/43.0.0.md new file mode 100644 index 000000000..bbb766910 --- /dev/null +++ b/dev/changelog/43.0.0.md @@ -0,0 +1,73 @@ + + +# Apache DataFusion Python 43.0.0 Changelog + +This release consists of 26 commits from 7 contributors. See credits at the end of this changelog for more information. 
+ +**Implemented enhancements:** + +- feat: expose `drop` method [#913](https://github.com/apache/datafusion-python/pull/913) (ion-elgreco) +- feat: expose `join_on` [#914](https://github.com/apache/datafusion-python/pull/914) (ion-elgreco) +- feat: add fill_null/nan expressions [#919](https://github.com/apache/datafusion-python/pull/919) (ion-elgreco) +- feat: add `with_columns` [#909](https://github.com/apache/datafusion-python/pull/909) (ion-elgreco) +- feat: add `cast` to DataFrame [#916](https://github.com/apache/datafusion-python/pull/916) (ion-elgreco) +- feat: add `head`, `tail` methods [#915](https://github.com/apache/datafusion-python/pull/915) (ion-elgreco) + +**Fixed bugs:** + +- fix: remove use of deprecated `make_scalar_function` [#906](https://github.com/apache/datafusion-python/pull/906) (Michael-J-Ward) +- fix: udwf example [#948](https://github.com/apache/datafusion-python/pull/948) (mesejo) + +**Other:** + +- Ts/minor updates release process [#903](https://github.com/apache/datafusion-python/pull/903) (timsaucer) +- build(deps): bump pyo3 from 0.22.3 to 0.22.4 [#910](https://github.com/apache/datafusion-python/pull/910) (dependabot[bot]) +- refactor: `from_arrow` use protocol typehints [#917](https://github.com/apache/datafusion-python/pull/917) (ion-elgreco) +- Change requires-python version in pyproject.toml [#924](https://github.com/apache/datafusion-python/pull/924) (kosiew) +- chore: deprecate `select_columns` [#911](https://github.com/apache/datafusion-python/pull/911) (ion-elgreco) +- build(deps): bump uuid from 1.10.0 to 1.11.0 [#927](https://github.com/apache/datafusion-python/pull/927) (dependabot[bot]) +- Add array_empty scalar function [#931](https://github.com/apache/datafusion-python/pull/931) (kosiew) +- add `cardinality` function to calculate total distinct elements in an array [#937](https://github.com/apache/datafusion-python/pull/937) (kosiew) +- Add empty scalar function (alias of array_empty), fix a small typo 
[#938](https://github.com/apache/datafusion-python/pull/938) (kosiew) +- README How to develop section now also works on Apple M1 [#940](https://github.com/apache/datafusion-python/pull/940) (drauschenbach) +- refactor: dataframe `join` params [#912](https://github.com/apache/datafusion-python/pull/912) (ion-elgreco) +- Upgrade to Datafusion 43 [#905](https://github.com/apache/datafusion-python/pull/905) (Michael-J-Ward) +- build(deps): bump tokio from 1.40.0 to 1.41.1 [#946](https://github.com/apache/datafusion-python/pull/946) (dependabot[bot]) +- Add list_cat, list_concat, list_repeat [#942](https://github.com/apache/datafusion-python/pull/942) (kosiew) +- Add foreign table providers [#921](https://github.com/apache/datafusion-python/pull/921) (timsaucer) +- Add make_list and tests for make_list, make_array [#949](https://github.com/apache/datafusion-python/pull/949) (kosiew) +- Documentation updates: simplify examples and add section on data sources [#955](https://github.com/apache/datafusion-python/pull/955) (timsaucer) +- Add datafusion.extract [#959](https://github.com/apache/datafusion-python/pull/959) (kosiew) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 9 Ion Koutsouris + 7 kosiew + 3 Tim Saucer + 3 dependabot[bot] + 2 Michael J Ward + 1 Daniel Mesejo + 1 David Rauschenbach +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/dev/changelog/pre-43.0.0.md b/dev/changelog/pre-43.0.0.md new file mode 100644 index 000000000..ae3a2348a --- /dev/null +++ b/dev/changelog/pre-43.0.0.md @@ -0,0 +1,715 @@ + + +# DataFusion Python Changelog + +## [42.0.0](https://github.com/apache/datafusion-python/tree/42.0.0) (2024-10-06) + +This release consists of 20 commits from 6 contributors. See credits at the end of this changelog for more information. 
+ +**Implemented enhancements:** + +- feat: expose between [#868](https://github.com/apache/datafusion-python/pull/868) (mesejo) +- feat: make register_csv accept a list of paths [#883](https://github.com/apache/datafusion-python/pull/883) (mesejo) +- feat: expose http object store [#885](https://github.com/apache/datafusion-python/pull/885) (mesejo) + +**Fixed bugs:** + +- fix: Calling `count` on a pyarrow dataset results in an error [#843](https://github.com/apache/datafusion-python/pull/843) (Michael-J-Ward) + +**Other:** + +- Upgrade datafusion [#867](https://github.com/apache/datafusion-python/pull/867) (emgeee) +- Feature/aggregates as windows [#871](https://github.com/apache/datafusion-python/pull/871) (timsaucer) +- Fix regression on register_udaf [#878](https://github.com/apache/datafusion-python/pull/878) (timsaucer) +- build(deps): upgrade setup-protoc action and protoc version number [#873](https://github.com/apache/datafusion-python/pull/873) (Michael-J-Ward) +- build(deps): bump prost-types from 0.13.2 to 0.13.3 [#881](https://github.com/apache/datafusion-python/pull/881) (dependabot[bot]) +- build(deps): bump prost from 0.13.2 to 0.13.3 [#882](https://github.com/apache/datafusion-python/pull/882) (dependabot[bot]) +- chore: remove XFAIL from passing tests [#884](https://github.com/apache/datafusion-python/pull/884) (Michael-J-Ward) +- Add user defined window function support [#880](https://github.com/apache/datafusion-python/pull/880) (timsaucer) +- build(deps): bump syn from 2.0.77 to 2.0.79 [#886](https://github.com/apache/datafusion-python/pull/886) (dependabot[bot]) +- fix example of reading parquet from s3 [#896](https://github.com/apache/datafusion-python/pull/896) (sir-sigurd) +- release-testing [#889](https://github.com/apache/datafusion-python/pull/889) (Michael-J-Ward) +- chore(bench): fix create_tables.sql for tpch benchmark [#897](https://github.com/apache/datafusion-python/pull/897) (Michael-J-Ward) +- Add physical and logical plan 
conversion to and from protobuf [#892](https://github.com/apache/datafusion-python/pull/892) (timsaucer) +- Feature/instance udfs [#890](https://github.com/apache/datafusion-python/pull/890) (timsaucer) +- chore(ci): remove Mambaforge variant from CI [#894](https://github.com/apache/datafusion-python/pull/894) (Michael-J-Ward) +- Use OnceLock to store TokioRuntime [#895](https://github.com/apache/datafusion-python/pull/895) (Michael-J-Ward) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 7 Michael J Ward + 5 Tim Saucer + 3 Daniel Mesejo + 3 dependabot[bot] + 1 Matt Green + 1 Sergey Fedoseev +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + +## [41.0.0](https://github.com/apache/datafusion-python/tree/41.0.0) (2024-09-09) + +This release consists of 19 commits from 6 contributors. See credits at the end of this changelog for more information. 
+ +**Implemented enhancements:** + +- feat: enable list of paths for read_csv [#824](https://github.com/apache/datafusion-python/pull/824) (mesejo) +- feat: better exception and message for table not found [#851](https://github.com/apache/datafusion-python/pull/851) (mesejo) +- feat: make cast accept built-in Python types [#858](https://github.com/apache/datafusion-python/pull/858) (mesejo) + +**Other:** + +- chore: Prepare for 40.0.0 release [#801](https://github.com/apache/datafusion-python/pull/801) (andygrove) +- Add typing-extensions dependency to pyproject [#805](https://github.com/apache/datafusion-python/pull/805) (timsaucer) +- Upgrade deps to datafusion 41 [#802](https://github.com/apache/datafusion-python/pull/802) (Michael-J-Ward) +- Fix SessionContext init with only SessionConfig [#827](https://github.com/apache/datafusion-python/pull/827) (jcrist) +- build(deps): upgrade actions/{upload,download}-artifact@v3 to v4 [#829](https://github.com/apache/datafusion-python/pull/829) (Michael-J-Ward) +- Run ruff format in CI [#837](https://github.com/apache/datafusion-python/pull/837) (timsaucer) +- Add PyCapsule support for Arrow import and export [#825](https://github.com/apache/datafusion-python/pull/825) (timsaucer) +- Feature/expose when function [#836](https://github.com/apache/datafusion-python/pull/836) (timsaucer) +- Add Window Functions for use with function builder [#808](https://github.com/apache/datafusion-python/pull/808) (timsaucer) +- chore: fix typos [#844](https://github.com/apache/datafusion-python/pull/844) (mesejo) +- build(ci): use proper mac runners [#841](https://github.com/apache/datafusion-python/pull/841) (Michael-J-Ward) +- Set of small features [#839](https://github.com/apache/datafusion-python/pull/839) (timsaucer) +- chore: fix docstrings, typos [#852](https://github.com/apache/datafusion-python/pull/852) (mesejo) +- chore: Use datafusion re-exported dependencies [#856](https://github.com/apache/datafusion-python/pull/856) 
(emgeee) +- add guidelines on separating python and rust code [#860](https://github.com/apache/datafusion-python/pull/860) (Michael-J-Ward) +- Update Aggregate functions to take builder parameters [#859](https://github.com/apache/datafusion-python/pull/859) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 7 Tim Saucer + 5 Daniel Mesejo + 4 Michael J Ward + 1 Andy Grove + 1 Jim Crist-Harif + 1 Matt Green +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + +## [40.0.0](https://github.com/apache/datafusion-python/tree/40.0.0) (2024-08-09) + +This release consists of 18 commits from 4 contributors. See credits at the end of this changelog for more information. + +- Update changelog for 39.0.0 [#742](https://github.com/apache/datafusion-python/pull/742) (andygrove) +- build(deps): bump uuid from 1.8.0 to 1.9.1 [#744](https://github.com/apache/datafusion-python/pull/744) (dependabot[bot]) +- build(deps): bump mimalloc from 0.1.42 to 0.1.43 [#745](https://github.com/apache/datafusion-python/pull/745) (dependabot[bot]) +- build(deps): bump syn from 2.0.67 to 2.0.68 [#746](https://github.com/apache/datafusion-python/pull/746) (dependabot[bot]) +- Tsaucer/find window fn [#747](https://github.com/apache/datafusion-python/pull/747) (timsaucer) +- Python wrapper classes for all user interfaces [#750](https://github.com/apache/datafusion-python/pull/750) (timsaucer) +- Expose array sort [#764](https://github.com/apache/datafusion-python/pull/764) (timsaucer) +- Upgrade protobuf and remove GH Action googletest-installer [#773](https://github.com/apache/datafusion-python/pull/773) (Michael-J-Ward) +- Upgrade Datafusion 40 [#771](https://github.com/apache/datafusion-python/pull/771) (Michael-J-Ward) +- Bugfix: Calling count with None arguments 
[#768](https://github.com/apache/datafusion-python/pull/768) (timsaucer) +- Add in user example that compares a two different approaches to UDFs [#770](https://github.com/apache/datafusion-python/pull/770) (timsaucer) +- Add missing exports for wrapper modules [#782](https://github.com/apache/datafusion-python/pull/782) (timsaucer) +- Add PyExpr to_variant conversions [#793](https://github.com/apache/datafusion-python/pull/793) (Michael-J-Ward) +- Add missing expressions to wrapper export [#795](https://github.com/apache/datafusion-python/pull/795) (timsaucer) +- Doc/cross reference [#791](https://github.com/apache/datafusion-python/pull/791) (timsaucer) +- Re-Enable `num_centroids` to `approx_percentile_cont` [#798](https://github.com/apache/datafusion-python/pull/798) (Michael-J-Ward) +- UDAF process all state variables [#799](https://github.com/apache/datafusion-python/pull/799) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 9 Tim Saucer + 4 Michael J Ward + 3 dependabot[bot] + 2 Andy Grove +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ +## [39.0.0](https://github.com/apache/datafusion-python/tree/39.0.0) (2024-06-25) + +**Merged pull requests:** + +- ci: add substrait feature to linux builds [#720](https://github.com/apache/datafusion-python/pull/720) (Michael-J-Ward) +- Docs deploy action [#721](https://github.com/apache/datafusion-python/pull/721) (Michael-J-Ward) +- update deps [#723](https://github.com/apache/datafusion-python/pull/723) (Michael-J-Ward) +- Upgrade maturin [#725](https://github.com/apache/datafusion-python/pull/725) (Michael-J-Ward) +- Upgrade datafusion 39 [#728](https://github.com/apache/datafusion-python/pull/728) (Michael-J-Ward) +- use ScalarValue::to_pyarrow to convert to python object [#731](https://github.com/apache/datafusion-python/pull/731) (Michael-J-Ward) +- Pyo3 `Bound<'py, T>` api [#734](https://github.com/apache/datafusion-python/pull/734) (Michael-J-Ward) +- github test action: drop python 3.7, add python 3.12 [#736](https://github.com/apache/datafusion-python/pull/736) (Michael-J-Ward) +- Pyarrow filter pushdowns [#735](https://github.com/apache/datafusion-python/pull/735) (Michael-J-Ward) +- build(deps): bump syn from 2.0.66 to 2.0.67 [#738](https://github.com/apache/datafusion-python/pull/738) (dependabot[bot]) +- Pyo3 refactorings [#740](https://github.com/apache/datafusion-python/pull/740) (Michael-J-Ward) +- UDAF `sum` workaround [#741](https://github.com/apache/datafusion-python/pull/741) (Michael-J-Ward) + +## [38.0.1](https://github.com/apache/datafusion-python/tree/38.0.1) (2024-05-25) + +**Implemented enhancements:** + +- feat: add python bindings for ends_with function [#693](https://github.com/apache/datafusion-python/pull/693) (richtia) +- feat: expose `named_struct` in python [#700](https://github.com/apache/datafusion-python/pull/700) (Michael-J-Ward) + +**Merged pull requests:** + +- Add document about basics of working with expressions [#668](https://github.com/apache/datafusion-python/pull/668) (timsaucer) +- chore: Update Python release 
process now that DataFusion is TLP [#674](https://github.com/apache/datafusion-python/pull/674) (andygrove) +- Fix Docs [#676](https://github.com/apache/datafusion-python/pull/676) (Michael-J-Ward) +- Add examples from TPC-H [#666](https://github.com/apache/datafusion-python/pull/666) (timsaucer) +- fix conda nightly builds, attempt 2 [#689](https://github.com/apache/datafusion-python/pull/689) (Michael-J-Ward) +- Upgrade to datafusion 38 [#691](https://github.com/apache/datafusion-python/pull/691) (Michael-J-Ward) +- chore: update to maturin's recommended project layout for rust/python… [#695](https://github.com/apache/datafusion-python/pull/695) (Michael-J-Ward) +- chore: update cargo deps [#698](https://github.com/apache/datafusion-python/pull/698) (Michael-J-Ward) +- feat: add python bindings for ends_with function [#693](https://github.com/apache/datafusion-python/pull/693) (richtia) +- feat: expose `named_struct` in python [#700](https://github.com/apache/datafusion-python/pull/700) (Michael-J-Ward) +- Website fixes [#702](https://github.com/apache/datafusion-python/pull/702) (Michael-J-Ward) + +## [37.1.0](https://github.com/apache/datafusion-python/tree/37.1.0) (2024-05-08) + +**Implemented enhancements:** + +- feat: add execute_stream and execute_stream_partitioned [#610](https://github.com/apache/datafusion-python/pull/610) (mesejo) + +**Documentation updates:** + +- docs: update docs CI to install python-311 requirements [#661](https://github.com/apache/datafusion-python/pull/661) (Michael-J-Ward) + +**Merged pull requests:** + +- Switch to Ruff for Python linting [#529](https://github.com/apache/datafusion-python/pull/529) (andygrove) +- Remove sql-on-pandas/polars/cudf examples [#602](https://github.com/apache/datafusion-python/pull/602) (andygrove) +- build(deps): bump object_store from 0.9.0 to 0.9.1 [#611](https://github.com/apache/datafusion-python/pull/611) (dependabot[bot]) +- More missing array funcs 
[#605](https://github.com/apache/datafusion-python/pull/605) (judahrand) +- feat: add execute_stream and execute_stream_partitioned [#610](https://github.com/apache/datafusion-python/pull/610) (mesejo) +- build(deps): bump uuid from 1.7.0 to 1.8.0 [#615](https://github.com/apache/datafusion-python/pull/615) (dependabot[bot]) +- Bind SQLOptions and relative ctx method #567 [#588](https://github.com/apache/datafusion-python/pull/588) (giacomorebecchi) +- bugfix: no panic on empty table [#613](https://github.com/apache/datafusion-python/pull/613) (mesejo) +- Expose `register_listing_table` [#618](https://github.com/apache/datafusion-python/pull/618) (henrifroese) +- Expose unnest feature [#641](https://github.com/apache/datafusion-python/pull/641) (timsaucer) +- Update domain names and paths in asf yaml [#643](https://github.com/apache/datafusion-python/pull/643) (andygrove) +- use python 3.11 to publish docs [#645](https://github.com/apache/datafusion-python/pull/645) (andygrove) +- docs: update docs CI to install python-311 requirements [#661](https://github.com/apache/datafusion-python/pull/661) (Michael-J-Ward) +- Upgrade Datafusion to v37.1.0 [#669](https://github.com/apache/datafusion-python/pull/669) (Michael-J-Ward) + +## [36.0.0](https://github.com/apache/datafusion-python/tree/36.0.0) (2024-03-02) + +**Implemented enhancements:** + +- feat: Add `flatten` array function [#562](https://github.com/apache/datafusion-python/pull/562) (mobley-trent) + +**Documentation updates:** + +- docs: Add ASF attribution [#580](https://github.com/apache/datafusion-python/pull/580) (simicd) + +**Merged pull requests:** + +- Allow PyDataFrame to be used from other projects [#582](https://github.com/apache/datafusion-python/pull/582) (andygrove) +- docs: Add ASF attribution [#580](https://github.com/apache/datafusion-python/pull/580) (simicd) +- Add array functions [#560](https://github.com/apache/datafusion-python/pull/560) (ongchi) +- feat: Add `flatten` array function 
[#562](https://github.com/apache/datafusion-python/pull/562) (mobley-trent) + +## [35.0.0](https://github.com/apache/datafusion-python/tree/35.0.0) (2024-01-20) + +**Merged pull requests:** + +- build(deps): bump syn from 2.0.41 to 2.0.43 [#559](https://github.com/apache/datafusion-python/pull/559) (dependabot[bot]) +- build(deps): bump tokio from 1.35.0 to 1.35.1 [#558](https://github.com/apache/datafusion-python/pull/558) (dependabot[bot]) +- build(deps): bump async-trait from 0.1.74 to 0.1.77 [#556](https://github.com/apache/datafusion-python/pull/556) (dependabot[bot]) +- build(deps): bump pyo3 from 0.20.0 to 0.20.2 [#557](https://github.com/apache/datafusion-python/pull/557) (dependabot[bot]) + +## [34.0.0](https://github.com/apache/datafusion-python/tree/34.0.0) (2023-12-28) + +**Merged pull requests:** + +- Adjust visibility of crate private members & Functions [#537](https://github.com/apache/datafusion-python/pull/537) (jdye64) +- Update json.rst [#538](https://github.com/apache/datafusion-python/pull/538) (ray-andrew) +- Enable mimalloc local_dynamic_tls feature [#540](https://github.com/apache/datafusion-python/pull/540) (jdye64) +- Enable substrait feature to be built by default in CI, for nightlies … [#544](https://github.com/apache/datafusion-python/pull/544) (jdye64) + +## [33.0.0](https://github.com/apache/datafusion-python/tree/33.0.0) (2023-11-16) + +**Merged pull requests:** + +- First pass at getting architectured builds working [#350](https://github.com/apache/datafusion-python/pull/350) (charlesbluca) +- Remove libprotobuf dep [#527](https://github.com/apache/datafusion-python/pull/527) (jdye64) + +## [32.0.0](https://github.com/apache/datafusion-python/tree/32.0.0) (2023-10-21) + +**Implemented enhancements:** + +- feat: expose PyWindowFrame [#509](https://github.com/apache/datafusion-python/pull/509) (dlovell) +- add Binary String Functions;encode,decode [#494](https://github.com/apache/datafusion-python/pull/494) (jiangzhx) +- add 
bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/datafusion-python/pull/496) (jiangzhx) +- add first_value last_value [#498](https://github.com/apache/datafusion-python/pull/498) (jiangzhx) +- add regr\_\* functions [#499](https://github.com/apache/datafusion-python/pull/499) (jiangzhx) +- Add random missing bindings [#522](https://github.com/apache/datafusion-python/pull/522) (jdye64) +- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/datafusion-python/pull/519) (jdye64) +- Add support for window function bindings [#521](https://github.com/apache/datafusion-python/pull/521) (jdye64) + +**Merged pull requests:** + +- Prepare 31.0.0 release [#500](https://github.com/apache/datafusion-python/pull/500) (andygrove) +- Improve release process documentation [#505](https://github.com/apache/datafusion-python/pull/505) (andygrove) +- add Binary String Functions;encode,decode [#494](https://github.com/apache/datafusion-python/pull/494) (jiangzhx) +- build(deps): bump mimalloc from 0.1.38 to 0.1.39 [#502](https://github.com/apache/datafusion-python/pull/502) (dependabot[bot]) +- build(deps): bump syn from 2.0.32 to 2.0.35 [#503](https://github.com/apache/datafusion-python/pull/503) (dependabot[bot]) +- build(deps): bump syn from 2.0.35 to 2.0.37 [#506](https://github.com/apache/datafusion-python/pull/506) (dependabot[bot]) +- Use latest DataFusion [#511](https://github.com/apache/datafusion-python/pull/511) (andygrove) +- add bit_and,bit_or,bit_xor,bool_add,bool_or [#496](https://github.com/apache/datafusion-python/pull/496) (jiangzhx) +- use DataFusion 32 [#515](https://github.com/apache/datafusion-python/pull/515) (andygrove) +- add first_value last_value [#498](https://github.com/apache/datafusion-python/pull/498) (jiangzhx) +- build(deps): bump regex-syntax from 0.7.5 to 0.8.1 [#517](https://github.com/apache/datafusion-python/pull/517) (dependabot[bot]) +- build(deps): bump pyo3-build-config from 
0.19.2 to 0.20.0 [#516](https://github.com/apache/datafusion-python/pull/516) (dependabot[bot]) +- add regr\_\* functions [#499](https://github.com/apache/datafusion-python/pull/499) (jiangzhx) +- Add random missing bindings [#522](https://github.com/apache/datafusion-python/pull/522) (jdye64) +- build(deps): bump rustix from 0.38.18 to 0.38.19 [#523](https://github.com/apache/datafusion-python/pull/523) (dependabot[bot]) +- Allow for multiple input files per table instead of a single file [#519](https://github.com/apache/datafusion-python/pull/519) (jdye64) +- Add support for window function bindings [#521](https://github.com/apache/datafusion-python/pull/521) (jdye64) +- Small clippy fix [#524](https://github.com/apache/datafusion-python/pull/524) (andygrove) + +## [31.0.0](https://github.com/apache/datafusion-python/tree/31.0.0) (2023-09-12) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/28.0.0...31.0.0) + +**Implemented enhancements:** + +- feat: add case function (#447) [#448](https://github.com/apache/datafusion-python/pull/448) (mesejo) +- feat: add compression options [#456](https://github.com/apache/datafusion-python/pull/456) (mesejo) +- feat: add register_json [#458](https://github.com/apache/datafusion-python/pull/458) (mesejo) +- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/datafusion-python/pull/459) (mesejo) +- feat: add example of reading parquet from s3 [#460](https://github.com/apache/datafusion-python/pull/460) (mesejo) +- feat: add register_avro and read_table [#461](https://github.com/apache/datafusion-python/pull/461) (mesejo) +- feat: add missing scalar math functions [#465](https://github.com/apache/datafusion-python/pull/465) (mesejo) + +**Documentation updates:** + +- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/datafusion-python/pull/455) (mesejo) + +**Merged pull requests:** + +- Build Linux aarch64 wheel 
[#443](https://github.com/apache/datafusion-python/pull/443) (gokselk) +- feat: add case function (#447) [#448](https://github.com/apache/datafusion-python/pull/448) (mesejo) +- enhancement(docs): Add user guide (#432) [#445](https://github.com/apache/datafusion-python/pull/445) (mesejo) +- docs: include pre-commit hooks section in contributor guide [#455](https://github.com/apache/datafusion-python/pull/455) (mesejo) +- feat: add compression options [#456](https://github.com/apache/datafusion-python/pull/456) (mesejo) +- Upgrade to DF 28.0.0-rc1 [#457](https://github.com/apache/datafusion-python/pull/457) (andygrove) +- feat: add register_json [#458](https://github.com/apache/datafusion-python/pull/458) (mesejo) +- feat: add basic compression configuration to write_parquet [#459](https://github.com/apache/datafusion-python/pull/459) (mesejo) +- feat: add example of reading parquet from s3 [#460](https://github.com/apache/datafusion-python/pull/460) (mesejo) +- feat: add register_avro and read_table [#461](https://github.com/apache/datafusion-python/pull/461) (mesejo) +- feat: add missing scalar math functions [#465](https://github.com/apache/datafusion-python/pull/465) (mesejo) +- build(deps): bump arduino/setup-protoc from 1 to 2 [#452](https://github.com/apache/datafusion-python/pull/452) (dependabot[bot]) +- Revert "build(deps): bump arduino/setup-protoc from 1 to 2 (#452)" [#474](https://github.com/apache/datafusion-python/pull/474) (viirya) +- Minor: fix wrongly copied function description [#497](https://github.com/apache/datafusion-python/pull/497) (viirya) +- Upgrade to Datafusion 31.0.0 [#491](https://github.com/apache/datafusion-python/pull/491) (judahrand) +- Add `isnan` and `iszero` [#495](https://github.com/apache/datafusion-python/pull/495) (judahrand) + +## 30.0.0 + +- Skipped due to a breaking change in DataFusion + +## 29.0.0 + +- Skipped + +## [28.0.0](https://github.com/apache/datafusion-python/tree/28.0.0) (2023-07-25) + +**Implemented 
enhancements:** + +- feat: expose offset in python API [#437](https://github.com/apache/datafusion-python/pull/437) (cpcloud) + +**Merged pull requests:** + +- File based input utils [#433](https://github.com/apache/datafusion-python/pull/433) (jdye64) +- Upgrade to 28.0.0-rc1 [#434](https://github.com/apache/datafusion-python/pull/434) (andygrove) +- Introduces utility for obtaining SqlTable information from a file like location [#398](https://github.com/apache/datafusion-python/pull/398) (jdye64) +- feat: expose offset in python API [#437](https://github.com/apache/datafusion-python/pull/437) (cpcloud) +- Use DataFusion 28 [#439](https://github.com/apache/datafusion-python/pull/439) (andygrove) + +## [27.0.0](https://github.com/apache/datafusion-python/tree/27.0.0) (2023-07-03) + +**Merged pull requests:** + +- LogicalPlan.to_variant() make public [#412](https://github.com/apache/datafusion-python/pull/412) (jdye64) +- Prepare 27.0.0 release [#423](https://github.com/apache/datafusion-python/pull/423) (andygrove) + +## [26.0.0](https://github.com/apache/datafusion-python/tree/26.0.0) (2023-06-11) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/25.0.0...26.0.0) + +**Merged pull requests:** + +- Add Expr::Case when_then_else support to rex_call_operands function [#388](https://github.com/apache/datafusion-python/pull/388) (jdye64) +- Introduce BaseSessionContext abstract class [#390](https://github.com/apache/datafusion-python/pull/390) (jdye64) +- CRUD Schema support for `BaseSessionContext` [#392](https://github.com/apache/datafusion-python/pull/392) (jdye64) +- CRUD Table support for `BaseSessionContext` [#394](https://github.com/apache/datafusion-python/pull/394) (jdye64) + +## [25.0.0](https://github.com/apache/datafusion-python/tree/25.0.0) (2023-05-23) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/24.0.0...25.0.0) + +**Merged pull requests:** + +- Prepare 24.0.0 Release 
[#376](https://github.com/apache/datafusion-python/pull/376) (andygrove) +- build(deps): bump uuid from 1.3.1 to 1.3.2 [#359](https://github.com/apache/datafusion-python/pull/359) (dependabot[bot]) +- build(deps): bump mimalloc from 0.1.36 to 0.1.37 [#361](https://github.com/apache/datafusion-python/pull/361) (dependabot[bot]) +- build(deps): bump regex-syntax from 0.6.29 to 0.7.1 [#334](https://github.com/apache/datafusion-python/pull/334) (dependabot[bot]) +- upgrade maturin to 0.15.1 [#379](https://github.com/apache/datafusion-python/pull/379) (Jimexist) +- Expand Expr to include RexType basic support [#378](https://github.com/apache/datafusion-python/pull/378) (jdye64) +- Add Python script for generating changelog [#383](https://github.com/apache/datafusion-python/pull/383) (andygrove) + +## [24.0.0](https://github.com/apache/datafusion-python/tree/24.0.0) (2023-05-09) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/23.0.0...24.0.0) + +**Documentation updates:** + +- Fix link to user guide [#354](https://github.com/apache/datafusion-python/pull/354) (andygrove) + +**Merged pull requests:** + +- Add interface to serialize Substrait plans to Python Bytes. [#344](https://github.com/apache/datafusion-python/pull/344) (kylebrooks-8451) +- Add partition_count property to ExecutionPlan. [#346](https://github.com/apache/datafusion-python/pull/346) (kylebrooks-8451) +- Remove unsendable from all Rust pyclass types. [#348](https://github.com/apache/datafusion-python/pull/348) (kylebrooks-8451) +- Fix link to user guide [#354](https://github.com/apache/datafusion-python/pull/354) (andygrove) +- Fix SessionContext execute. 
[#353](https://github.com/apache/datafusion-python/pull/353) (kylebrooks-8451) +- Pub mod expr in lib.rs [#357](https://github.com/apache/datafusion-python/pull/357) (jdye64) +- Add benchmark derived from TPC-H [#355](https://github.com/apache/datafusion-python/pull/355) (andygrove) +- Add db-benchmark [#365](https://github.com/apache/datafusion-python/pull/365) (andygrove) +- First pass of documentation in mdBook [#364](https://github.com/apache/datafusion-python/pull/364) (MrPowers) +- Add 'pub' and '#[pyo3(get, set)]' to DataTypeMap [#371](https://github.com/apache/datafusion-python/pull/371) (jdye64) +- Fix db-benchmark [#369](https://github.com/apache/datafusion-python/pull/369) (andygrove) +- Docs explaining how to view query plans [#373](https://github.com/apache/datafusion-python/pull/373) (andygrove) +- Improve db-benchmark [#372](https://github.com/apache/datafusion-python/pull/372) (andygrove) +- Make expr member of PyExpr public [#375](https://github.com/apache/datafusion-python/pull/375) (jdye64) + +## [23.0.0](https://github.com/apache/datafusion-python/tree/23.0.0) (2023-04-23) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/22.0.0...23.0.0) + +**Merged pull requests:** + +- Improve API docs, README, and examples for configuring context [#321](https://github.com/apache/datafusion-python/pull/321) (andygrove) +- Osx build linker args [#330](https://github.com/apache/datafusion-python/pull/330) (jdye64) +- Add requirements file for python 3.11 [#332](https://github.com/apache/datafusion-python/pull/332) (r4ntix) +- mac arm64 build [#338](https://github.com/apache/datafusion-python/pull/338) (andygrove) +- Add conda.yaml baseline workflow file [#281](https://github.com/apache/datafusion-python/pull/281) (jdye64) +- Prepare for 23.0.0 release [#335](https://github.com/apache/datafusion-python/pull/335) (andygrove) +- Reuse the Tokio Runtime [#341](https://github.com/apache/datafusion-python/pull/341) (kylebrooks-8451) + +## 
[22.0.0](https://github.com/apache/datafusion-python/tree/22.0.0) (2023-04-10) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/21.0.0...22.0.0) + +**Merged pull requests:** + +- Fix invalid build yaml [#308](https://github.com/apache/datafusion-python/pull/308) (andygrove) +- Try fix release build [#309](https://github.com/apache/datafusion-python/pull/309) (andygrove) +- Fix release build [#310](https://github.com/apache/datafusion-python/pull/310) (andygrove) +- Enable datafusion-substrait protoc feature, to remove compile-time dependency on protoc [#312](https://github.com/apache/datafusion-python/pull/312) (andygrove) +- Fix Mac/Win release builds in CI [#313](https://github.com/apache/datafusion-python/pull/313) (andygrove) +- install protoc in docs workflow [#314](https://github.com/apache/datafusion-python/pull/314) (andygrove) +- Fix documentation generation in CI [#315](https://github.com/apache/datafusion-python/pull/315) (andygrove) +- Source wheel fix [#319](https://github.com/apache/datafusion-python/pull/319) (andygrove) + +## [21.0.0](https://github.com/apache/datafusion-python/tree/21.0.0) (2023-03-30) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/20.0.0...21.0.0) + +**Merged pull requests:** + +- minor: Fix minor warning on unused import [#289](https://github.com/apache/datafusion-python/pull/289) (viirya) +- feature: Implement `describe()` method [#293](https://github.com/apache/datafusion-python/pull/293) (simicd) +- fix: Printed results not visible in debugger & notebooks [#296](https://github.com/apache/datafusion-python/pull/296) (simicd) +- add package.include and remove wildcard dependency [#295](https://github.com/apache/datafusion-python/pull/295) (andygrove) +- Update main branch name in docs workflow [#303](https://github.com/apache/datafusion-python/pull/303) (andygrove) +- Upgrade to DF 21 [#301](https://github.com/apache/datafusion-python/pull/301) (andygrove) + +## 
[20.0.0](https://github.com/apache/datafusion-python/tree/20.0.0) (2023-03-17) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/0.8.0...20.0.0) + +**Implemented enhancements:** + +- Empty relation bindings [#208](https://github.com/apache/datafusion-python/pull/208) (jdye64) +- wrap display_name and canonical_name functions [#214](https://github.com/apache/datafusion-python/pull/214) (jdye64) +- Add PyAlias bindings [#216](https://github.com/apache/datafusion-python/pull/216) (jdye64) +- Add bindings for scalar_variable [#218](https://github.com/apache/datafusion-python/pull/218) (jdye64) +- Bindings for LIKE type expressions [#220](https://github.com/apache/datafusion-python/pull/220) (jdye64) +- Bool expr bindings [#223](https://github.com/apache/datafusion-python/pull/223) (jdye64) +- Between bindings [#229](https://github.com/apache/datafusion-python/pull/229) (jdye64) +- Add bindings for GetIndexedField [#227](https://github.com/apache/datafusion-python/pull/227) (jdye64) +- Add bindings for case, cast, and trycast [#232](https://github.com/apache/datafusion-python/pull/232) (jdye64) +- add remaining expr bindings [#233](https://github.com/apache/datafusion-python/pull/233) (jdye64) +- feature: Additional export methods [#236](https://github.com/apache/datafusion-python/pull/236) (simicd) +- Add Python wrapper for LogicalPlan::Union [#240](https://github.com/apache/datafusion-python/pull/240) (iajoiner) +- feature: Create dataframe from pandas, polars, dictionary, list or pyarrow Table [#242](https://github.com/apache/datafusion-python/pull/242) (simicd) +- Add Python wrappers for `LogicalPlan::Join` and `LogicalPlan::CrossJoin` [#246](https://github.com/apache/datafusion-python/pull/246) (iajoiner) +- feature: Set table name from ctx functions [#260](https://github.com/apache/datafusion-python/pull/260) (simicd) +- Explain bindings [#264](https://github.com/apache/datafusion-python/pull/264) (jdye64) +- Extension bindings 
[#266](https://github.com/apache/datafusion-python/pull/266) (jdye64) +- Subquery alias bindings [#269](https://github.com/apache/datafusion-python/pull/269) (jdye64) +- Create memory table [#271](https://github.com/apache/datafusion-python/pull/271) (jdye64) +- Create view bindings [#273](https://github.com/apache/datafusion-python/pull/273) (jdye64) +- Re-export Datafusion dependencies [#277](https://github.com/apache/datafusion-python/pull/277) (jdye64) +- Distinct bindings [#275](https://github.com/apache/datafusion-python/pull/275) (jdye64) +- Drop table bindings [#283](https://github.com/apache/datafusion-python/pull/283) (jdye64) +- Bindings for LogicalPlan::Repartition [#285](https://github.com/apache/datafusion-python/pull/285) (jdye64) +- Expand Rust return type support for Arrow DataTypes in ScalarValue [#287](https://github.com/apache/datafusion-python/pull/287) (jdye64) + +**Documentation updates:** + +- docs: Example of calling Python UDF & UDAF in SQL [#258](https://github.com/apache/datafusion-python/pull/258) (simicd) + +**Merged pull requests:** + +- Minor docs updates [#210](https://github.com/apache/datafusion-python/pull/210) (andygrove) +- Empty relation bindings [#208](https://github.com/apache/datafusion-python/pull/208) (jdye64) +- wrap display_name and canonical_name functions [#214](https://github.com/apache/datafusion-python/pull/214) (jdye64) +- Add PyAlias bindings [#216](https://github.com/apache/datafusion-python/pull/216) (jdye64) +- Add bindings for scalar_variable [#218](https://github.com/apache/datafusion-python/pull/218) (jdye64) +- Bindings for LIKE type expressions [#220](https://github.com/apache/datafusion-python/pull/220) (jdye64) +- Bool expr bindings [#223](https://github.com/apache/datafusion-python/pull/223) (jdye64) +- Between bindings [#229](https://github.com/apache/datafusion-python/pull/229) (jdye64) +- Add bindings for GetIndexedField [#227](https://github.com/apache/datafusion-python/pull/227) (jdye64) +- Add 
bindings for case, cast, and trycast [#232](https://github.com/apache/datafusion-python/pull/232) (jdye64) +- add remaining expr bindings [#233](https://github.com/apache/datafusion-python/pull/233) (jdye64) +- Pre-commit hooks [#228](https://github.com/apache/datafusion-python/pull/228) (jdye64) +- Implement new release process [#149](https://github.com/apache/datafusion-python/pull/149) (andygrove) +- feature: Additional export methods [#236](https://github.com/apache/datafusion-python/pull/236) (simicd) +- Add Python wrapper for LogicalPlan::Union [#240](https://github.com/apache/datafusion-python/pull/240) (iajoiner) +- feature: Create dataframe from pandas, polars, dictionary, list or pyarrow Table [#242](https://github.com/apache/datafusion-python/pull/242) (simicd) +- Fix release instructions [#238](https://github.com/apache/datafusion-python/pull/238) (andygrove) +- Add Python wrappers for `LogicalPlan::Join` and `LogicalPlan::CrossJoin` [#246](https://github.com/apache/datafusion-python/pull/246) (iajoiner) +- docs: Example of calling Python UDF & UDAF in SQL [#258](https://github.com/apache/datafusion-python/pull/258) (simicd) +- feature: Set table name from ctx functions [#260](https://github.com/apache/datafusion-python/pull/260) (simicd) +- Upgrade to DataFusion 19 [#262](https://github.com/apache/datafusion-python/pull/262) (andygrove) +- Explain bindings [#264](https://github.com/apache/datafusion-python/pull/264) (jdye64) +- Extension bindings [#266](https://github.com/apache/datafusion-python/pull/266) (jdye64) +- Subquery alias bindings [#269](https://github.com/apache/datafusion-python/pull/269) (jdye64) +- Create memory table [#271](https://github.com/apache/datafusion-python/pull/271) (jdye64) +- Create view bindings [#273](https://github.com/apache/datafusion-python/pull/273) (jdye64) +- Re-export Datafusion dependencies [#277](https://github.com/apache/datafusion-python/pull/277) (jdye64) +- Distinct bindings 
[#275](https://github.com/apache/datafusion-python/pull/275) (jdye64) +- build(deps): bump actions/checkout from 2 to 3 [#244](https://github.com/apache/datafusion-python/pull/244) (dependabot[bot]) +- build(deps): bump actions/upload-artifact from 2 to 3 [#245](https://github.com/apache/datafusion-python/pull/245) (dependabot[bot]) +- build(deps): bump actions/download-artifact from 2 to 3 [#243](https://github.com/apache/datafusion-python/pull/243) (dependabot[bot]) +- Use DataFusion 20 [#278](https://github.com/apache/datafusion-python/pull/278) (andygrove) +- Drop table bindings [#283](https://github.com/apache/datafusion-python/pull/283) (jdye64) +- Bindings for LogicalPlan::Repartition [#285](https://github.com/apache/datafusion-python/pull/285) (jdye64) +- Expand Rust return type support for Arrow DataTypes in ScalarValue [#287](https://github.com/apache/datafusion-python/pull/287) (jdye64) + +## [0.8.0](https://github.com/apache/datafusion-python/tree/0.8.0) (2023-02-22) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/0.8.0-rc1...0.8.0) + +**Implemented enhancements:** + +- Add support for cuDF physical execution engine [\#202](https://github.com/apache/datafusion-python/issues/202) +- Make it easier to create a Pandas dataframe from DataFusion query results [\#139](https://github.com/apache/datafusion-python/issues/139) + +**Fixed bugs:** + +- Build error: could not compile `thiserror` due to 2 previous errors [\#69](https://github.com/apache/datafusion-python/issues/69) + +**Closed issues:** + +- Integrate with the new `object_store` crate [\#22](https://github.com/apache/datafusion-python/issues/22) + +**Merged pull requests:** + +- Update README in preparation for 0.8 release [\#206](https://github.com/apache/datafusion-python/pull/206) ([andygrove](https://github.com/andygrove)) +- Add support for cudf as a physical execution engine [\#205](https://github.com/apache/datafusion-python/pull/205) 
([jdye64](https://github.com/jdye64)) +- Run `maturin develop` instead of `cargo build` in verification script [\#200](https://github.com/apache/datafusion-python/pull/200) ([andygrove](https://github.com/andygrove)) +- Add tests for recently added functionality [\#199](https://github.com/apache/datafusion-python/pull/199) ([andygrove](https://github.com/andygrove)) +- Implement `to_pandas()` [\#197](https://github.com/apache/datafusion-python/pull/197) ([simicd](https://github.com/simicd)) +- Add Python wrapper for LogicalPlan::Sort [\#196](https://github.com/apache/datafusion-python/pull/196) ([andygrove](https://github.com/andygrove)) +- Add Python wrapper for LogicalPlan::Aggregate [\#195](https://github.com/apache/datafusion-python/pull/195) ([andygrove](https://github.com/andygrove)) +- Add Python wrapper for LogicalPlan::Limit [\#193](https://github.com/apache/datafusion-python/pull/193) ([andygrove](https://github.com/andygrove)) +- Add Python wrapper for LogicalPlan::Filter [\#192](https://github.com/apache/datafusion-python/pull/192) ([andygrove](https://github.com/andygrove)) +- Add experimental support for executing SQL with Polars and Pandas [\#190](https://github.com/apache/datafusion-python/pull/190) ([andygrove](https://github.com/andygrove)) +- Update changelog for 0.8 release [\#188](https://github.com/apache/datafusion-python/pull/188) ([andygrove](https://github.com/andygrove)) +- Add ability to execute ExecutionPlan and get a stream of RecordBatch [\#186](https://github.com/apache/datafusion-python/pull/186) ([andygrove](https://github.com/andygrove)) +- Dffield bindings [\#185](https://github.com/apache/datafusion-python/pull/185) ([jdye64](https://github.com/jdye64)) +- Add bindings for DFSchema [\#183](https://github.com/apache/datafusion-python/pull/183) ([jdye64](https://github.com/jdye64)) +- test: Window functions [\#182](https://github.com/apache/datafusion-python/pull/182) ([simicd](https://github.com/simicd)) +- Add bindings for 
Projection [\#180](https://github.com/apache/datafusion-python/pull/180) ([jdye64](https://github.com/jdye64)) +- Table scan bindings [\#178](https://github.com/apache/datafusion-python/pull/178) ([jdye64](https://github.com/jdye64)) +- Make session configurable [\#176](https://github.com/apache/datafusion-python/pull/176) ([andygrove](https://github.com/andygrove)) +- Upgrade to DataFusion 18.0.0 [\#175](https://github.com/apache/datafusion-python/pull/175) ([andygrove](https://github.com/andygrove)) +- Use latest DataFusion rev in preparation for DF 18 release [\#174](https://github.com/apache/datafusion-python/pull/174) ([andygrove](https://github.com/andygrove)) +- Arrow type bindings [\#173](https://github.com/apache/datafusion-python/pull/173) ([jdye64](https://github.com/jdye64)) +- Pyo3 bump [\#171](https://github.com/apache/datafusion-python/pull/171) ([jdye64](https://github.com/jdye64)) +- feature: Add additional aggregation functions [\#170](https://github.com/apache/datafusion-python/pull/170) ([simicd](https://github.com/simicd)) +- Make from_substrait_plan return DataFrame instead of LogicalPlan [\#164](https://github.com/apache/datafusion-python/pull/164) ([andygrove](https://github.com/andygrove)) +- feature: Implement count method [\#163](https://github.com/apache/datafusion-python/pull/163) ([simicd](https://github.com/simicd)) +- CI Fixes [\#162](https://github.com/apache/datafusion-python/pull/162) ([jdye64](https://github.com/jdye64)) +- Upgrade to DataFusion 17 [\#160](https://github.com/apache/datafusion-python/pull/160) ([andygrove](https://github.com/andygrove)) +- feature: Improve string representation of datafusion classes [\#159](https://github.com/apache/datafusion-python/pull/159) ([simicd](https://github.com/simicd)) +- Make PyExecutionPlan.plan public [\#156](https://github.com/apache/datafusion-python/pull/156) ([andygrove](https://github.com/andygrove)) +- Expose methods on logical and execution plans 
[\#155](https://github.com/apache/datafusion-python/pull/155) ([andygrove](https://github.com/andygrove)) +- Fix clippy for new Rust version [\#154](https://github.com/apache/datafusion-python/pull/154) ([andygrove](https://github.com/andygrove)) +- Add DataFrame methods for accessing plans [\#153](https://github.com/apache/datafusion-python/pull/153) ([andygrove](https://github.com/andygrove)) +- Use DataFusion rev 5238e8c97f998b4d2cb9fab85fb182f325a1a7fb [\#150](https://github.com/apache/datafusion-python/pull/150) ([andygrove](https://github.com/andygrove)) +- build\(deps\): bump async-trait from 0.1.61 to 0.1.62 [\#148](https://github.com/apache/datafusion-python/pull/148) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Rename default branch from master to main [\#147](https://github.com/apache/datafusion-python/pull/147) ([andygrove](https://github.com/andygrove)) +- Substrait bindings [\#145](https://github.com/apache/datafusion-python/pull/145) ([jdye64](https://github.com/jdye64)) +- build\(deps\): bump uuid from 0.8.2 to 1.2.2 [\#143](https://github.com/apache/datafusion-python/pull/143) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Prepare for 0.8.0 release [\#141](https://github.com/apache/datafusion-python/pull/141) ([andygrove](https://github.com/andygrove)) +- Improve README and add more examples [\#137](https://github.com/apache/datafusion-python/pull/137) ([andygrove](https://github.com/andygrove)) +- test: Expand tests for built-in functions [\#129](https://github.com/apache/datafusion-python/pull/129) ([simicd](https://github.com/simicd)) +- build\(deps\): bump object_store from 0.5.2 to 0.5.3 [\#126](https://github.com/apache/datafusion-python/pull/126) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump mimalloc from 0.1.32 to 0.1.34 [\#125](https://github.com/apache/datafusion-python/pull/125) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Introduce conda directory containing 
datafusion-dev.yaml conda enviro… [\#124](https://github.com/apache/datafusion-python/pull/124) ([jdye64](https://github.com/jdye64)) +- build\(deps\): bump bzip2 from 0.4.3 to 0.4.4 [\#121](https://github.com/apache/datafusion-python/pull/121) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump tokio from 1.23.0 to 1.24.1 [\#119](https://github.com/apache/datafusion-python/pull/119) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump async-trait from 0.1.60 to 0.1.61 [\#118](https://github.com/apache/datafusion-python/pull/118) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Upgrade to DataFusion 16.0.0 [\#115](https://github.com/apache/datafusion-python/pull/115) ([andygrove](https://github.com/andygrove)) +- Bump async-trait from 0.1.57 to 0.1.60 [\#114](https://github.com/apache/datafusion-python/pull/114) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump object_store from 0.5.1 to 0.5.2 [\#112](https://github.com/apache/datafusion-python/pull/112) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Bump tokio from 1.21.2 to 1.23.0 [\#109](https://github.com/apache/datafusion-python/pull/109) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add entries for publishing production \(asf-site\) and staging docs [\#107](https://github.com/apache/datafusion-python/pull/107) ([martin-g](https://github.com/martin-g)) +- Add a workflow that builds the docs and deploys them at staged or production [\#104](https://github.com/apache/datafusion-python/pull/104) ([martin-g](https://github.com/martin-g)) +- Upgrade to DataFusion 15.0.0 [\#103](https://github.com/apache/datafusion-python/pull/103) ([andygrove](https://github.com/andygrove)) +- build\(deps\): bump futures from 0.3.24 to 0.3.25 [\#102](https://github.com/apache/datafusion-python/pull/102) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump pyo3 from 0.17.2 to 0.17.3 
[\#101](https://github.com/apache/datafusion-python/pull/101) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump mimalloc from 0.1.30 to 0.1.32 [\#98](https://github.com/apache/datafusion-python/pull/98) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump rand from 0.7.3 to 0.8.5 [\#97](https://github.com/apache/datafusion-python/pull/97) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix GitHub actions warnings [\#95](https://github.com/apache/datafusion-python/pull/95) ([martin-g](https://github.com/martin-g)) +- Fixes \#81 - Add CI workflow for source distribution [\#93](https://github.com/apache/datafusion-python/pull/93) ([martin-g](https://github.com/martin-g)) +- post-release updates [\#91](https://github.com/apache/datafusion-python/pull/91) ([andygrove](https://github.com/andygrove)) +- Build for manylinux 2014 [\#88](https://github.com/apache/datafusion-python/pull/88) ([martin-g](https://github.com/martin-g)) +- update release readme tag [\#86](https://github.com/apache/datafusion-python/pull/86) ([Jimexist](https://github.com/Jimexist)) +- Upgrade Maturin to 0.14.2 [\#85](https://github.com/apache/datafusion-python/pull/85) ([martin-g](https://github.com/martin-g)) +- Update release instructions [\#83](https://github.com/apache/datafusion-python/pull/83) ([andygrove](https://github.com/andygrove)) +- \[Functions\] - Add python function binding to `functions` [\#73](https://github.com/apache/datafusion-python/pull/73) ([francis-du](https://github.com/francis-du)) + +## [0.8.0-rc1](https://github.com/apache/datafusion-python/tree/0.8.0-rc1) (2023-02-17) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/0.7.0-rc2...0.8.0-rc1) + +**Implemented enhancements:** + +- Add bindings for datafusion_common::DFField [\#184](https://github.com/apache/datafusion-python/issues/184) +- Add bindings for DFSchema/DFSchemaRef 
[\#181](https://github.com/apache/datafusion-python/issues/181) +- Add bindings for datafusion_expr Projection [\#179](https://github.com/apache/datafusion-python/issues/179) +- Add bindings for `TableScan` struct from `datafusion_expr::TableScan` [\#177](https://github.com/apache/datafusion-python/issues/177) +- Add a "mapping" struct for types [\#172](https://github.com/apache/datafusion-python/issues/172) +- Improve string representation of datafusion classes \(dataframe, context, expression, ...\) [\#158](https://github.com/apache/datafusion-python/issues/158) +- Add DataFrame count method [\#151](https://github.com/apache/datafusion-python/issues/151) +- \[REQUEST\] Github Actions Improvements [\#146](https://github.com/apache/datafusion-python/issues/146) +- Change default branch name from master to main [\#144](https://github.com/apache/datafusion-python/issues/144) +- Bump pyo3 to 0.18.0 [\#140](https://github.com/apache/datafusion-python/issues/140) +- Add script for Python linting [\#134](https://github.com/apache/datafusion-python/issues/134) +- Add Python bindings for substrait module [\#132](https://github.com/apache/datafusion-python/issues/132) +- Expand unit tests for built-in functions [\#128](https://github.com/apache/datafusion-python/issues/128) +- support creating arrow-datafusion-python conda environment [\#122](https://github.com/apache/datafusion-python/issues/122) +- Build Python source distribution in GitHub workflow [\#81](https://github.com/apache/datafusion-python/issues/81) +- EPIC: Add all functions to python binding `functions` [\#72](https://github.com/apache/datafusion-python/issues/72) + +**Fixed bugs:** + +- Build is broken [\#161](https://github.com/apache/datafusion-python/issues/161) +- Out of memory when sorting [\#157](https://github.com/apache/datafusion-python/issues/157) +- window_lead test appears to be non-deterministic [\#135](https://github.com/apache/datafusion-python/issues/135) +- Reading csv does not work 
[\#130](https://github.com/apache/datafusion-python/issues/130) +- Github actions produce a lot of warnings [\#94](https://github.com/apache/datafusion-python/issues/94) +- ASF source release tarball has wrong directory name [\#90](https://github.com/apache/datafusion-python/issues/90) +- Python Release Build failing after upgrading to maturin 14.2 [\#87](https://github.com/apache/datafusion-python/issues/87) +- Maturin build hangs on Linux ARM64 [\#84](https://github.com/apache/datafusion-python/issues/84) +- Cannot install on Mac M1 from source tarball from testpypi [\#82](https://github.com/apache/datafusion-python/issues/82) +- ImportPathMismatchError when running pytest locally [\#77](https://github.com/apache/datafusion-python/issues/77) + +**Closed issues:** + +- Publish documentation for Python bindings [\#39](https://github.com/apache/datafusion-python/issues/39) +- Add Python binding for `approx_median` [\#32](https://github.com/apache/datafusion-python/issues/32) +- Release version 0.7.0 [\#7](https://github.com/apache/datafusion-python/issues/7) + +## [0.7.0-rc2](https://github.com/apache/datafusion-python/tree/0.7.0-rc2) (2022-11-26) + +[Full Changelog](https://github.com/apache/datafusion-python/compare/0.7.0...0.7.0-rc2) + +## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) + +**Merged pull requests:** + +- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) +- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) +- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) +- update readme and changelog 
[\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) +- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) +- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) +- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) +- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) +- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) +- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) +- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) +- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) +- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) +- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) +- update test workflow to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) +- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) +- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) 
([Jimexist](https://github.com/Jimexist)) +- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) +- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) +- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) +- Ship the Cargo.lock file in the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) +- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) +- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) +- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) + +## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) + +## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) + +## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) + +## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) + +**Closed issues:** + +- Add support for Ballista 
[\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) +- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) + +## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) + +**Closed issues:** + +- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) +- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) +- Why is pandas a requirement? [\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) +- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) +- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) From 79c22d6d6c0809e7e93a0a23249baa516dbd8d6f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 4 Dec 2024 05:56:11 -0500 Subject: [PATCH 028/248] Search default window functions if no session context was provided (#963) * Search default window functions if no session context was provided * Check if value is None because [] don't trigger the intended behavior --- python/datafusion/dataframe.py | 6 +++--- python/datafusion/functions.py | 1 + src/functions.rs | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index e283f590e..0b38db924 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -446,14 +446,14 @@ def join( left_on = join_keys[0] right_on = join_keys[1] - if on: - if left_on or right_on: + if on is not None: + if left_on is not None or right_on is not None: raise ValueError( "`left_on` or `right_on` should not provided with `on`" ) left_on = on right_on = 
on - elif left_on or right_on: + elif left_on is not None or right_on is not None: if left_on is None or right_on is None: raise ValueError("`left_on` and `right_on` should both be provided.") else: diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 15ad8822f..f3ee5c092 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -431,6 +431,7 @@ def window( partition_by = expr_list_to_raw_expr_list(partition_by) order_by_raw = sort_list_to_raw_sort_list(order_by) window_frame = window_frame.window_frame if window_frame is not None else None + ctx = ctx.ctx if ctx is not None else None return Expr(f.window(name, args, partition_by, order_by_raw, window_frame, ctx)) diff --git a/src/functions.rs b/src/functions.rs index e29c57f9b..5c450286f 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -16,6 +16,7 @@ // under the License. use datafusion::functions_aggregate::all_default_aggregate_functions; +use datafusion::functions_window::all_default_window_functions; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -282,6 +283,16 @@ fn find_window_fn(name: &str, ctx: Option) -> PyResult Date: Tue, 7 Jan 2025 21:28:25 +0800 Subject: [PATCH 029/248] Add arrow cast (#962) * feat: add data_type parameter to expr_fn macro for arrow_cast function * feat: add arrow_cast function to cast expressions to specified data types * docs: add casting section to user guide with examples for arrow_cast function * test: add unit test for arrow_cast function to validate casting to Float64 and Int32 * fix: update arrow_cast function to accept Expr type for data_type parameter * fix: update test_arrow_cast to use literal casting for data types * fix: update arrow_cast function to accept string type for data_type parameter * fix: update arrow_cast function to accept Expr type for data_type parameter * fix: update test_arrow_cast to use literal for data 
type parameters * fix: update arrow_cast function to use arg_1 for datatype parameter * fix: update arrow_cast function to accept string type for data_type parameter * Revert "fix: update arrow_cast function to accept string type for data_type parameter" This reverts commit eba0d320820e8f3f9688781f27b2a5579c0e9949. * fix: update test_arrow_cast to cast literals to string type for arrow_cast function * Revert "fix: update test_arrow_cast to cast literals to string type for arrow_cast function" This reverts commit 856ff8c4cad0075c282089b5368a7c3fd17f03d8. * fix: update arrow_cast function to accept string type for data_type parameter * Revert "fix: update arrow_cast function to accept string type for data_type parameter" This reverts commit 9e1ced7fb56c8aec47bc9f540ea5686c7246f022. * fix: add utf8_literal function to create UTF8 literal expressions in tests * Revert "fix: add utf8_literal function to create UTF8 literal expressions in tests" This reverts commit 11ed6749e02ab7b34d47fa105961f088f9fc9245. 
* feat: add utf8_literal function to create UTF8 literal expressions * fix: update test_arrow_cast to use column 'b' * fix: enhance utf8_literal function to handle non-string values * Add description for utf8_literal vs literal * docs: clarify utf8_literal function documentation to explain use case * docs: add clarification comments for utf8_literal usage in arrow_cast tests * docs: implement ruff recommendation * fix ruff errors * docs: update examples to use utf8_literal in arrow_cast function * docs: correct typo in comment for utf8_literal usage in test_arrow_cast * docs: remove redundant comment in test_arrow_cast for clarity * refactor: rename utf8_literal to string_literal and add alias str_lit * docs: improve docstring for string_literal function for clarity * docs: update import statement to include str_lit alias for string_literal --- .../user-guide/common-operations/functions.rst | 13 ++++++++++++- python/datafusion/__init__.py | 13 +++++++++++++ python/datafusion/expr.py | 16 ++++++++++++++++ python/datafusion/functions.py | 6 ++++++ python/tests/test_functions.py | 18 +++++++++++++++++- src/functions.rs | 3 ++- 6 files changed, 66 insertions(+), 3 deletions(-) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index ad71c72ac..12097be8f 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -38,7 +38,7 @@ DataFusion offers mathematical functions such as :py:func:`~datafusion.functions .. 
ipython:: python - from datafusion import col, literal + from datafusion import col, literal, string_literal, str_lit from datafusion import functions as f df.select( @@ -104,6 +104,17 @@ This also includes the functions for regular expressions like :py:func:`~datafus f.regexp_replace(col('"Name"'), literal("saur"), literal("fleur")).alias("flowers") ) +Casting +------- + +Casting expressions to different data types using :py:func:`~datafusion.functions.arrow_cast` + +.. ipython:: python + + df.select( + f.arrow_cast(col('"Total"'), string_literal("Float64")).alias("total_as_float"), + f.arrow_cast(col('"Total"'), str_lit("Int32")).alias("total_as_int") + ) Other ----- diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index e0bc57f44..7367b0d3b 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -107,6 +107,19 @@ def literal(value): return Expr.literal(value) +def string_literal(value): + """Create a UTF8 literal expression. + + It differs from `literal` which creates a UTF8view literal. + """ + return Expr.string_literal(value) + + +def str_lit(value): + """Alias for `string_literal`.""" + return string_literal(value) + + def lit(value): """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index b10724381..16add16f4 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -380,6 +380,22 @@ def literal(value: Any) -> Expr: value = pa.scalar(value) return Expr(expr_internal.Expr.literal(value)) + @staticmethod + def string_literal(value: str) -> Expr: + """Creates a new expression representing a UTF8 literal value. 
+ + It is different from `literal` because it is pa.string() instead of + pa.string_view() + + This is needed for cases where DataFusion is expecting a UTF8 instead of + UTF8View literal, like in: + https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 + """ + if isinstance(value, str): + value = pa.scalar(value, type=pa.string()) + return Expr(expr_internal.Expr.literal(value)) + return Expr.literal(value) + @staticmethod def column(value: str) -> Expr: """Creates a new expression representing a column.""" diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index f3ee5c092..c0097c6ab 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -82,6 +82,7 @@ "array_to_string", "array_union", "arrow_typeof", + "arrow_cast", "ascii", "asin", "asinh", @@ -1109,6 +1110,11 @@ def arrow_typeof(arg: Expr) -> Expr: return Expr(f.arrow_typeof(arg.expr)) +def arrow_cast(expr: Expr, data_type: Expr) -> Expr: + """Casts an expression to a specified data type.""" + return Expr(f.arrow_cast(expr.expr, data_type.expr)) + + def random() -> Expr: """Returns a random value in the range ``0.0 <= x < 1.0``.""" return Expr(f.random()) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 0d2fa8f94..5dce188ed 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -23,7 +23,7 @@ from datafusion import SessionContext, column from datafusion import functions as f -from datafusion import literal +from datafusion import literal, string_literal np.seterr(invalid="ignore") @@ -907,6 +907,22 @@ def test_temporal_functions(df): assert result.column(10) == pa.array([31, 26, 2], type=pa.float64()) +def test_arrow_cast(df): + df = df.select( + # we use `string_literal` to return utf8 instead of `literal` which returns + # utf8view because datafusion.arrow_cast expects a utf8 instead of utf8view + # 
https://github.com/apache/datafusion/blob/86740bfd3d9831d6b7c1d0e1bf4a21d91598a0ac/datafusion/functions/src/core/arrow_cast.rs#L179 + f.arrow_cast(column("b"), string_literal("Float64")).alias("b_as_float"), + f.arrow_cast(column("b"), string_literal("Int32")).alias("b_as_int"), + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + + assert result.column(0) == pa.array([4.0, 5.0, 6.0], type=pa.float64()) + assert result.column(1) == pa.array([4, 5, 6], type=pa.int32()) + + def test_case(df): df = df.select( f.case(column("b")).when(literal(4), literal(10)).otherwise(literal(8)), diff --git a/src/functions.rs b/src/functions.rs index 5c450286f..ccc1981bd 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -400,7 +400,6 @@ macro_rules! expr_fn { } }; } - /// Generates a [pyo3] wrapper for [datafusion::functions::expr_fn] /// /// These functions take a single `Vec` argument using `pyo3(signature = (*args))`. @@ -575,6 +574,7 @@ expr_fn_vec!(r#struct); // Use raw identifier since struct is a keyword expr_fn_vec!(named_struct); expr_fn!(from_unixtime, unixtime); expr_fn!(arrow_typeof, arg_1); +expr_fn!(arrow_cast, arg_1 datatype); expr_fn!(random); // Array Functions @@ -867,6 +867,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(range))?; m.add_wrapped(wrap_pyfunction!(array_agg))?; m.add_wrapped(wrap_pyfunction!(arrow_typeof))?; + m.add_wrapped(wrap_pyfunction!(arrow_cast))?; m.add_wrapped(wrap_pyfunction!(ascii))?; m.add_wrapped(wrap_pyfunction!(asin))?; m.add_wrapped(wrap_pyfunction!(asinh))?; From 85fe35cf433c2168fee40dc48bdaa80126bf4a42 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 7 Jan 2025 05:30:13 -0800 Subject: [PATCH 030/248] Fix small issues in pyproject.toml (#976) * Fix small issues in pyproject.toml * Update classifiers --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 
d327c0ec1..98bda5aae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ readme = "README.md" license = { file = "LICENSE.txt" } requires-python = ">=3.7" keywords = ["datafusion", "dataframe", "rust", "query-engine"] -classifier = [ +classifiers = [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", @@ -39,10 +39,14 @@ classifier = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python", "Programming Language :: Rust", ] dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"] +dynamic = ["version"] [project.urls] homepage = "https://datafusion.apache.org/python" From 63b13da4bccd66cb474186ebc2c4a1f8ba82230f Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Tue, 7 Jan 2025 14:34:44 +0100 Subject: [PATCH 031/248] chore: set validation and typehint (#983) --- python/datafusion/context.py | 13 ++++++++++++- src/context.rs | 4 ++-- src/dataframe.rs | 21 +-------------------- src/utils.rs | 21 +++++++++++++++++++++ 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index a07b5d175..3fa133346 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -63,6 +63,15 @@ def __arrow_c_array__( # noqa: D105 ) -> tuple[object, object]: ... +class TableProviderExportable(Protocol): + """Type hint for object that has __datafusion_table_provider__ PyCapsule. + + https://datafusion.apache.org/python/user-guide/io/table_provider.html + """ + + def __datafusion_table_provider__(self) -> object: ... 
# noqa: D105 + + class SessionConfig: """Session configuration options.""" @@ -685,7 +694,9 @@ def deregister_table(self, name: str) -> None: """Remove a table from the session.""" self.ctx.deregister_table(name) - def register_table_provider(self, name: str, provider: Any) -> None: + def register_table_provider( + self, name: str, provider: TableProviderExportable + ) -> None: """Register a table provider. This table provider must have a method called ``__datafusion_table_provider__`` diff --git a/src/context.rs b/src/context.rs index 8675e97df..0512285a7 100644 --- a/src/context.rs +++ b/src/context.rs @@ -43,7 +43,7 @@ use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; use crate::udf::PyScalarUDF; use crate::udwf::PyWindowUDF; -use crate::utils::{get_tokio_runtime, wait_for_future}; +use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; @@ -576,7 +576,7 @@ impl PySessionContext { if provider.hasattr("__datafusion_table_provider__")? 
{ let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; let capsule = capsule.downcast::()?; - // validate_pycapsule(capsule, "arrow_array_stream")?; + validate_pycapsule(capsule, "datafusion_table_provider")?; let provider = unsafe { capsule.reference::() }; let provider: ForeignTableProvider = provider.into(); diff --git a/src/dataframe.rs b/src/dataframe.rs index e7d6ca6d6..fcb46a756 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -44,7 +44,7 @@ use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; -use crate::utils::{get_tokio_runtime, wait_for_future}; +use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; use crate::{ errors::DataFusionError, expr::{sort_expr::PySortExpr, PyExpr}, @@ -724,22 +724,3 @@ fn record_batch_into_schema( RecordBatch::try_new(schema, data_arrays) } - -fn validate_pycapsule(capsule: &Bound, name: &str) -> PyResult<()> { - let capsule_name = capsule.name()?; - if capsule_name.is_none() { - return Err(PyValueError::new_err( - "Expected schema PyCapsule to have name set.", - )); - } - - let capsule_name = capsule_name.unwrap().to_str()?; - if capsule_name != name { - return Err(PyValueError::new_err(format!( - "Expected name '{}' in PyCapsule, instead got '{}'", - name, capsule_name - ))); - } - - Ok(()) -} diff --git a/src/utils.rs b/src/utils.rs index 7fb23cafe..795589752 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -18,7 +18,9 @@ use crate::errors::DataFusionError; use crate::TokioRuntime; use datafusion::logical_expr::Volatility; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::types::PyCapsule; use std::future::Future; use std::sync::OnceLock; use tokio::runtime::Runtime; @@ -58,3 +60,22 @@ pub(crate) fn parse_volatility(value: &str) -> Result, name: &str) -> PyResult<()> { + let capsule_name = capsule.name()?; + if capsule_name.is_none() 
{ + return Err(PyValueError::new_err( + "Expected schema PyCapsule to have name set.", + )); + } + + let capsule_name = capsule_name.unwrap().to_str()?; + if capsule_name != name { + return Err(PyValueError::new_err(format!( + "Expected name '{}' in PyCapsule, instead got '{}'", + name, capsule_name + ))); + } + + Ok(()) +} From 389164aa90c8dbe689f2e8eac0677ef2b80aaad9 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 9 Jan 2025 05:39:31 +0800 Subject: [PATCH 032/248] feat: support enable_url_table config (#980) * feat: support enable_url_table config * change enable_url_table as method * Remove whitespace --------- Co-authored-by: Tim Saucer --- examples/create-context.py | 3 +++ python/datafusion/context.py | 11 +++++++++++ src/context.rs | 6 ++++++ 3 files changed, 20 insertions(+) diff --git a/examples/create-context.py b/examples/create-context.py index 3184d4085..11525d8b8 100644 --- a/examples/create-context.py +++ b/examples/create-context.py @@ -37,3 +37,6 @@ ) ctx = SessionContext(config, runtime) print(ctx) + +ctx = ctx.enable_url_table() +print(ctx) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 3fa133346..6d7f574c4 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -481,6 +481,17 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) + def enable_url_table(self) -> "SessionContext": + """Control if local files can be queried as tables. + + Returns: + A new :py:class:`SessionContext` object with url table enabled. 
+ """ + klass = self.__class__ + obj = klass.__new__(klass) + obj.ctx = self.ctx.enable_url_table() + return obj + def register_object_store( self, schema: str, store: Any, host: str | None = None ) -> None: diff --git a/src/context.rs b/src/context.rs index 0512285a7..88c90e0fd 100644 --- a/src/context.rs +++ b/src/context.rs @@ -299,6 +299,12 @@ impl PySessionContext { }) } + pub fn enable_url_table(&self) -> PyResult { + Ok(PySessionContext { + ctx: self.ctx.clone().enable_url_table(), + }) + } + /// Register an object store with the given name #[pyo3(signature = (scheme, store, host=None))] pub fn register_object_store( From 4b262be15202f5efb9a963faf66452f7fb0bbad3 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 9 Jan 2025 03:54:38 -0800 Subject: [PATCH 033/248] Support async iteration of RecordBatchStream (#975) * Support async iteration of RecordBatchStream * use __anext__ * use await * fix failing test * Since we are raising an error instead of returning a None, we can update the type hint. 
--------- Co-authored-by: Tim Saucer --- Cargo.lock | 14 +++++++++ Cargo.toml | 3 +- python/datafusion/record_batch.py | 16 ++++++---- python/tests/test_dataframe.py | 4 +-- src/record_batch.rs | 51 +++++++++++++++++++++++++------ 5 files changed, 69 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d1f291be9..352771cdb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1303,6 +1303,7 @@ dependencies = [ "prost", "prost-types", "pyo3", + "pyo3-async-runtimes", "pyo3-build-config", "tokio", "url", @@ -2672,6 +2673,19 @@ dependencies = [ "unindent", ] +[[package]] +name = "pyo3-async-runtimes" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2529f0be73ffd2be0cc43c013a640796558aa12d7ca0aab5cc14f375b4733031" +dependencies = [ + "futures", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", +] + [[package]] name = "pyo3-build-config" version = "0.22.6" diff --git a/Cargo.toml b/Cargo.toml index 703fc5a26..d28844685 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.41", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} arrow = { version = "53", features = ["pyarrow"] } datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } datafusion-substrait = { version = "43.0.0", optional = true } @@ -60,4 +61,4 @@ crate-type = ["cdylib", "rlib"] [profile.release] lto = true -codegen-units = 1 \ No newline at end of file +codegen-units = 1 diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 44936f7d8..75e58998f 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -57,20 +57,24 @@ def __init__(self, record_batch_stream: df_internal.RecordBatchStream) -> None: 
"""This constructor is typically not called by the end user.""" self.rbs = record_batch_stream - def next(self) -> RecordBatch | None: + def next(self) -> RecordBatch: """See :py:func:`__next__` for the iterator function.""" - try: - next_batch = next(self) - except StopIteration: - return None + return next(self) - return next_batch + async def __anext__(self) -> RecordBatch: + """Async iterator function.""" + next_batch = await self.rbs.__anext__() + return RecordBatch(next_batch) def __next__(self) -> RecordBatch: """Iterator function.""" next_batch = next(self.rbs) return RecordBatch(next_batch) + def __aiter__(self) -> typing_extensions.Self: + """Async iterator function.""" + return self + def __iter__(self) -> typing_extensions.Self: """Iterator function.""" return self diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index b82f95e35..e3bd1b2a5 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -761,8 +761,8 @@ def test_execution_plan(aggregate_df): batch = stream.next() assert batch is not None # there should be no more batches - batch = stream.next() - assert batch is None + with pytest.raises(StopIteration): + stream.next() def test_repartition(df): diff --git a/src/record_batch.rs b/src/record_batch.rs index 427807f22..eacdb5867 100644 --- a/src/record_batch.rs +++ b/src/record_batch.rs @@ -15,13 +15,17 @@ // specific language governing permissions and limitations // under the License. 
+use std::sync::Arc; + use crate::utils::wait_for_future; use datafusion::arrow::pyarrow::ToPyArrow; use datafusion::arrow::record_batch::RecordBatch; use datafusion::physical_plan::SendableRecordBatchStream; use futures::StreamExt; +use pyo3::exceptions::{PyStopAsyncIteration, PyStopIteration}; use pyo3::prelude::*; use pyo3::{pyclass, pymethods, PyObject, PyResult, Python}; +use tokio::sync::Mutex; #[pyclass(name = "RecordBatch", module = "datafusion", subclass)] pub struct PyRecordBatch { @@ -43,31 +47,58 @@ impl From for PyRecordBatch { #[pyclass(name = "RecordBatchStream", module = "datafusion", subclass)] pub struct PyRecordBatchStream { - stream: SendableRecordBatchStream, + stream: Arc>, } impl PyRecordBatchStream { pub fn new(stream: SendableRecordBatchStream) -> Self { - Self { stream } + Self { + stream: Arc::new(Mutex::new(stream)), + } } } #[pymethods] impl PyRecordBatchStream { - fn next(&mut self, py: Python) -> PyResult> { - let result = self.stream.next(); - match wait_for_future(py, result) { - None => Ok(None), - Some(Ok(b)) => Ok(Some(b.into())), - Some(Err(e)) => Err(e.into()), - } + fn next(&mut self, py: Python) -> PyResult { + let stream = self.stream.clone(); + wait_for_future(py, next_stream(stream, true)) } - fn __next__(&mut self, py: Python) -> PyResult> { + fn __next__(&mut self, py: Python) -> PyResult { self.next(py) } + fn __anext__<'py>(&'py self, py: Python<'py>) -> PyResult> { + let stream = self.stream.clone(); + pyo3_async_runtimes::tokio::future_into_py(py, next_stream(stream, false)) + } + fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { slf } + + fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } +} + +async fn next_stream( + stream: Arc>, + sync: bool, +) -> PyResult { + let mut stream = stream.lock().await; + match stream.next().await { + Some(Ok(batch)) => Ok(batch.into()), + Some(Err(e)) => Err(e.into()), + None => { + // Depending on whether the iteration is sync or not, we raise either a + // 
StopIteration or a StopAsyncIteration + if sync { + Err(PyStopIteration::new_err("stream exhausted")) + } else { + Err(PyStopAsyncIteration::new_err("stream exhausted")) + } + } + } } From db1bc62999f559d515a6a8a7f2194ab6d20b3035 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 9 Jan 2025 12:34:23 -0500 Subject: [PATCH 034/248] Chore/upgrade datafusion 44 (#973) * Bump DataFusion version to 44 * Trait definition for plan properties now returns LexOrdering * find_df_window_func was removed upstream * Prepare and Execute variants were removed from LogicalPlan * Substrait functions now take SessionState instead of SessionContext * Remove unused import * RuntimeConfig is now deprecated * Switch from RuntimeConfig to RuntimeEnvBuilder * Update return types on unit tests * DF 44 changes the execution plan properties to have boundedness and emission type * Initcap now returns stringview * Bump datafusion version in example --- Cargo.lock | 783 +++++++++--------- Cargo.toml | 9 +- README.md | 2 +- benchmarks/db-benchmark/groupby-datafusion.py | 6 +- benchmarks/tpch/tpch.py | 2 +- docs/source/user-guide/configuration.rst | 8 +- examples/create-context.py | 4 +- examples/ffi-table-provider/Cargo.toml | 4 +- python/datafusion/__init__.py | 4 +- python/datafusion/context.py | 55 +- python/tests/test_context.py | 10 +- python/tests/test_functions.py | 18 +- src/context.rs | 64 +- src/dataset_exec.rs | 20 +- src/functions.rs | 11 +- src/lib.rs | 2 +- src/sql/logical.rs | 2 - src/substrait.rs | 6 +- src/udwf.rs | 6 - 19 files changed, 529 insertions(+), 487 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 352771cdb..105cc30c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = 
"683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -132,20 +132,20 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] name = "apache-avro" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceb7c683b2f8f40970b70e39ff8be514c95b96fcb9c4af87e1ed2cb2e10801a0" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" dependencies = [ - "bzip2", + "bigdecimal", + "bzip2 0.4.4", "crc32fast", "digest", - "lazy_static", "libflate", "log", "num-bigint", @@ -153,15 +153,16 @@ dependencies = [ "rand", "regex-lite", "serde", + "serde_bytes", "serde_json", "snap", - "strum 0.25.0", - "strum_macros 0.25.3", + "strum", + "strum_macros", "thiserror 1.0.69", "typed-builder", "uuid", "xz2", - "zstd 0.12.4", + "zstd", ] [[package]] @@ -415,16 +416,15 @@ version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ - "bzip2", + "bzip2 0.4.4", "flate2", "futures-core", - "futures-io", "memchr", "pin-project-lite", "tokio", "xz2", - "zstd 0.13.2", - "zstd-safe 7.2.1", + "zstd", + "zstd-safe", ] [[package]] @@ -444,18 +444,18 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = 
"3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -506,6 +506,20 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bigdecimal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -598,6 +612,16 @@ dependencies = [ "libc", ] +[[package]] +name = "bzip2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -611,9 +635,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.2" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" +checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" dependencies = [ "jobserver", "libc", @@ -634,9 +658,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", @@ -681,8 +705,8 @@ version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "strum", + "strum_macros", "unicode-width", ] @@ -708,9 +732,9 @@ dependencies = [ [[package]] name = "const_panic" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "013b6c2c3a14d678f38cd23994b02da3a1a1b6a5d1eedddfe63a5a5f11b13a81" +checksum = "2459fc9262a1aa204eb4b5764ad4f189caec88aea9634389c0a25f8be7f6265e" [[package]] name = "constant_time_eq" @@ -778,18 +802,18 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" @@ -850,11 +874,10 @@ dependencies = [ [[package]] name = "datafusion" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9" dependencies = [ - "ahash", "apache-avro", "arrow", "arrow-array", @@ -863,7 +886,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "bzip2", + "bzip2 0.5.0", "chrono", "dashmap", "datafusion-catalog", @@ -874,6 +897,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", 
"datafusion-optimizer", "datafusion-physical-expr", @@ -884,19 +908,14 @@ dependencies = [ "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", "itertools", "log", "num-traits", - "num_cpus", "object_store", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", + "regex", "sqlparser", "tempfile", "tokio", @@ -904,14 +923,14 @@ dependencies = [ "url", "uuid", "xz2", - "zstd 0.13.2", + "zstd", ] [[package]] name = "datafusion-catalog" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +checksum = "ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e" dependencies = [ "arrow-schema", "async-trait", @@ -924,9 +943,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b" dependencies = [ "ahash", "apache-avro", @@ -934,44 +953,48 @@ dependencies = [ "arrow-array", "arrow-buffer", "arrow-schema", - "chrono", "half", "hashbrown 0.14.5", "indexmap", - "instant", "libc", - "num_cpus", + "log", "object_store", "parquet", "paste", "pyo3", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +checksum = "72fbf14d4079f7ce5306393084fe5057dddfdc2113577e0049310afa12e94281" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3" + 
[[package]] name = "datafusion-execution" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486" dependencies = [ "arrow", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store", "parking_lot", @@ -982,45 +1005,41 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +checksum = "62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", - "strum 0.26.3", - "strum_macros 0.26.4", ] [[package]] name = "datafusion-expr-common" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55" dependencies = [ "arrow", "datafusion-common", "itertools", - "paste", ] [[package]] name = "datafusion-ffi" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +checksum = "114e944790756b84c2cc5971eae24f5430980149345601939ac222885d4db5f7" dependencies = [ "abi_stable", "arrow", @@ -1028,7 +1047,6 @@ dependencies = [ "async-trait", "datafusion", "datafusion-proto", - "doc-comment", 
"futures", "log", "prost", @@ -1036,9 +1054,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da" dependencies = [ "arrow", "arrow-buffer", @@ -1047,8 +1065,11 @@ dependencies = [ "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools", @@ -1063,44 +1084,44 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9" dependencies = [ "ahash", "arrow", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap", "log", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +checksum 
= "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5" dependencies = [ "arrow", "arrow-array", @@ -1116,18 +1137,35 @@ dependencies = [ "itertools", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", "datafusion-functions-window-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "log", @@ -1136,48 +1174,54 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" +dependencies = [ + "quote", + "syn 2.0.95", +] + [[package]] name = "datafusion-optimizer" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", "itertools", "log", - "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1194,39 +1238,40 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools", ] [[package]] name = "datafusion-physical-optimizer" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools", + "log", + "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "43.0.0" +version = "44.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da" dependencies = [ "ahash", "arrow", @@ -1240,7 +1285,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1250,18 +1294,16 @@ dependencies = [ "indexmap", "itertools", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-proto" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" +checksum = "8e23b0998195e495bfa7b37cdceb317129a6c40522219f6872d2e0c9ae9f4fcb" dependencies = [ "arrow", "chrono", @@ -1275,14 +1317,12 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" +checksum = "cfc59992a29eed2d2c1dd779deac99083b217774ebcf90ee121840607a4d866f" dependencies = [ "arrow", - "chrono", "datafusion-common", - "object_store", "prost", ] @@ -1294,7 +1334,6 @@ dependencies = [ "async-trait", "datafusion", "datafusion-ffi", - "datafusion-functions-window-common", "datafusion-proto", "datafusion-substrait", "futures", @@ -1312,30 +1351,32 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +checksum = "6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc" dependencies = [ "arrow", "arrow-array", "arrow-schema", 
+ "bigdecimal", "datafusion-common", "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", - "strum 0.26.3", ] [[package]] name = "datafusion-substrait" -version = "43.0.0" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9c768d2b4c4485c43afbaeeb86dd1f2ac3fb34a9e6e8c8b06180d2a223d5ba" +checksum = "d2ec36dd38512b1ecc7a3bb92e72046b944611b2f0d709445c1e51b0143bffd4" dependencies = [ "arrow-buffer", "async-recursion", + "async-trait", "chrono", "datafusion", "itertools", @@ -1365,15 +1406,9 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "dyn-clone" version = "1.0.17" @@ -1404,9 +1439,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fixedbitset" @@ -1416,9 +1451,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "24.3.25" +version = "24.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1505,7 +1540,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -1578,9 +1613,9 @@ 
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "h2" @@ -1628,24 +1663,12 @@ version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hex" version = "0.4.3" @@ -1654,9 +1677,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -1700,9 +1723,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +checksum = 
"256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", @@ -1720,9 +1743,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.3" +version = "0.27.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" dependencies = [ "futures-util", "http", @@ -1893,7 +1916,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -1919,9 +1942,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1933,18 +1956,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" version = "3.0.4" @@ -1983,9 +1994,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.74" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" dependencies = [ "once_cell", "wasm-bindgen", @@ -1999,9 +2010,9 @@ checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lexical-core" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -2012,9 +2023,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -2023,9 +2034,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ "lexical-util", "static_assertions", @@ -2033,18 +2044,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" dependencies = [ "lexical-util", "lexical-write-integer", @@ -2053,9 +2064,9 @@ dependencies = [ 
[[package]] name = "lexical-write-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" dependencies = [ "lexical-util", "static_assertions", @@ -2063,9 +2074,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.167" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libflate" @@ -2119,9 +2130,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "litemap" @@ -2207,9 +2218,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ "adler2", ] @@ -2253,6 +2264,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -2305,36 +2317,27 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = 
"object" -version = "0.36.5" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" dependencies = [ "async-trait", "base64 0.22.1", "bytes", "chrono", "futures", + "httparse", "humantime", "hyper", "itertools", @@ -2431,7 +2434,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.13.2", + "zstd", "zstd-sys", ] @@ -2466,7 +2469,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.5.0", + "heck", "itertools", "prost", "prost-types", @@ -2505,18 +2508,18 @@ dependencies = [ [[package]] name = "phf" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ "phf_generator", "phf_shared", @@ -2524,9 +2527,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", "rand", @@ -2534,18 +2537,18 @@ dependencies = [ [[package]] name = "phf_shared" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ "siphasher", ] [[package]] name = "pin-project-lite" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -2576,12 +2579,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.25" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" dependencies = [ "proc-macro2", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2595,9 +2598,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", "prost-derive", @@ -2605,12 +2608,11 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +checksum = 
"d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" dependencies = [ - "bytes", - "heck 0.5.0", + "heck", "itertools", "log", "multimap", @@ -2620,28 +2622,28 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.90", + "syn 2.0.95", "tempfile", ] [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "prost-types" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" dependencies = [ "prost", ] @@ -2655,6 +2657,15 @@ dependencies = [ "cmake", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "pyo3" version = "0.22.6" @@ -2715,7 +2726,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2724,11 +2735,11 @@ version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -2739,9 +2750,9 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" -version = "0.36.2" +version = "0.37.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" dependencies = [ "memchr", "serde", @@ -2760,7 +2771,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror 2.0.3", + "thiserror 2.0.10", "tokio", "tracing", ] @@ -2779,7 +2790,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.3", + "thiserror 2.0.10", "tinyvec", "tracing", "web-time", @@ -2787,9 +2798,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.7" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" +checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" dependencies = [ "cfg_aliases", "libc", @@ -2801,9 +2812,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -2838,11 +2849,31 @@ dependencies = [ "getrandom", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.95", +] + [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] @@ -2903,9 +2934,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.9" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ "base64 0.22.1", "bytes", @@ -2937,6 +2968,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", + "tower", "tower-service", "url", "wasm-bindgen", @@ -2990,22 +3022,22 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.41" +version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "rustls" -version = "0.23.19" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "once_cell", "ring", @@ -3038,9 +3070,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" dependencies = [ "web-time", ] @@ -3058,9 +3090,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.19" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" @@ -3107,7 +3139,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3118,9 +3150,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "3.0.1" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" +checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ "bitflags 2.6.0", "core-foundation", @@ -3131,9 +3163,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.1" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" dependencies = [ "core-foundation-sys", "libc", @@ -3141,9 +3173,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" dependencies = [ "serde", ] @@ -3156,22 +3188,31 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ 
"serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3182,14 +3223,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.135" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" dependencies = [ "itoa", "memchr", @@ -3206,7 +3247,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3253,9 +3294,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "siphasher" -version = "0.3.11" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "slab" @@ -3287,10 +3328,10 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3317,9 
+3358,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.51.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -3327,13 +3368,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3343,38 +3384,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] -name = "static_assertions" -version = "1.1.0" +name = "stacker" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] [[package]] -name = "strum" -version = "0.25.0" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros 0.26.4", -] - -[[package]] -name 
= "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.90", -] [[package]] name = "strum_macros" @@ -3382,20 +3414,20 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "substrait" -version = "0.45.5" +version = "0.50.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a127ae9d8e443cea5c2122eb2ffe5fe489e802a1e746a09c5a5cb59d074c0aeb" +checksum = "b1772d041c37cc7e6477733c76b2acf4ee36bd52b2ae4d9ea0ec9c87d003db32" dependencies = [ - "heck 0.5.0", + "heck", "pbjson", "pbjson-build", "pbjson-types", @@ -3410,7 +3442,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.90", + "syn 2.0.95", "typify", "walkdir", ] @@ -3434,9 +3466,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.90" +version = "2.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" dependencies = [ "proc-macro2", "quote", @@ -3460,7 +3492,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3471,12 +3503,13 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.14.0" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" 
+checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" dependencies = [ "cfg-if", "fastrand", + "getrandom", "once_cell", "rustix", "windows-sys 0.59.0", @@ -3493,11 +3526,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.3" +version = "2.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +checksum = "a3ac7f54ca534db81081ef1c1e7f6ea8a3ef428d2fc069097c079443d24124d3" dependencies = [ - "thiserror-impl 2.0.3", + "thiserror-impl 2.0.10", ] [[package]] @@ -3508,18 +3541,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "thiserror-impl" -version = "2.0.3" +version = "2.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +checksum = "9e9465d30713b56a37ede7185763c3492a91be2f5fa68d958c44e41ab9248beb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3554,9 +3587,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" dependencies = [ "tinyvec_macros", ] @@ -3569,9 +3602,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.1" +version = "1.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" dependencies = [ "backtrace", "bytes", @@ -3585,31 +3618,30 @@ dependencies = [ 
[[package]] name = "tokio-macros" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ "rustls", - "rustls-pki-types", "tokio", ] [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -3618,6 +3650,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = "tower-service" version = "0.3.3" @@ -3643,7 +3696,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3694,22 +3747,22 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" 
[[package]] name = "typed-builder" -version = "0.16.2" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34085c17941e36627a879208083e25d357243812c30e7d7387c3b954f30ade16" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" dependencies = [ "typed-builder-macro", ] [[package]] name = "typed-builder-macro" -version = "0.16.2" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -3734,7 +3787,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" dependencies = [ - "heck 0.5.0", + "heck", "log", "proc-macro2", "quote", @@ -3743,7 +3796,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.90", + "syn 2.0.95", "thiserror 1.0.69", "unicode-ident", ] @@ -3761,7 +3814,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.90", + "syn 2.0.95", "typify-impl", ] @@ -3867,9 +3920,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -3878,24 +3931,23 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" +checksum = 
"5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.47" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", @@ -3906,9 +3958,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3916,22 +3968,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "wasm-streams" @@ -3948,9 +4000,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.74" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", @@ -4159,7 +4211,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "synstructure", ] @@ -4181,7 +4233,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", ] [[package]] @@ -4201,7 +4253,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.95", "synstructure", ] @@ -4230,16 +4282,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", -] - -[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe 6.0.6", + "syn 2.0.95", ] [[package]] @@ -4248,17 +4291,7 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe 7.2.1", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d28844685..48219414a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,11 +38,10 @@ tokio = { version = "1.41", features = ["macros", "rt", "rt-multi-thread", "sync pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } pyo3-async-runtimes = { version 
= "0.22", features = ["tokio-runtime"]} arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "43.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-substrait = { version = "43.0.0", optional = true } -datafusion-proto = { version = "43.0.0" } -datafusion-ffi = { version = "43.0.0" } -datafusion-functions-window-common = { version = "43.0.0" } +datafusion = { version = "44.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion-substrait = { version = "44.0.0", optional = true } +datafusion-proto = { version = "44.0.0" } +datafusion-ffi = { version = "44.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = "1.11", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } diff --git a/README.md b/README.md index 83b307e7a..ca612c1ab 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ It is possible to configure runtime (memory and disk settings) and configuration ```python runtime = ( - RuntimeConfig() + RuntimeEnvBuilder() .with_disk_manager_os() .with_fair_spill_pool(10000000) ) diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 3a4399f7d..960c8ba9a 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -22,7 +22,7 @@ from datafusion import ( col, functions as f, - RuntimeConfig, + RuntimeEnvBuilder, SessionConfig, SessionContext, ) @@ -85,7 +85,9 @@ def execute(df): # create a session context with explicit runtime and config settings runtime = ( - RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(64 * 1024 * 1024 * 1024) + RuntimeEnvBuilder() + .with_disk_manager_os() + .with_fair_spill_pool(64 * 1024 * 1024 * 1024) ) config = ( SessionConfig() diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index 7f104a4cb..daa831b55 100644 --- 
a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -28,7 +28,7 @@ def bench(data_path, query_path): # create context # runtime = ( - # RuntimeConfig() + # RuntimeEnvBuilder() # .with_disk_manager_os() # .with_fair_spill_pool(10000000) # ) diff --git a/docs/source/user-guide/configuration.rst b/docs/source/user-guide/configuration.rst index 7d330019f..db200a46a 100644 --- a/docs/source/user-guide/configuration.rst +++ b/docs/source/user-guide/configuration.rst @@ -19,18 +19,18 @@ Configuration ============= Let's look at how we can configure DataFusion. When creating a :py:class:`~datafusion.context.SessionContext`, you can pass in -a :py:class:`~datafusion.context.SessionConfig` and :py:class:`~datafusion.context.RuntimeConfig` object. These two cover a wide range of options. +a :py:class:`~datafusion.context.SessionConfig` and :py:class:`~datafusion.context.RuntimeEnvBuilder` object. These two cover a wide range of options. .. code-block:: python - from datafusion import RuntimeConfig, SessionConfig, SessionContext + from datafusion import RuntimeEnvBuilder, SessionConfig, SessionContext # create a session context with default settings ctx = SessionContext() print(ctx) # create a session context with explicit runtime and config settings - runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000) + runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() .with_create_default_catalog_and_schema(True) @@ -48,4 +48,4 @@ a :py:class:`~datafusion.context.SessionConfig` and :py:class:`~datafusion.conte You can read more about available :py:class:`~datafusion.context.SessionConfig` options in the `rust DataFusion Configuration guide `_, -and about :code:`RuntimeConfig` options in the rust `online API documentation `_. +and about :code:`RuntimeEnvBuilder` options in the rust `online API documentation `_. 
diff --git a/examples/create-context.py b/examples/create-context.py index 11525d8b8..760c8513e 100644 --- a/examples/create-context.py +++ b/examples/create-context.py @@ -15,14 +15,14 @@ # specific language governing permissions and limitations # under the License. -from datafusion import RuntimeConfig, SessionConfig, SessionContext +from datafusion import RuntimeEnvBuilder, SessionConfig, SessionContext # create a session context with default settings ctx = SessionContext() print(ctx) # create a session context with explicit runtime and config settings -runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000) +runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() .with_create_default_catalog_and_schema(True) diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 4e54eaf03..4e6f91f33 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -21,8 +21,8 @@ version = "0.1.0" edition = "2021" [dependencies] -datafusion = { version = "43.0.0" } -datafusion-ffi = { version = "43.0.0" } +datafusion = { version = "44.0.0" } +datafusion-ffi = { version = "44.0.0" } pyo3 = { version = "0.22.6", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "53.2.0" } arrow-array = { version = "53.2.0" } diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 7367b0d3b..2d8db42c8 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -29,7 +29,7 @@ from .context import ( SessionContext, SessionConfig, - RuntimeConfig, + RuntimeEnvBuilder, SQLOptions, ) @@ -66,7 +66,7 @@ "SessionContext", "SessionConfig", "SQLOptions", - "RuntimeConfig", + "RuntimeEnvBuilder", "Expr", "ScalarUDF", "WindowFrame", diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 6d7f574c4..3c284c9f9 100644 --- a/python/datafusion/context.py +++ 
b/python/datafusion/context.py @@ -20,7 +20,7 @@ from __future__ import annotations from ._internal import SessionConfig as SessionConfigInternal -from ._internal import RuntimeConfig as RuntimeConfigInternal +from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal from ._internal import SQLOptions as SQLOptionsInternal from ._internal import SessionContext as SessionContextInternal @@ -265,39 +265,41 @@ def set(self, key: str, value: str) -> SessionConfig: return self -class RuntimeConfig: +class RuntimeEnvBuilder: """Runtime configuration options.""" def __init__(self) -> None: - """Create a new :py:class:`RuntimeConfig` with default values.""" - self.config_internal = RuntimeConfigInternal() + """Create a new :py:class:`RuntimeEnvBuilder` with default values.""" + self.config_internal = RuntimeEnvBuilderInternal() - def with_disk_manager_disabled(self) -> RuntimeConfig: + def with_disk_manager_disabled(self) -> RuntimeEnvBuilder: """Disable the disk manager, attempts to create temporary files will error. Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. + A new :py:class:`RuntimeEnvBuilder` object with the updated setting. """ self.config_internal = self.config_internal.with_disk_manager_disabled() return self - def with_disk_manager_os(self) -> RuntimeConfig: + def with_disk_manager_os(self) -> RuntimeEnvBuilder: """Use the operating system's temporary directory for disk manager. Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. + A new :py:class:`RuntimeEnvBuilder` object with the updated setting. """ self.config_internal = self.config_internal.with_disk_manager_os() return self - def with_disk_manager_specified(self, *paths: str | pathlib.Path) -> RuntimeConfig: + def with_disk_manager_specified( + self, *paths: str | pathlib.Path + ) -> RuntimeEnvBuilder: """Use the specified paths for the disk manager's temporary files. Args: paths: Paths to use for the disk manager's temporary files. 
Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. + A new :py:class:`RuntimeEnvBuilder` object with the updated setting. """ paths_list = [str(p) for p in paths] self.config_internal = self.config_internal.with_disk_manager_specified( @@ -305,16 +307,16 @@ def with_disk_manager_specified(self, *paths: str | pathlib.Path) -> RuntimeConf ) return self - def with_unbounded_memory_pool(self) -> RuntimeConfig: + def with_unbounded_memory_pool(self) -> RuntimeEnvBuilder: """Use an unbounded memory pool. Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. + A new :py:class:`RuntimeEnvBuilder` object with the updated setting. """ self.config_internal = self.config_internal.with_unbounded_memory_pool() return self - def with_fair_spill_pool(self, size: int) -> RuntimeConfig: + def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder: """Use a fair spill pool with the specified size. This pool works best when you know beforehand the query has multiple spillable @@ -335,16 +337,16 @@ def with_fair_spill_pool(self, size: int) -> RuntimeConfig: size: Size of the memory pool in bytes. Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. + A new :py:class:`RuntimeEnvBuilder` object with the updated setting. Examples usage:: - config = RuntimeConfig().with_fair_spill_pool(1024) + config = RuntimeEnvBuilder().with_fair_spill_pool(1024) """ self.config_internal = self.config_internal.with_fair_spill_pool(size) return self - def with_greedy_memory_pool(self, size: int) -> RuntimeConfig: + def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder: """Use a greedy memory pool with the specified size. This pool works well for queries that do not need to spill or have a single @@ -355,32 +357,39 @@ def with_greedy_memory_pool(self, size: int) -> RuntimeConfig: size: Size of the memory pool in bytes. Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. 
+ A new :py:class:`RuntimeEnvBuilder` object with the updated setting. Example usage:: - config = RuntimeConfig().with_greedy_memory_pool(1024) + config = RuntimeEnvBuilder().with_greedy_memory_pool(1024) """ self.config_internal = self.config_internal.with_greedy_memory_pool(size) return self - def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeConfig: + def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder: """Use the specified path to create any needed temporary files. Args: path: Path to use for temporary files. Returns: - A new :py:class:`RuntimeConfig` object with the updated setting. + A new :py:class:`RuntimeEnvBuilder` object with the updated setting. Example usage:: - config = RuntimeConfig().with_temp_file_path("/tmp") + config = RuntimeEnvBuilder().with_temp_file_path("/tmp") """ self.config_internal = self.config_internal.with_temp_file_path(str(path)) return self +@deprecated("Use `RuntimeEnvBuilder` instead.") +class RuntimeConfig(RuntimeEnvBuilder): + """See `RuntimeEnvBuilder`.""" + + pass + + class SQLOptions: """Options to be used when performing SQL queries.""" @@ -454,7 +463,9 @@ class SessionContext: """ def __init__( - self, config: SessionConfig | None = None, runtime: RuntimeConfig | None = None + self, + config: SessionConfig | None = None, + runtime: RuntimeEnvBuilder | None = None, ) -> None: """Main interface for executing queries with DataFusion. 
diff --git a/python/tests/test_context.py b/python/tests/test_context.py index ab86faa9d..10e8ad0e9 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -25,7 +25,7 @@ from datafusion import ( DataFrame, - RuntimeConfig, + RuntimeEnvBuilder, SessionConfig, SessionContext, SQLOptions, @@ -43,7 +43,7 @@ def test_create_context_session_config_only(): def test_create_context_runtime_config_only(): - SessionContext(runtime=RuntimeConfig()) + SessionContext(runtime=RuntimeEnvBuilder()) @pytest.mark.parametrize("path_to_str", (True, False)) @@ -54,7 +54,7 @@ def test_runtime_configs(tmp_path, path_to_str): path1 = str(path1) if path_to_str else path1 path2 = str(path2) if path_to_str else path2 - runtime = RuntimeConfig().with_disk_manager_specified(path1, path2) + runtime = RuntimeEnvBuilder().with_disk_manager_specified(path1, path2) config = SessionConfig().with_default_catalog_and_schema("foo", "bar") ctx = SessionContext(config, runtime) assert ctx is not None @@ -67,7 +67,7 @@ def test_runtime_configs(tmp_path, path_to_str): def test_temporary_files(tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path - runtime = RuntimeConfig().with_temp_file_path(path) + runtime = RuntimeEnvBuilder().with_temp_file_path(path) config = SessionConfig().with_default_catalog_and_schema("foo", "bar") ctx = SessionContext(config, runtime) assert ctx is not None @@ -77,7 +77,7 @@ def test_temporary_files(tmp_path, path_to_str): def test_create_context_with_all_valid_args(): - runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000) + runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() .with_create_default_catalog_and_schema(True) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 5dce188ed..01c6c9cef 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -103,8 +103,11 @@ def test_lit_arith(df): 
result = df.collect() assert len(result) == 1 result = result[0] + assert result.column(0) == pa.array([5, 6, 7]) - assert result.column(1) == pa.array(["Hello!", "World!", "!!"]) + assert result.column(1) == pa.array( + ["Hello!", "World!", "!!"], type=pa.string_view() + ) def test_math_functions(): @@ -661,9 +664,12 @@ def test_array_function_obj_tests(stmt, py_expr): ), ( f.concat(column("a").cast(pa.string()), literal("?")), - pa.array(["Hello?", "World?", "!?"]), + pa.array(["Hello?", "World?", "!?"], type=pa.string_view()), + ), + ( + f.initcap(column("c")), + pa.array(["Hello ", " World ", " !"], type=pa.string_view()), ), - (f.initcap(column("c")), pa.array(["Hello ", " World ", " !"])), (f.left(column("a"), literal(3)), pa.array(["Hel", "Wor", "!"])), (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())), (f.lower(column("a")), pa.array(["hello", "world", "!"])), @@ -871,8 +877,8 @@ def test_temporal_functions(df): result = df.collect() assert len(result) == 1 result = result[0] - assert result.column(0) == pa.array([12, 6, 7], type=pa.float64()) - assert result.column(1) == pa.array([2022, 2027, 2020], type=pa.float64()) + assert result.column(0) == pa.array([12, 6, 7], type=pa.int32()) + assert result.column(1) == pa.array([2022, 2027, 2020], type=pa.int32()) assert result.column(2) == pa.array( [datetime(2022, 12, 1), datetime(2027, 6, 1), datetime(2020, 7, 1)], type=pa.timestamp("us"), @@ -904,7 +910,7 @@ def test_temporal_functions(df): assert result.column(9) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) - assert result.column(10) == pa.array([31, 26, 2], type=pa.float64()) + assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) def test_arrow_cast(df): diff --git a/src/context.rs b/src/context.rs index 88c90e0fd..bab7fd42a 100644 --- a/src/context.rs +++ b/src/context.rs @@ -62,7 +62,7 @@ use datafusion::execution::context::{ use datafusion::execution::disk_manager::DiskManagerConfig; use 
datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, UnboundedMemoryPool}; use datafusion::execution::options::ReadOptions; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, @@ -165,62 +165,62 @@ impl PySessionConfig { } /// Runtime options for a SessionContext -#[pyclass(name = "RuntimeConfig", module = "datafusion", subclass)] +#[pyclass(name = "RuntimeEnvBuilder", module = "datafusion", subclass)] #[derive(Clone)] -pub struct PyRuntimeConfig { - pub config: RuntimeConfig, +pub struct PyRuntimeEnvBuilder { + pub builder: RuntimeEnvBuilder, } #[pymethods] -impl PyRuntimeConfig { +impl PyRuntimeEnvBuilder { #[new] fn new() -> Self { Self { - config: RuntimeConfig::default(), + builder: RuntimeEnvBuilder::default(), } } fn with_disk_manager_disabled(&self) -> Self { - let config = self.config.clone(); - let config = config.with_disk_manager(DiskManagerConfig::Disabled); - Self { config } + let mut builder = self.builder.clone(); + builder = builder.with_disk_manager(DiskManagerConfig::Disabled); + Self { builder } } fn with_disk_manager_os(&self) -> Self { - let config = self.config.clone(); - let config = config.with_disk_manager(DiskManagerConfig::NewOs); - Self { config } + let builder = self.builder.clone(); + let builder = builder.with_disk_manager(DiskManagerConfig::NewOs); + Self { builder } } fn with_disk_manager_specified(&self, paths: Vec) -> Self { - let config = self.config.clone(); + let builder = self.builder.clone(); let paths = paths.iter().map(|s| s.into()).collect(); - let config = config.with_disk_manager(DiskManagerConfig::NewSpecified(paths)); - Self { config } + let builder = builder.with_disk_manager(DiskManagerConfig::NewSpecified(paths)); + Self { builder } } fn 
with_unbounded_memory_pool(&self) -> Self { - let config = self.config.clone(); - let config = config.with_memory_pool(Arc::new(UnboundedMemoryPool::default())); - Self { config } + let builder = self.builder.clone(); + let builder = builder.with_memory_pool(Arc::new(UnboundedMemoryPool::default())); + Self { builder } } fn with_fair_spill_pool(&self, size: usize) -> Self { - let config = self.config.clone(); - let config = config.with_memory_pool(Arc::new(FairSpillPool::new(size))); - Self { config } + let builder = self.builder.clone(); + let builder = builder.with_memory_pool(Arc::new(FairSpillPool::new(size))); + Self { builder } } fn with_greedy_memory_pool(&self, size: usize) -> Self { - let config = self.config.clone(); - let config = config.with_memory_pool(Arc::new(GreedyMemoryPool::new(size))); - Self { config } + let builder = self.builder.clone(); + let builder = builder.with_memory_pool(Arc::new(GreedyMemoryPool::new(size))); + Self { builder } } fn with_temp_file_path(&self, path: &str) -> Self { - let config = self.config.clone(); - let config = config.with_temp_file_path(path); - Self { config } + let builder = self.builder.clone(); + let builder = builder.with_temp_file_path(path); + Self { builder } } } @@ -276,19 +276,19 @@ impl PySessionContext { #[new] pub fn new( config: Option, - runtime: Option, + runtime: Option, ) -> PyResult { let config = if let Some(c) = config { c.config } else { SessionConfig::default().with_information_schema(true) }; - let runtime_config = if let Some(c) = runtime { - c.config + let runtime_env_builder = if let Some(c) = runtime { + c.builder } else { - RuntimeConfig::default() + RuntimeEnvBuilder::default() }; - let runtime = Arc::new(RuntimeEnv::try_new(runtime_config)?); + let runtime = Arc::new(runtime_env_builder.build()?); let session_state = SessionStateBuilder::new() .with_config(config) .with_runtime_env(runtime) diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 2759aa678..9d2559429 100644 --- 
a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; /// Implements a Datafusion physical ExecutionPlan that delegates to a PyArrow Dataset /// This actually performs the projection, filtering and scanning of a Dataset use pyo3::prelude::*; @@ -34,11 +35,11 @@ use datafusion::error::{DataFusionError as InnerDataFusionError, Result as DFRes use datafusion::execution::context::TaskContext; use datafusion::logical_expr::utils::conjunction; use datafusion::logical_expr::Expr; -use datafusion::physical_expr::{EquivalenceProperties, PhysicalSortExpr}; +use datafusion::physical_expr::{EquivalenceProperties, LexOrdering}; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, - Partitioning, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, + SendableRecordBatchStream, Statistics, }; use crate::errors::DataFusionError; @@ -136,7 +137,8 @@ impl DatasetExec { let plan_properties = datafusion::physical_plan::PlanProperties::new( EquivalenceProperties::new(schema.clone()), Partitioning::UnknownPartitioning(fragments.len()), - ExecutionMode::Bounded, + EmissionType::Final, + Boundedness::Bounded, ); Ok(DatasetExec { @@ -251,12 +253,16 @@ impl ExecutionPlanProperties for DatasetExec { self.plan_properties.output_partitioning() } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + fn output_ordering(&self) -> Option<&LexOrdering> { None } - fn execution_mode(&self) -> datafusion::physical_plan::ExecutionMode { - self.plan_properties.execution_mode + fn boundedness(&self) -> Boundedness { + self.plan_properties.boundedness + } + + fn pipeline_behavior(&self) -> EmissionType { + 
self.plan_properties.emission_type } fn equivalence_properties(&self) -> &datafusion::physical_expr::EquivalenceProperties { diff --git a/src/functions.rs b/src/functions.rs index ccc1981bd..ae032d702 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -36,10 +36,7 @@ use datafusion::functions_aggregate; use datafusion::functions_window; use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::sqlparser::ast::NullTreatment as DFNullTreatment; -use datafusion::logical_expr::{ - expr::{find_df_window_func, WindowFunction}, - lit, Expr, WindowFunctionDefinition, -}; +use datafusion::logical_expr::{expr::WindowFunction, lit, Expr, WindowFunctionDefinition}; fn add_builder_fns_to_aggregate( agg_fn: Expr, @@ -232,12 +229,6 @@ fn when(when: PyExpr, then: PyExpr) -> PyResult { /// /// NOTE: we search the built-ins first because the `UDAF` versions currently do not have the same behavior. fn find_window_fn(name: &str, ctx: Option) -> PyResult { - // search built in window functions (soon to be deprecated) - let df_window_func = find_df_window_func(name); - if let Some(df_window_func) = df_window_func { - return Ok(df_window_func); - } - if let Some(ctx) = ctx { // search UDAFs let udaf = ctx diff --git a/src/lib.rs b/src/lib.rs index 0b57e0999..1111d5d06 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,7 +78,7 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 40f0a6a65..a541889c7 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -85,12 +85,10 @@ impl PyLogicalPlan { | LogicalPlan::Union(_) | LogicalPlan::Statement(_) | LogicalPlan::Values(_) - | LogicalPlan::Prepare(_) | LogicalPlan::Dml(_) | LogicalPlan::Ddl(_) | LogicalPlan::Copy(_) | LogicalPlan::DescribeTable(_) - | LogicalPlan::Execute(_) | 
LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( "Conversion of variant not implemented: {:?}", self.plan diff --git a/src/substrait.rs b/src/substrait.rs index f89b6b093..16e8c9507 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -114,7 +114,8 @@ impl PySubstraitProducer { /// Convert DataFusion LogicalPlan to Substrait Plan #[staticmethod] pub fn to_substrait_plan(plan: PyLogicalPlan, ctx: &PySessionContext) -> PyResult { - match producer::to_substrait_plan(&plan.plan, &ctx.ctx) { + let session_state = ctx.ctx.state(); + match producer::to_substrait_plan(&plan.plan, &session_state) { Ok(plan) => Ok(PyPlan { plan: *plan }), Err(e) => Err(py_datafusion_err(e)), } @@ -134,7 +135,8 @@ impl PySubstraitConsumer { plan: PyPlan, py: Python, ) -> PyResult { - let result = consumer::from_substrait_plan(&ctx.ctx, &plan.plan); + let session_state = ctx.ctx.state(); + let result = consumer::from_substrait_plan(&session_state, &plan.plan); let logical_plan = wait_for_future(py, result).map_err(DataFusionError::from)?; Ok(PyLogicalPlan::new(logical_plan)) } diff --git a/src/udwf.rs b/src/udwf.rs index 3f5ad0b1d..689eb79e3 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -22,9 +22,7 @@ use std::sync::Arc; use arrow::array::{make_array, Array, ArrayData, ArrayRef}; use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; use datafusion::logical_expr::window_state::WindowAggState; -use datafusion::physical_plan::PhysicalExpr; use datafusion::scalar::ScalarValue; -use datafusion_functions_window_common::expr::ExpressionArgs; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -319,8 +317,4 @@ impl WindowUDFImpl for MultiColumnWindowUDF { let _ = _partition_evaluator_args; (self.partition_evaluator_factory)() } - - fn expressions(&self, expr_args: ExpressionArgs) -> Vec> { - expr_args.input_exprs().into() - } } From 2d8b1d32f4941b2e02a29e9135025a32ba6ae471 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 11 Jan 2025 
10:12:04 +0800 Subject: [PATCH 035/248] Default to ZSTD compression when writing Parquet (#981) * fix: update default compression to ZSTD and improve documentation for write_parquet method * fix: clarify compression level documentation for ZSTD in write_parquet method * fix: update default compression level for ZSTD to 4 in write_parquet method * fix: improve docstring formatting for DataFrame parquet writing method * feat: implement Compression enum and update write_parquet method to use it * add test * fix: remove unused import and update default compression to ZSTD in rs' write_parquet method * fix: update compression type strings to lowercase in DataFrame parquet writing method doc * test: update parquet compression tests to validate invalid and default compression levels * add comment on source of Compression * docs: enhance Compression enum documentation and add default level method * test: include gzip in default compression level tests for write_parquet * refactor: simplify Compression enum methods and improve type handling in DataFrame.write_parquet * docs: update Compression enum methods to include return type descriptions * move comment to within test * Ruff format --------- Co-authored-by: Tim Saucer --- python/datafusion/dataframe.py | 94 +++++++++++++++++++++++++++++++--- python/tests/test_dataframe.py | 14 ++++- src/dataframe.rs | 2 +- 3 files changed, 101 insertions(+), 9 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 0b38db924..f8aef0c91 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -21,7 +21,16 @@ from __future__ import annotations import warnings -from typing import Any, Iterable, List, TYPE_CHECKING, Literal, overload +from typing import ( + Any, + Iterable, + List, + TYPE_CHECKING, + Literal, + overload, + Optional, + Union, +) from datafusion.record_batch import RecordBatchStream from typing_extensions import deprecated from datafusion.plan import 
LogicalPlan, ExecutionPlan @@ -35,6 +44,60 @@ from datafusion._internal import DataFrame as DataFrameInternal from datafusion.expr import Expr, SortExpr, sort_or_default +from enum import Enum + + +# excerpt from deltalake +# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 +class Compression(Enum): + """Enum representing the available compression types for Parquet files.""" + + UNCOMPRESSED = "uncompressed" + SNAPPY = "snappy" + GZIP = "gzip" + BROTLI = "brotli" + LZ4 = "lz4" + LZ0 = "lz0" + ZSTD = "zstd" + LZ4_RAW = "lz4_raw" + + @classmethod + def from_str(cls, value: str) -> "Compression": + """Convert a string to a Compression enum value. + + Args: + value: The string representation of the compression type. + + Returns: + The Compression enum lowercase value. + + Raises: + ValueError: If the string does not match any Compression enum value. + """ + try: + return cls(value.lower()) + except ValueError: + raise ValueError( + f"{value} is not a valid Compression. Valid values are: {[item.value for item in Compression]}" + ) + + def get_default_level(self) -> Optional[int]: + """Get the default compression level for the compression type. + + Returns: + The default compression level for the compression type. 
+ """ + # GZIP, BROTLI default values from deltalake repo + # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 + # ZSTD default value from delta-rs + # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 + if self == Compression.GZIP: + return 6 + elif self == Compression.BROTLI: + return 1 + elif self == Compression.ZSTD: + return 4 + return None class DataFrame: @@ -620,17 +683,36 @@ def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None def write_parquet( self, path: str | pathlib.Path, - compression: str = "uncompressed", + compression: Union[str, Compression] = Compression.ZSTD, compression_level: int | None = None, ) -> None: """Execute the :py:class:`DataFrame` and write the results to a Parquet file. Args: path: Path of the Parquet file to write. - compression: Compression type to use. - compression_level: Compression level to use. - """ - self.df.write_parquet(str(path), compression, compression_level) + compression: Compression type to use. Default is "ZSTD". + Available compression types are: + - "uncompressed": No compression. + - "snappy": Snappy compression. + - "gzip": Gzip compression. + - "brotli": Brotli compression. + - "lz0": LZ0 compression. + - "lz4": LZ4 compression. + - "lz4_raw": LZ4_RAW compression. + - "zstd": Zstandard compression. + compression_level: Compression level to use. For ZSTD, the + recommended range is 1 to 22, with the default being 4. Higher levels + provide better compression but slower speed. 
+ """ + # Convert string to Compression enum if necessary + if isinstance(compression, str): + compression = Compression.from_str(compression) + + if compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD}: + if compression_level is None: + compression_level = compression.get_default_level() + + self.df.write_parquet(str(path), compression.value, compression_level) def write_json(self, path: str | pathlib.Path) -> None: """Execute the :py:class:`DataFrame` and write the results to a JSON file. diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e3bd1b2a5..fa5f4e8c5 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -1107,14 +1107,24 @@ def test_write_compressed_parquet_wrong_compression_level( ) -@pytest.mark.parametrize("compression", ["brotli", "zstd", "wrong"]) -def test_write_compressed_parquet_missing_compression_level(df, tmp_path, compression): +@pytest.mark.parametrize("compression", ["wrong"]) +def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression): path = tmp_path with pytest.raises(ValueError): df.write_parquet(str(path), compression=compression) +@pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"]) +def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression): + # Test write_parquet with zstd, brotli, gzip default compression level, + # ie don't specify compression level + # should complete without error + path = tmp_path + + df.write_parquet(str(path), compression=compression) + + def test_dataframe_export(df) -> None: # Guarantees that we have the canonical implementation # reading our dataframe export diff --git a/src/dataframe.rs b/src/dataframe.rs index fcb46a756..71a6fe60f 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -463,7 +463,7 @@ impl PyDataFrame { /// Write a `DataFrame` to a Parquet file. 
#[pyo3(signature = ( path, - compression="uncompressed", + compression="zstd", compression_level=None ))] fn write_parquet( From 39fec53ca1182049700806a423f60d44f2a9676d Mon Sep 17 00:00:00 2001 From: kosiew Date: Tue, 14 Jan 2025 20:01:10 +0800 Subject: [PATCH 036/248] fix: correct LZ0 to LZO in compression options (#995) * fix: correct LZ0 to LZO in compression options * fix: disable LZO compression option and update tests to reflect its unavailability * fix: ruff format expected string in test_execution_plan * fix: update test for execution plan and add validation for invalid LZO compression * fix: remove LZO compression option and related test cases * ruff autoformat * fix: remove TODO comment regarding LZO compression implementation --- python/datafusion/dataframe.py | 6 ++++-- python/tests/test_dataframe.py | 2 ++ python/tests/test_functions.py | 18 +++++++++--------- src/dataframe.rs | 2 +- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index f8aef0c91..b0c1abdad 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -57,7 +57,9 @@ class Compression(Enum): GZIP = "gzip" BROTLI = "brotli" LZ4 = "lz4" - LZ0 = "lz0" + # lzo is not implemented yet + # https://github.com/apache/arrow-rs/issues/6970 + # LZO = "lzo" ZSTD = "zstd" LZ4_RAW = "lz4_raw" @@ -696,10 +698,10 @@ def write_parquet( - "snappy": Snappy compression. - "gzip": Gzip compression. - "brotli": Brotli compression. - - "lz0": LZ0 compression. - "lz4": LZ4 compression. - "lz4_raw": LZ4_RAW compression. - "zstd": Zstandard compression. + Note: LZO is not yet implemented in arrow-rs and is therefore excluded. compression_level: Compression level to use. For ZSTD, the recommended range is 1 to 22, with the default being 4. Higher levels provide better compression but slower speed. 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index fa5f4e8c5..a1a871e9a 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -1115,6 +1115,8 @@ def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression) df.write_parquet(str(path), compression=compression) +# not testing lzo because it it not implemented yet +# https://github.com/apache/arrow-rs/issues/6970 @pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"]) def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression): # Test write_parquet with zstd, brotli, gzip default compression level, diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 01c6c9cef..add170c17 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -790,9 +790,9 @@ def test_hash_functions(df): ) assert result.column(2) == pa.array( [ - b("185F8DB32271FE25F561A6FC938B2E26" "4306EC304EDA518007D1764826381969"), - b("78AE647DC5544D227130A0682A51E30B" "C7777FBB6D8A8F17007463A3ECD1D524"), - b("BB7208BC9B5D7C04F1236A82A0093A5E" "33F40423D5BA8D4266F7092C3BA43B62"), + b("185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"), + b("78AE647DC5544D227130A0682A51E30BC7777FBB6D8A8F17007463A3ECD1D524"), + b("BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"), ] ) assert result.column(3) == pa.array( @@ -838,16 +838,16 @@ def test_hash_functions(df): ) assert result.column(5) == pa.array( [ - b("F73A5FBF881F89B814871F46E26AD3FA" "37CB2921C5E8561618639015B3CCBB71"), - b("B792A0383FB9E7A189EC150686579532" "854E44B71AC394831DAED169BA85CCC5"), - b("27988A0E51812297C77A433F63523334" "6AEE29A829DCF4F46E0F58F402C6CFCB"), + b("F73A5FBF881F89B814871F46E26AD3FA37CB2921C5E8561618639015B3CCBB71"), + b("B792A0383FB9E7A189EC150686579532854E44B71AC394831DAED169BA85CCC5"), + b("27988A0E51812297C77A433F635233346AEE29A829DCF4F46E0F58F402C6CFCB"), ] ) assert 
result.column(6) == pa.array( [ - b("FBC2B0516EE8744D293B980779178A35" "08850FDCFE965985782C39601B65794F"), - b("BF73D18575A736E4037D45F9E316085B" "86C19BE6363DE6AA789E13DEAACC1C4E"), - b("C8D11B9F7237E4034ADBCD2005735F9B" "C4C597C75AD89F4492BEC8F77D15F7EB"), + b("FBC2B0516EE8744D293B980779178A3508850FDCFE965985782C39601B65794F"), + b("BF73D18575A736E4037D45F9E316085B86C19BE6363DE6AA789E13DEAACC1C4E"), + b("C8D11B9F7237E4034ADBCD2005735F9BC4C597C75AD89F4492BEC8F77D15F7EB"), ] ) assert result.column(7) == result.column(1) # SHA-224 diff --git a/src/dataframe.rs b/src/dataframe.rs index 71a6fe60f..b875480a7 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -491,7 +491,7 @@ impl PyDataFrame { ZstdLevel::try_new(verify_compression_level(compression_level)? as i32) .map_err(|e| PyValueError::new_err(format!("{e}")))?, ), - "lz0" => Compression::LZO, + "lzo" => Compression::LZO, "lz4" => Compression::LZ4, "lz4_raw" => Compression::LZ4_RAW, "uncompressed" => Compression::UNCOMPRESSED, From 31fee392d41f723179678ee7fdac2719ef1d40d1 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 19 Jan 2025 07:51:14 -0500 Subject: [PATCH 037/248] Feat/use uv python management (#994) * Remove requirements files and add dependencies into pyproject.toml instead * Remove old conda files since we will use uv as our primary method for developers to set up environments * Working through CI changes to use uv instead of pip and conda * Add uv lock to exclude files * Revert "Remove old conda files since we will use uv as our primary method for developers to set up environments" This reverts commit 88aff7e39334cde8101ef6d313c6bb2bd1f3981c. 
* Windows workflows don't use source command * Add in extra include for ignoring rat * Use uv commands in CI * Remove conda recipes and CI stages * Working on CI using uv * Install doc requirements * Remove caching uv * Set uv venv * Add requirements for building * Revert github action to allowed one * Call uv sync with verbose mode so users can see the build occurring in CI * Test setting specific hash on action * Test setting rust-toolchain github action with pinned version * Testing nightly rust toolchain against apache rejection criteria * Github action is fickle with the pattern matching * Switch all Ci to use nightly rust toolchain until infra team whitelists the stable toolchain * Speed up CI by preventing build during uv sync * Additional uv commands missing no-project option * Setting python versions of dependencies to match lowest supported python version, 3.8 * Update maturin and move to deps for dev * CI ordering was wrong and maturin needed uv option * Switch to stable toolchain * uv requires two dashes * Submodule init * change directories for unit tests * Add deps for build * Maturin build doesn't take uv as parameter * Update documentation for setting up with uv * Enable cache in CI * Update documentation to use uv * Small adjustment to CI config --- .github/workflows/build.yml | 62 +- .github/workflows/conda.yml | 107 - .github/workflows/docs.yaml | 23 +- .github/workflows/test.yaml | 59 +- .pre-commit-config.yaml | 4 +- README.md | 68 +- conda/environments/datafusion-cuda-dev.yaml | 44 - conda/environments/datafusion-dev.yaml | 41 - conda/recipes/bld.bat | 26 - conda/recipes/build.sh | 84 - conda/recipes/meta.yaml | 75 - dev/python_lint.sh | 2 +- dev/release/README.md | 27 +- dev/release/rat_exclude_files.txt | 4 +- dev/release/verify-release-candidate.sh | 6 +- docs/README.md | 32 +- docs/build.sh | 6 + docs/mdbook/src/installation.md | 53 +- docs/requirements.txt | 26 - .../source/contributor-guide/introduction.rst | 42 +- pyproject.toml | 25 +-
requirements-310.txt | 195 -- requirements-311.txt | 175 -- requirements-312.txt | 184 -- requirements.in | 26 - uv.lock | 1842 +++++++++++++++++ 26 files changed, 2031 insertions(+), 1207 deletions(-) delete mode 100644 .github/workflows/conda.yml delete mode 100644 conda/environments/datafusion-cuda-dev.yaml delete mode 100644 conda/environments/datafusion-dev.yaml delete mode 100644 conda/recipes/bld.bat delete mode 100644 conda/recipes/build.sh delete mode 100644 conda/recipes/meta.yaml delete mode 100644 docs/requirements.txt delete mode 100644 requirements-310.txt delete mode 100644 requirements-311.txt delete mode 100644 requirements-312.txt delete mode 100644 requirements.in create mode 100644 uv.lock diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 084a96192..acabad3ca 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,28 +31,33 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" + + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + # Use the --no-install-package to only install the dependencies + # but do not yet build the rust library - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install ruff + run: uv sync --dev --no-install-package datafusion + # Update output format to enable automatic inline annotations. 
- name: Run Ruff run: | - ruff check --output-format=github python/ - ruff format --check python/ + uv run --no-project ruff check --output-format=github python/ + uv run --no-project ruff format --check python/ generate-license: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: astral-sh/setup-uv@v5 with: - profile: minimal - toolchain: stable - override: true + enable-cache: true + - name: Generate license file - run: python ./dev/create_license.py + run: uv run --no-project python ./dev/create_license.py - uses: actions/upload-artifact@v4 with: name: python-wheel-license @@ -74,15 +79,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Upgrade pip - run: python -m pip install --upgrade pip - - - name: Install maturin - run: pip install maturin==1.5.1 + - uses: dtolnay/rust-toolchain@stable - run: rm LICENSE.txt - name: Download LICENSE.txt @@ -97,8 +94,14 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Build Python package - run: maturin build --release --strip --features substrait + run: | + uv sync --dev --no-install-package datafusion + uv run --no-project maturin build --release --strip --features substrait - name: List Windows wheels if: matrix.os == 'windows-latest' @@ -132,15 +135,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Upgrade pip - run: python -m pip install --upgrade pip - - - name: Install maturin - run: pip install maturin==1.5.1 + - uses: dtolnay/rust-toolchain@stable - run: rm LICENSE.txt - name: Download LICENSE.txt @@ -155,9 +150,14 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Build Python package run: | - maturin build --release --strip 
--features substrait + uv sync --dev --no-install-package datafusion + uv run --no-project maturin build --release --strip --features substrait - name: List Mac wheels run: find target/wheels/ diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml deleted file mode 100644 index c2b8fab02..000000000 --- a/.github/workflows/conda.yml +++ /dev/null @@ -1,107 +0,0 @@ -name: Build conda nightly -on: - push: - branches: - - main - pull_request: - paths: - - Cargo.toml - - Cargo.lock - - pyproject.toml - - conda/recipes/** - - .github/workflows/conda.yml - schedule: - - cron: '0 0 * * 0' - -# When this workflow is queued, automatically cancel any previous running -# or pending jobs from the same branch -concurrency: - group: conda-${{ github.head_ref }} - cancel-in-progress: true - -# Required shell entrypoint to have properly activated conda environments -defaults: - run: - shell: bash -l {0} - -jobs: - conda: - name: "Build conda nightlies (python: ${{ matrix.python }}, arch: ${{ matrix.arch }})" - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python: ["3.8", "3.9", "3.10", "3.11"] - arch: ["linux-64", "linux-aarch64"] - steps: - - name: Manage disk space - if: matrix.arch == 'linux-aarch64' - run: | - sudo mkdir -p /opt/empty_dir || true - for d in \ - /opt/ghc \ - /opt/hostedtoolcache \ - /usr/lib/jvm \ - /usr/local/.ghcup \ - /usr/local/lib/android \ - /usr/local/share/powershell \ - /usr/share/dotnet \ - /usr/share/swift \ - ; do - sudo rsync --stats -a --delete /opt/empty_dir/ $d || true - done - sudo apt-get purge -y -f firefox \ - google-chrome-stable \ - microsoft-edge-stable - sudo apt-get autoremove -y >& /dev/null - sudo apt-get autoclean -y >& /dev/null - sudo docker image prune --all --force - df -h - - name: Create swapfile - if: matrix.arch == 'linux-aarch64' - run: | - sudo fallocate -l 10GiB /swapfile || true - sudo chmod 600 /swapfile || true - sudo mkswap /swapfile || true - sudo swapon /swapfile || true - - uses: 
actions/checkout@v4 - with: - fetch-depth: 0 - - name: Set up Python - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - miniforge-variant: Miniforge3 - python-version: "3.8" - channel-priority: strict - - name: Install dependencies - run: | - conda install -c conda-forge conda-build conda-verify - - which python - pip list - conda list - # Clean the conda cache - - name: Clean Conda Cache - run: conda clean --all --yes - - name: Build conda packages - run: | - # suffix for nightly package versions - export VERSION_SUFFIX=a`date +%y%m%d` - - conda build conda/recipes \ - --python ${{ matrix.python }} \ - --variants "{target_platform: [${{ matrix.arch }}]}" \ - --error-overlinking \ - --no-test \ - --no-anaconda-upload \ - --output-folder packages - - name: Test conda packages - if: matrix.arch == 'linux-64' # can only test native platform packages - run: | - conda build --test packages/${{ matrix.arch }}/*.tar.bz2 - - name: Upload conda packages as artifacts - uses: actions/upload-artifact@v4 - with: - name: "conda nightlies (python - ${{ matrix.python }}, arch - ${{ matrix.arch }})" - # need to install all conda channel metadata to properly install locally - path: packages/ diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 86288e2d8..9037e0a5c 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -57,27 +57,24 @@ jobs: version: '27.4' repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install dependencies - run: | - set -x - python3 -m venv venv - source venv/bin/activate - pip install -r requirements-311.txt - pip install -r docs/requirements.txt - - name: Build Datafusion + - name: Install dependencies and build + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Build repo run: | - set -x - source venv/bin/activate - maturin develop + uv venv + uv sync --dev --no-install-package datafusion --group docs + uv run --no-project maturin develop --uv - name: Build docs run: | set -x - source 
venv/bin/activate cd docs curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet - make html + uv run --no-project make html - name: Copy & push the generated HTML if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 21faedecd..c93d4c06f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -43,11 +43,10 @@ jobs: - uses: actions/checkout@v4 - name: Setup Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@stable id: rust-toolchain with: - toolchain: ${{ matrix.toolchain }} - override: true + components: clippy,rustfmt - name: Install Protoc uses: arduino/setup-protoc@v3 @@ -64,60 +63,35 @@ jobs: uses: actions/cache@v4 with: path: ~/.cargo - key: cargo-cache-${{ steps.rust-toolchain.outputs.rustc_hash }}-${{ hashFiles('Cargo.lock') }} + key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }} - name: Check Formatting - uses: actions-rs/cargo@v1 if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - with: - command: fmt - args: -- --check + run: cargo fmt -- --check - name: Run Clippy - uses: actions-rs/cargo@v1 if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - with: - command: clippy - args: --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure - - - name: Create Virtualenv (3.12) - if: ${{ matrix.python-version == '3.12' }} - run: | - python -m venv venv - source venv/bin/activate - pip install -r requirements-312.txt + run: cargo clippy --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure - - name: Create Virtualenv (3.10) - if: ${{ matrix.python-version == '3.10' }} - run: | - python -m venv 
venv - source venv/bin/activate - pip install -r requirements-310.txt - - - name: Create Virtualenv (3.11) - if: ${{ matrix.python-version == '3.11' }} - run: | - python -m venv venv - source venv/bin/activate - pip install -r requirements-311.txt + - name: Install dependencies and build + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true - name: Run tests env: RUST_BACKTRACE: 1 run: | git submodule update --init - source venv/bin/activate - pip install -e . -vv - pytest -v . + uv sync --dev --no-install-package datafusion + uv run --no-project maturin develop --uv + uv run --no-project pytest -v . - name: FFI unit tests run: | - source venv/bin/activate - pip install -e . -vv - pip install maturin==1.5.1 cd examples/ffi-table-provider - maturin develop --release --strip - pytest python/tests/_test_table_provider.py + uv run --no-project maturin develop --uv + uv run --no-project pytest python/tests/_test_table_provider.py - name: Cache the generated dataset id: cache-tpch-dataset @@ -134,7 +108,6 @@ jobs: - name: Run TPC-H examples run: | - source venv/bin/activate cd examples/tpch - python convert_data_to_parquet.py - pytest _tests.py + uv run --no-project python convert_data_to_parquet.py + uv run --no-project pytest _tests.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8509fae2c..e20fedf5c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,9 +17,9 @@ repos: - repo: https://github.com/rhysd/actionlint - rev: v1.6.23 + rev: v1.7.6 hooks: - - id: actionlint-docker + - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.3.0 diff --git a/README.md b/README.md index ca612c1ab..5aaf7f5f3 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,13 @@ See [examples](examples/README.md) for more information. 
- [Serialize query plans using Substrait](https://github.com/apache/datafusion-python/blob/main/examples/substrait.py) -## How to install (from pip) +## How to install + +### uv + +```bash +uv add datafusion +``` ### Pip @@ -164,61 +170,69 @@ You can verify the installation by running: ## How to develop -This assumes that you have rust and cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). +This assumes that you have rust and cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). The Maturin tools used in this workflow can be installed either via `uv` or `pip`. Both approaches should offer the same experience. It is recommended to use `uv` since it has significant performance improvements +over `pip`. -The Maturin tools used in this workflow can be installed either via Conda or Pip. Both approaches should offer the same experience. Multiple approaches are only offered to appease developer preference. Bootstrapping for both Conda and Pip are as follows. +Bootstrap (`uv`): -Bootstrap (Conda): +By default `uv` will attempt to build the datafusion python package. For our development we prefer to build manually. 
This means +that when creating your virtual environment using `uv sync` you need to pass in the additional `--no-install-package datafusion` +and for `uv run` commands the additional parameter `--no-project` ```bash # fetch this repo git clone git@github.com:apache/datafusion-python.git -# create the conda environment for dev -conda env create -f ./conda/environments/datafusion-dev.yaml -n datafusion-dev -# activate the conda environment -conda activate datafusion-dev +# create the virtual environment +uv sync --dev --no-install-package datafusion +# activate the environment +source .venv/bin/activate ``` -Or alternatively, if you are on an OS that supports CUDA Toolkit, you can use `-f ./conda/environments/datafusion-cuda-dev.yaml`. - -Bootstrap (Pip): +Bootstrap (`pip`): ```bash # fetch this repo git clone git@github.com:apache/datafusion-python.git # prepare development environment (used to build wheel / install in development) -python3 -m venv venv +python3 -m venv .venv # activate the venv -source venv/bin/activate +source .venv/bin/activate # update pip itself if necessary python -m pip install -U pip -# install dependencies (for Python 3.8+) -python -m pip install -r requirements.in +# install dependencies +python -m pip install -r pyproject.toml ``` The tests rely on test data in git submodules. ```bash -git submodule init -git submodule update +git submodule update --init ``` Whenever rust code changes (your changes or via `git pull`): ```bash # make sure you activate the venv using "source venv/bin/activate" first -maturin develop +maturin develop --uv python -m pytest ``` +Alternatively if you are using `uv` you can do the following without +needing to activate the virtual environment: + +```bash +uv run --no-project maturin develop --uv +uv run --no-project pytest . 
+``` + ### Running & Installing pre-commit hooks -arrow-datafusion-python takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce +`datafusion-python` takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise. Our pre-commit hooks can be installed by running `pre-commit install`, which will install the configurations in -your ARROW_DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete +your DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete the commit if an offending lint is found allowing you to make changes locally before pushing. The pre-commit hooks can also be run adhoc without installing them by simply running `pre-commit run --all-files` @@ -236,18 +250,8 @@ There are scripts in `ci/scripts` for running Rust and Python linters. 
## How to update dependencies -To change test dependencies, change the `requirements.in` and run +To change test dependencies, change the `pyproject.toml` and run ```bash -# install pip-tools (this can be done only once), also consider running in venv -python -m pip install pip-tools -python -m piptools compile --generate-hashes -o requirements-310.txt +uv sync --dev --no-install-package datafusion ``` - -To update dependencies, run with `-U` - -```bash -python -m piptools compile -U --generate-hashes -o requirements-310.txt -``` - -More details [here](https://github.com/jazzband/pip-tools) diff --git a/conda/environments/datafusion-cuda-dev.yaml b/conda/environments/datafusion-cuda-dev.yaml deleted file mode 100644 index 1f6f23942..000000000 --- a/conda/environments/datafusion-cuda-dev.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -channels: - - conda-forge -dependencies: - - black - - flake8 - - isort - - maturin>=1.5.1 - - mypy - - numpy - - pyarrow>=11.0.0 - - pytest - - toml - - importlib_metadata - - python>=3.10 - # Packages useful for building distributions and releasing - - mamba - - conda-build - - anaconda-client - # Packages for documentation building - - sphinx - - pydata-sphinx-theme==0.8.0 - - myst-parser - - jinja2 - # GPU packages - - cudf - - cudatoolkit=11.8 -name: datafusion-dev diff --git a/conda/environments/datafusion-dev.yaml b/conda/environments/datafusion-dev.yaml deleted file mode 100644 index b4b503dc6..000000000 --- a/conda/environments/datafusion-dev.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -channels: - - conda-forge -dependencies: - - black - - flake8 - - isort - - maturin>=1.5.1 - - mypy - - numpy - - pyarrow>=11.0.0 - - pytest - - toml - - importlib_metadata - - python>=3.10 - # Packages useful for building distributions and releasing - - mamba - - conda-build - - anaconda-client - # Packages for documentation building - - sphinx - - pydata-sphinx-theme==0.8.0 - - myst-parser - - jinja2 -name: datafusion-dev diff --git a/conda/recipes/bld.bat b/conda/recipes/bld.bat deleted file mode 100644 index 90626a637..000000000 --- a/conda/recipes/bld.bat +++ /dev/null @@ -1,26 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -maturin build -vv -j %CPU_COUNT% --release --strip --features substrait --manylinux off --interpreter=%PYTHON% - -FOR /F "delims=" %%i IN ('dir /s /b target\wheels\*.whl') DO set datafusion_wheel=%%i - -%PYTHON% -m pip install --no-deps %datafusion_wheel% -vv - -cargo-bundle-licenses --format yaml --output THIRDPARTY.yml diff --git a/conda/recipes/build.sh b/conda/recipes/build.sh deleted file mode 100644 index 259894313..000000000 --- a/conda/recipes/build.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -set -ex - -# See https://github.com/conda-forge/rust-feedstock/blob/master/recipe/build.sh for cc env explanation -if [ "$c_compiler" = gcc ] ; then - case "$target_platform" in - linux-64) rust_env_arch=X86_64_UNKNOWN_LINUX_GNU ;; - linux-aarch64) rust_env_arch=AARCH64_UNKNOWN_LINUX_GNU ;; - linux-ppc64le) rust_env_arch=POWERPC64LE_UNKNOWN_LINUX_GNU ;; - *) echo "unknown target_platform $target_platform" ; exit 1 ;; - esac - - export CARGO_TARGET_${rust_env_arch}_LINKER=$CC -fi - -declare -a _xtra_maturin_args - -mkdir -p $SRC_DIR/.cargo - -if [ "$target_platform" = "osx-64" ] ; then - cat <> $SRC_DIR/.cargo/config -[target.x86_64-apple-darwin] -linker = "$CC" -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - -EOF - - _xtra_maturin_args+=(--target=x86_64-apple-darwin) - -elif [ "$target_platform" = "osx-arm64" ] ; then - cat <> $SRC_DIR/.cargo/config -# Required for intermediate codegen stuff -[target.x86_64-apple-darwin] -linker = "$CC_FOR_BUILD" - -# Required for final binary artifacts for target -[target.aarch64-apple-darwin] -linker = "$CC" -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - -EOF - _xtra_maturin_args+=(--target=aarch64-apple-darwin) - - # This variable must be set to the directory containing the target's libpython DSO - export PYO3_CROSS_LIB_DIR=$PREFIX/lib - - # xref: https://github.com/PyO3/pyo3/commit/7beb2720 - export PYO3_PYTHON_VERSION=${PY_VER} - - # xref: https://github.com/conda-forge/python-feedstock/issues/621 - sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/os-patch.py - sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/platform-patch.py -fi - -maturin build -vv -j "${CPU_COUNT}" --release --strip --features substrait --manylinux off --interpreter="${PYTHON}" "${_xtra_maturin_args[@]}" - -"${PYTHON}" -m pip install $SRC_DIR/target/wheels/datafusion*.whl --no-deps -vv - -cargo-bundle-licenses --format yaml --output THIRDPARTY.yml diff --git 
a/conda/recipes/meta.yaml b/conda/recipes/meta.yaml deleted file mode 100644 index b0784253a..000000000 --- a/conda/recipes/meta.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -{% set name = "datafusion" %} -{% set major_minor_patch = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').split('.') %} -{% set new_patch = major_minor_patch[2] | int + 1 %} -{% set version = (major_minor_patch[:2] + [new_patch]) | join('.') + environ.get('VERSION_SUFFIX', '') %} - - -package: - name: {{ name|lower }} - version: {{ version }} - -source: - git_url: ../.. 
- -build: - number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ python | replace(".", "") }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - -requirements: - build: - - python # [build_platform != target_platform] - - cross-python_{{ target_platform }} # [build_platform != target_platform] - - zlib # [build_platform != target_platform] - - {{ compiler('c') }} - - {{ compiler('rust') }} - - cargo-bundle-licenses - - maturin >=1.5.1,<1.6.0 - - libprotobuf =3 - host: - - python - - maturin >=1.5.1,<1.6.0 - - pip - - zlib - - xz # [linux64] - run: - - python - - pyarrow >=11.0.0 - - typing_extensions - -test: - imports: - - datafusion - commands: - - pip check - requires: - - pip - -about: - home: https://arrow.apache.org/datafusion - license: Apache-2.0 - license_family: APACHE - license_file: - - LICENSE.txt - - THIRDPARTY.yml - description: | - DataFusion is an extensible query execution framework, written in Rust, - that uses Apache Arrow as its in-memory format. - doc_url: https://arrow.apache.org/datafusion - dev_url: https://github.com/apache/arrow-datafusion diff --git a/dev/python_lint.sh b/dev/python_lint.sh index 29f0d4833..2d867f29d 100755 --- a/dev/python_lint.sh +++ b/dev/python_lint.sh @@ -21,6 +21,6 @@ # DataFusion CI does set -e -source venv/bin/activate +source .venv/bin/activate flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503 black --line-length 79 . 
diff --git a/dev/release/README.md b/dev/release/README.md index b2c015e1d..f0b333999 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -172,8 +172,8 @@ git checkout 40.0.0-rc1 git submodule update --init --recursive # create the env -python3 -m venv venv -source venv/bin/activate +python3 -m venv .venv +source .venv/bin/activate # install release candidate pip install --extra-index-url https://test.pypi.org/simple/ datafusion==40.0.0 @@ -218,28 +218,9 @@ uploading them using `twine`: twine upload --repository pypi dist-release/* ``` -### Publish Python Artifacts to Anaconda +### Publish Python Artifacts to conda-forge -Publishing artifacts to Anaconda is similar to PyPi. First, Download the source tarball created in the previous step and untar it. - -```bash -# Assuming you have an existing conda environment named `datafusion-dev` if not see root README for instructions -conda activate datafusion-dev -conda build . -``` - -This will setup a virtual conda environment and build the artifacts inside of that virtual env. This step can take a few minutes as the entire build, host, and runtime environments are setup. Once complete a local filesystem path will be emitted for the location of the resulting package. Observe that path and copy to your clipboard. - -Ex: `/home/conda/envs/datafusion/conda-bld/linux-64/datafusion-0.7.0.tar.bz2` - -Now you are ready to publish this resulting package to anaconda.org. This can be accomplished in a few simple steps. 
- -```bash -# First login to Anaconda with the datafusion credentials -anaconda login -# Upload the package -anaconda upload /home/conda/envs/datafusion/conda-bld/linux-64/datafusion-0.7.0.tar.bz2 -``` +Pypi packages auto upload to conda-forge via [datafusion feedstock](https://github.com/conda-forge/datafusion-feedstock) ### Push the Release Tag diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index f65ddd06e..dcd5d9aac 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -45,4 +45,6 @@ Cargo.lock .github/* benchmarks/tpch/queries/q*.sql benchmarks/tpch/create_tables.sql -.cargo/config.toml \ No newline at end of file +.cargo/config.toml +**/.cargo/config.toml +uv.lock \ No newline at end of file diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 3879a267f..1a9104b55 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -106,7 +106,7 @@ setup_tempdir() { } test_source_distribution() { - # install rust toolchain in a similar fashion like test-miniconda + # install rust toolchain export RUSTUP_HOME=$PWD/test-rustup export CARGO_HOME=$PWD/test-rustup @@ -125,8 +125,8 @@ test_source_distribution() { git clone https://github.com/apache/arrow-testing.git testing git clone https://github.com/apache/parquet-testing.git parquet-testing - python3 -m venv venv - source venv/bin/activate + python3 -m venv .venv + source .venv/bin/activate python3 -m pip install -U pip python3 -m pip install -r requirements-310.txt maturin develop diff --git a/docs/README.md b/docs/README.md index b4b94120e..2bffea9bd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -26,42 +26,32 @@ when changes are merged to the main branch. ## Dependencies It's recommended to install build dependencies and build the documentation -inside a Python `venv`. +inside a Python `venv` using `uv`. 
To prepare building the documentation run the following on the root level of the project: -1. Set up virtual environment if it was not already created - ```bash - python3 -m venv venv - ``` -1. Activate virtual environment - ```bash - source venv/bin/activate - ``` -1. Install Datafusion's Python dependencies - ```bash - pip install -r requirements-310.txt - ``` -1. Install documentation dependencies - ```bash - pip install -r docs/requirements.txt - ``` +```bash +# Set up a virtual environment with the documentation dependencies +uv sync --dev --group docs --no-install-package datafusion +``` ## Build & Preview Run the provided script to build the HTML pages. ```bash -cd docs -./build.sh +# Build the repository +uv run --no-project maturin develop --uv +# Build the documentation +uv run --no-project docs/build.sh ``` -The HTML will be generated into a `build` directory. +The HTML will be generated into a `build` directory in `docs`. Preview the site on Linux by running this command. ```bash -firefox build/html/index.html +firefox docs/build/html/index.html ``` ## Release Process diff --git a/docs/build.sh b/docs/build.sh index 31398d195..f73330323 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -20,6 +20,10 @@ set -e +original_dir=$(pwd) +script_dir=$(dirname "$(realpath "$0")") +cd "$script_dir" || exit + if [ ! -f pokemon.csv ]; then curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv fi @@ -33,3 +37,5 @@ rm -rf temp 2> /dev/null mkdir temp cp -rf source/* temp/ make SOURCEDIR=`pwd`/temp html + +cd "$original_dir" || exit diff --git a/docs/mdbook/src/installation.md b/docs/mdbook/src/installation.md index ba00c8b80..b29f3b66b 100644 --- a/docs/mdbook/src/installation.md +++ b/docs/mdbook/src/installation.md @@ -18,44 +18,45 @@ DataFusion is easy to install, just like any other Python library. 
-## Using pip +## Using uv -``` bash -pip install datafusion -``` +If you do not yet have a virtual environment, create one: -## Conda & JupyterLab setup +```bash +uv venv +``` -This section explains how to install DataFusion in a conda environment with other libraries that allow for a nice Jupyter workflow. This setup is completely optional. These steps are only needed if you'd like to run DataFusion in a Jupyter notebook and have an interface like this: +You can add datafusion to your virtual environment with the usual: -![DataFusion in Jupyter](https://github.com/MrPowers/datafusion-book/raw/main/src/images/datafusion-jupyterlab.png) +```bash +uv pip install datafusion +``` -Create a conda environment with DataFusion, Jupyter, and other useful dependencies in the `datafusion-env.yml` file: +Or, to add to a project: +```bash +uv add datafusion ``` -name: datafusion-env -channels: - - conda-forge - - defaults -dependencies: - - python=3.9 - - ipykernel - - nb_conda - - jupyterlab - - jupyterlab_code_formatter - - isort - - black - - pip - - pip: - - datafusion +## Using pip + +``` bash +pip install datafusion ``` -Create the environment with `conda env create -f datafusion-env.yml`. +## uv & JupyterLab setup -Activate the environment with `conda activate datafusion-env`. +This section explains how to install DataFusion in a uv environment with other libraries that allow for a nice Jupyter workflow. This setup is completely optional. These steps are only needed if you'd like to run DataFusion in a Jupyter notebook and have an interface like this: -Run `jupyter lab` or open the [JupyterLab Desktop application](https://github.com/jupyterlab/jupyterlab-desktop) to start running DataFusion in a Jupyter notebook. +![DataFusion in Jupyter](https://github.com/MrPowers/datafusion-book/raw/main/src/images/datafusion-jupyterlab.png) + +Create a virtual environment with DataFusion, Jupyter, and other useful dependencies and start the desktop application. 
+ +```bash +uv venv +uv pip install datafusion jupyterlab jupyterlab_code_formatter +uv run jupyter lab +``` ## Examples diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index f5cece78e..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -sphinx -pydata-sphinx-theme==0.8.0 -myst-parser -maturin -jinja2 -ipython -pandas -pickleshare -sphinx-autoapi diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index 4457a898f..fb98cfd1d 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -29,22 +29,24 @@ Doing so is a great way to help the community as well as get more familiar with How to develop -------------- -This assumes that you have rust and cargo installed. We use the workflow recommended by `pyo3 <https://github.com/PyO3/pyo3>`_ and `maturin <https://github.com/PyO3/maturin>`_. +This assumes that you have rust and cargo installed. We use the workflow recommended by +`pyo3 <https://github.com/PyO3/pyo3>`_ and `maturin <https://github.com/PyO3/maturin>`_. We recommend using +`uv <https://github.com/astral-sh/uv>`_ for python package management. + +By default `uv` will attempt to build the datafusion python package. For our development we prefer to build manually. 
This means +that when creating your virtual environment using `uv sync` you need to pass in the additional `--no-install-package datafusion` +and for `uv run` commands the additional parameter `--no-project` Bootstrap: .. code-block:: shell # fetch this repo - git clone git@github.com:apache/arrow-datafusion-python.git - # prepare development environment (used to build wheel / install in development) - python3 -m venv venv - # activate the venv - source venv/bin/activate - # update pip itself if necessary - python -m pip install -U pip - # install dependencies (for Python 3.8+) - python -m pip install -r requirements-310.txt + git clone git@github.com:apache/datafusion-python.git + # create the virtual environment + uv sync --dev --no-install-package datafusion + # activate the environment + source .venv/bin/activate The tests rely on test data in git submodules. @@ -58,8 +60,8 @@ Whenever rust code changes (your changes or via `git pull`): .. code-block:: shell - # make sure you activate the venv using "source venv/bin/activate" first - maturin develop + # make sure you activate the venv using "source .venv/bin/activate" first + maturin develop --uv python -m pytest Running & Installing pre-commit hooks @@ -86,20 +88,10 @@ Mostly, the ``python`` code is limited to pure wrappers with type hints and good Update Dependencies ------------------- -To change test dependencies, change the `requirements.in` and run - -.. code-block:: shell - - # install pip-tools (this can be done only once), also consider running in venv - python -m pip install pip-tools - python -m piptools compile --generate-hashes -o requirements-310.txt +To change test dependencies, change the ``pyproject.toml`` and run - -To update dependencies, run with `-U` +To update dependencies, run .. 
code-block:: shell - python -m piptools compile -U --generate-hashes -o requirements-310.txt - - -More details about pip-tools `here `_ + uv sync --dev --no-install-package datafusion diff --git a/pyproject.toml b/pyproject.toml index 98bda5aae..6e8acfe71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["maturin>=1.5.1,<1.6.0"] +requires = ["maturin>=1.8.1"] build-backend = "maturin" [project] @@ -24,7 +24,7 @@ name = "datafusion" description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.7" +requires-python = ">=3.8" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifiers = [ "Development Status :: 2 - Pre-Alpha", @@ -35,7 +35,6 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -82,3 +81,23 @@ max-doc-length = 88 "dev/*" = ["D"] "benchmarks/*" = ["D", "F"] "docs/*" = ["D"] + +[dependency-groups] +dev = [ + "maturin>=1.8.1", + "numpy>1.24.4 ; python_full_version >= '3.10'", + "pytest>=7.4.4", + "ruff>=0.9.1", + "toml>=0.10.2", +] +docs = [ + "sphinx>=7.1.2", + "pydata-sphinx-theme==0.8.0", + "myst-parser>=3.0.1", + "jinja2>=3.1.5", + "ipython>=8.12.3", + "pandas>=2.0.3", + "pickleshare>=0.7.5", + "sphinx-autoapi>=3.4.0", + "setuptools>=75.3.0", +] \ No newline at end of file diff --git a/requirements-310.txt b/requirements-310.txt deleted file mode 100644 index d7d25f3f1..000000000 --- a/requirements-310.txt +++ /dev/null @@ -1,195 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --generate-hashes --output-file=requirements-310.txt -# -exceptiongroup==1.2.1 \ - 
--hash=sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad \ - --hash=sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16 - # via pytest -iniconfig==2.0.0 \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - # via pytest -maturin==1.6.0 \ - --hash=sha256:16ef860df20028618b5a064da06b02c1c47acba064a4d25aaf84662a459ec599 \ - --hash=sha256:337899784955934dd67b30497d1dd5fab22da89f60bb079dbaf2eaa446b97a10 \ - --hash=sha256:4e931c92037128ade49cd26dd040d9c46ad8092d8170cc44f5c3a0b4a052d576 \ - --hash=sha256:50133965e52d8b5b969381fee3fde111ae2383905cdaba7650f256e08ccddcd4 \ - --hash=sha256:a2a2436628c36d98dabd79b52256df7e12fc4fd1b122984d9373fdf918fd4609 \ - --hash=sha256:aa4eb7dca7d246b466392f21016f67ff09a9aff2305fa714ca25a2344e4639e7 \ - --hash=sha256:b955025c24c8babc808db49e0ff90db8b4b1320dcc16b14eb26132841737230d \ - --hash=sha256:bd85edcb1b8e2bcddc1b7d16ce58ce00a66aa80c422745c8ad9e132ac40d4b48 \ - --hash=sha256:c87d1a7596c42b589099adb831343a56e02373588366e4cede96cbdf8bd68f9d \ - --hash=sha256:d67ca8dc7f3b2314bd3bf83c4de52645e220ee312fd526e53acc6a735f233fad \ - --hash=sha256:d8620970bd0b6a0acb99dbd0b1c2ebb7a69909d25f6023bdff9635a39001aa51 \ - --hash=sha256:d92b045e90ed919a8a2520dda64e3f384e5e746ea51e1498cc6ac3e9e5c76054 \ - --hash=sha256:dbbbf25dc3c207b0a7bd4f3aea1df33d4f22b8508592796a6f36f4d8ed216db0 - # via -r requirements.in -mypy==1.10.0 \ - --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ - --hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ - --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ - --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ - --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ - 
--hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ - --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ - --hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ - --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ - --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ - --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ - --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ - --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ - --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ - --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ - --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ - --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ - --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ - --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ - --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ - --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ - --hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ - --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ - --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ - --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ - --hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ - --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 - # via -r requirements.in -mypy-extensions==1.0.0 \ - --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ - 
--hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via mypy -numpy==2.0.0 \ - --hash=sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f \ - --hash=sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238 \ - --hash=sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f \ - --hash=sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95 \ - --hash=sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a \ - --hash=sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a \ - --hash=sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2 \ - --hash=sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2 \ - --hash=sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f \ - --hash=sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609 \ - --hash=sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f \ - --hash=sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad \ - --hash=sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86 \ - --hash=sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65 \ - --hash=sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb \ - --hash=sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995 \ - --hash=sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a \ - --hash=sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85 \ - --hash=sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4 \ - --hash=sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275 \ - --hash=sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1 \ - --hash=sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196 \ - 
--hash=sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d \ - --hash=sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e \ - --hash=sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514 \ - --hash=sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f \ - --hash=sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6 \ - --hash=sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4 \ - --hash=sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44 \ - --hash=sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df \ - --hash=sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581 \ - --hash=sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787 \ - --hash=sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5 \ - --hash=sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc \ - --hash=sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871 \ - --hash=sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54 \ - --hash=sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2 \ - --hash=sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98 \ - --hash=sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9 \ - --hash=sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864 \ - --hash=sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de \ - --hash=sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289 \ - --hash=sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b \ - --hash=sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c \ - --hash=sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9 - # via - # -r requirements.in - # pyarrow -packaging==24.1 \ - 
--hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via pytest -pluggy==1.5.0 \ - --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ - --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - # via pytest -pyarrow==16.1.0 \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - --hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - --hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - 
--hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - --hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 - # via -r requirements.in -pytest==8.2.2 \ - --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ - --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 - # via -r requirements.in -ruff==0.4.9 \ - --hash=sha256:06b60f91bfa5514bb689b500a25ba48e897d18fea14dce14b48a0c40d1635893 \ - --hash=sha256:0e8e7b95673f22e0efd3571fb5b0cf71a5eaaa3cc8a776584f3b2cc878e46bff \ - --hash=sha256:2d45ddc6d82e1190ea737341326ecbc9a61447ba331b0a8962869fcada758505 \ - --hash=sha256:4555056049d46d8a381f746680db1c46e67ac3b00d714606304077682832998e \ - 
--hash=sha256:5d5460f789ccf4efd43f265a58538a2c24dbce15dbf560676e430375f20a8198 \ - --hash=sha256:673bddb893f21ab47a8334c8e0ea7fd6598ecc8e698da75bcd12a7b9d0a3206e \ - --hash=sha256:732dd550bfa5d85af8c3c6cbc47ba5b67c6aed8a89e2f011b908fc88f87649db \ - --hash=sha256:784d3ec9bd6493c3b720a0b76f741e6c2d7d44f6b2be87f5eef1ae8cc1d54c84 \ - --hash=sha256:78de3fdb95c4af084087628132336772b1c5044f6e710739d440fc0bccf4d321 \ - --hash=sha256:8064590fd1a50dcf4909c268b0e7c2498253273309ad3d97e4a752bb9df4f521 \ - --hash=sha256:88bffe9c6a454bf8529f9ab9091c99490578a593cc9f9822b7fc065ee0712a06 \ - --hash=sha256:8c1aff58c31948cc66d0b22951aa19edb5af0a3af40c936340cd32a8b1ab7438 \ - --hash=sha256:98ec2775fd2d856dc405635e5ee4ff177920f2141b8e2d9eb5bd6efd50e80317 \ - --hash=sha256:b262ed08d036ebe162123170b35703aaf9daffecb698cd367a8d585157732991 \ - --hash=sha256:e0a22c4157e53d006530c902107c7f550b9233e9706313ab57b892d7197d8e52 \ - --hash=sha256:e91175fbe48f8a2174c9aad70438fe9cb0a5732c4159b2a10a3565fea2d94cde \ - --hash=sha256:f1cb0828ac9533ba0135d148d214e284711ede33640465e706772645483427e3 - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f - # via - # maturin - # mypy - # pytest -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via mypy diff --git a/requirements-311.txt b/requirements-311.txt deleted file mode 100644 index 35b91133c..000000000 --- a/requirements-311.txt +++ /dev/null @@ -1,175 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# 
-# pip-compile --generate-hashes --output-file=requirements-311.txt -# -iniconfig==2.0.0 \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - # via pytest -maturin==1.6.0 \ - --hash=sha256:16ef860df20028618b5a064da06b02c1c47acba064a4d25aaf84662a459ec599 \ - --hash=sha256:337899784955934dd67b30497d1dd5fab22da89f60bb079dbaf2eaa446b97a10 \ - --hash=sha256:4e931c92037128ade49cd26dd040d9c46ad8092d8170cc44f5c3a0b4a052d576 \ - --hash=sha256:50133965e52d8b5b969381fee3fde111ae2383905cdaba7650f256e08ccddcd4 \ - --hash=sha256:a2a2436628c36d98dabd79b52256df7e12fc4fd1b122984d9373fdf918fd4609 \ - --hash=sha256:aa4eb7dca7d246b466392f21016f67ff09a9aff2305fa714ca25a2344e4639e7 \ - --hash=sha256:b955025c24c8babc808db49e0ff90db8b4b1320dcc16b14eb26132841737230d \ - --hash=sha256:bd85edcb1b8e2bcddc1b7d16ce58ce00a66aa80c422745c8ad9e132ac40d4b48 \ - --hash=sha256:c87d1a7596c42b589099adb831343a56e02373588366e4cede96cbdf8bd68f9d \ - --hash=sha256:d67ca8dc7f3b2314bd3bf83c4de52645e220ee312fd526e53acc6a735f233fad \ - --hash=sha256:d8620970bd0b6a0acb99dbd0b1c2ebb7a69909d25f6023bdff9635a39001aa51 \ - --hash=sha256:d92b045e90ed919a8a2520dda64e3f384e5e746ea51e1498cc6ac3e9e5c76054 \ - --hash=sha256:dbbbf25dc3c207b0a7bd4f3aea1df33d4f22b8508592796a6f36f4d8ed216db0 - # via -r requirements.in -mypy==1.10.0 \ - --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ - --hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ - --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ - --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ - --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ - --hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ - --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ - 
--hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ - --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ - --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ - --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ - --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ - --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ - --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ - --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ - --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ - --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ - --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ - --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ - --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ - --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ - --hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ - --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ - --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ - --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ - --hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ - --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 - # via -r requirements.in -mypy-extensions==1.0.0 \ - --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ - --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via mypy -numpy==1.26.4 \ - --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ - 
--hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ - --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ - --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \ - --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \ - --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \ - --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \ - --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \ - --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \ - --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \ - --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \ - --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \ - --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \ - --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \ - --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \ - --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \ - --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \ - --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \ - --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \ - --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \ - --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \ - --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \ - --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \ - --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \ - --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \ - 
--hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \ - --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \ - --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \ - --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \ - --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \ - --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \ - --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \ - --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \ - --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ - --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ - --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f - # via - # -r requirements.in - # pyarrow -packaging==24.0 \ - --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ - --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 - # via pytest -pluggy==1.5.0 \ - --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ - --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - # via pytest -pyarrow==16.1.0 \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - 
--hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - --hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - 
--hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 - # via -r requirements.in -pytest==8.2.2 \ - --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ - --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 - # via -r requirements.in -ruff==0.4.8 \ - --hash=sha256:14019a06dbe29b608f6b7cbcec300e3170a8d86efaddb7b23405cb7f7dcaf780 \ - --hash=sha256:16d717b1d57b2e2fd68bd0bf80fb43931b79d05a7131aa477d66fc40fbd86268 \ - --hash=sha256:284c2e3f3396fb05f5f803c9fffb53ebbe09a3ebe7dda2929ed8d73ded736deb \ - --hash=sha256:384154a1c3f4bf537bac69f33720957ee49ac8d484bfc91720cc94172026ceed \ - --hash=sha256:6d795d7639212c2dfd01991259460101c22aabf420d9b943f153ab9d9706e6a9 \ - --hash=sha256:6ea874950daca5697309d976c9afba830d3bf0ed66887481d6bca1673fc5b66a \ - --hash=sha256:704977a658131651a22b5ebeb28b717ef42ac6ee3b11e91dc87b633b5d83142b \ - --hash=sha256:72584676164e15a68a15778fd1b17c28a519e7a0622161eb2debdcdabdc71883 \ - --hash=sha256:7663a6d78f6adb0eab270fa9cf1ff2d28618ca3a652b60f2a234d92b9ec89066 \ - --hash=sha256:9678d5c9b43315f323af2233a04d747409d1e3aa6789620083a82d1066a35199 \ - --hash=sha256:a7354f921e3fbe04d2a62d46707e569f9315e1a613307f7311a935743c51a764 \ - --hash=sha256:aad360893e92486662ef3be0a339c5ca3c1b109e0134fcd37d534d4be9fb8de3 \ - --hash=sha256:d05f8d6f0c3cce5026cecd83b7a143dcad503045857bc49662f736437380ad45 \ - --hash=sha256:e14a3a095d07560a9d6769a72f781d73259655919d9b396c650fc98a8157555d \ - --hash=sha256:e9d5ce97cacc99878aa0d084c626a15cd21e6b3d53fd6f9112b7fc485918e1fa \ - --hash=sha256:eeceb78da8afb6de0ddada93112869852d04f1cd0f6b80fe464fd4e35c330913 \ - 
--hash=sha256:fc95aac2943ddf360376be9aa3107c8cf9640083940a8c5bd824be692d2216dc - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -typing-extensions==4.12.1 \ - --hash=sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a \ - --hash=sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1 - # via mypy diff --git a/requirements-312.txt b/requirements-312.txt deleted file mode 100644 index e4de5a5d2..000000000 --- a/requirements-312.txt +++ /dev/null @@ -1,184 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --generate-hashes --output-file=requirements-312.txt -# -iniconfig==2.0.0 \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - # via pytest -maturin==1.6.0 \ - --hash=sha256:16ef860df20028618b5a064da06b02c1c47acba064a4d25aaf84662a459ec599 \ - --hash=sha256:337899784955934dd67b30497d1dd5fab22da89f60bb079dbaf2eaa446b97a10 \ - --hash=sha256:4e931c92037128ade49cd26dd040d9c46ad8092d8170cc44f5c3a0b4a052d576 \ - --hash=sha256:50133965e52d8b5b969381fee3fde111ae2383905cdaba7650f256e08ccddcd4 \ - --hash=sha256:a2a2436628c36d98dabd79b52256df7e12fc4fd1b122984d9373fdf918fd4609 \ - --hash=sha256:aa4eb7dca7d246b466392f21016f67ff09a9aff2305fa714ca25a2344e4639e7 \ - --hash=sha256:b955025c24c8babc808db49e0ff90db8b4b1320dcc16b14eb26132841737230d \ - --hash=sha256:bd85edcb1b8e2bcddc1b7d16ce58ce00a66aa80c422745c8ad9e132ac40d4b48 \ - --hash=sha256:c87d1a7596c42b589099adb831343a56e02373588366e4cede96cbdf8bd68f9d \ - --hash=sha256:d67ca8dc7f3b2314bd3bf83c4de52645e220ee312fd526e53acc6a735f233fad \ - --hash=sha256:d8620970bd0b6a0acb99dbd0b1c2ebb7a69909d25f6023bdff9635a39001aa51 \ - 
--hash=sha256:d92b045e90ed919a8a2520dda64e3f384e5e746ea51e1498cc6ac3e9e5c76054 \ - --hash=sha256:dbbbf25dc3c207b0a7bd4f3aea1df33d4f22b8508592796a6f36f4d8ed216db0 - # via -r requirements.in -mypy==1.10.0 \ - --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ - --hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ - --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ - --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ - --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ - --hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ - --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ - --hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ - --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ - --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ - --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ - --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ - --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ - --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ - --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ - --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ - --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ - --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ - --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ - --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ - --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ - 
--hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ - --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ - --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ - --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ - --hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ - --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 - # via -r requirements.in -mypy-extensions==1.0.0 \ - --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ - --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via mypy -numpy==2.0.0 \ - --hash=sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f \ - --hash=sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238 \ - --hash=sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f \ - --hash=sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95 \ - --hash=sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a \ - --hash=sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a \ - --hash=sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2 \ - --hash=sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2 \ - --hash=sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f \ - --hash=sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609 \ - --hash=sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f \ - --hash=sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad \ - --hash=sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86 \ - --hash=sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65 \ - --hash=sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb \ - 
--hash=sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995 \ - --hash=sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a \ - --hash=sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85 \ - --hash=sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4 \ - --hash=sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275 \ - --hash=sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1 \ - --hash=sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196 \ - --hash=sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d \ - --hash=sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e \ - --hash=sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514 \ - --hash=sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f \ - --hash=sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6 \ - --hash=sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4 \ - --hash=sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44 \ - --hash=sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df \ - --hash=sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581 \ - --hash=sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787 \ - --hash=sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5 \ - --hash=sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc \ - --hash=sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871 \ - --hash=sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54 \ - --hash=sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2 \ - --hash=sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98 \ - --hash=sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9 \ - 
--hash=sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864 \ - --hash=sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de \ - --hash=sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289 \ - --hash=sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b \ - --hash=sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c \ - --hash=sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9 - # via - # -r requirements.in - # pyarrow -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via pytest -pluggy==1.5.0 \ - --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ - --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - # via pytest -pyarrow==16.1.0 \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - --hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - 
--hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - --hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 - # via -r 
requirements.in -pytest==8.2.2 \ - --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ - --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 - # via -r requirements.in -ruff==0.4.9 \ - --hash=sha256:06b60f91bfa5514bb689b500a25ba48e897d18fea14dce14b48a0c40d1635893 \ - --hash=sha256:0e8e7b95673f22e0efd3571fb5b0cf71a5eaaa3cc8a776584f3b2cc878e46bff \ - --hash=sha256:2d45ddc6d82e1190ea737341326ecbc9a61447ba331b0a8962869fcada758505 \ - --hash=sha256:4555056049d46d8a381f746680db1c46e67ac3b00d714606304077682832998e \ - --hash=sha256:5d5460f789ccf4efd43f265a58538a2c24dbce15dbf560676e430375f20a8198 \ - --hash=sha256:673bddb893f21ab47a8334c8e0ea7fd6598ecc8e698da75bcd12a7b9d0a3206e \ - --hash=sha256:732dd550bfa5d85af8c3c6cbc47ba5b67c6aed8a89e2f011b908fc88f87649db \ - --hash=sha256:784d3ec9bd6493c3b720a0b76f741e6c2d7d44f6b2be87f5eef1ae8cc1d54c84 \ - --hash=sha256:78de3fdb95c4af084087628132336772b1c5044f6e710739d440fc0bccf4d321 \ - --hash=sha256:8064590fd1a50dcf4909c268b0e7c2498253273309ad3d97e4a752bb9df4f521 \ - --hash=sha256:88bffe9c6a454bf8529f9ab9091c99490578a593cc9f9822b7fc065ee0712a06 \ - --hash=sha256:8c1aff58c31948cc66d0b22951aa19edb5af0a3af40c936340cd32a8b1ab7438 \ - --hash=sha256:98ec2775fd2d856dc405635e5ee4ff177920f2141b8e2d9eb5bd6efd50e80317 \ - --hash=sha256:b262ed08d036ebe162123170b35703aaf9daffecb698cd367a8d585157732991 \ - --hash=sha256:e0a22c4157e53d006530c902107c7f550b9233e9706313ab57b892d7197d8e52 \ - --hash=sha256:e91175fbe48f8a2174c9aad70438fe9cb0a5732c4159b2a10a3565fea2d94cde \ - --hash=sha256:f1cb0828ac9533ba0135d148d214e284711ede33640465e706772645483427e3 - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - 
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via mypy diff --git a/requirements.in b/requirements.in deleted file mode 100644 index 1b7f62052..000000000 --- a/requirements.in +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -maturin>=1.5.1 -mypy -numpy -pyarrow>=11.0.0 -pytest -ruff -toml -importlib_metadata; python_version < "3.8" -PyGitHub diff --git a/uv.lock b/uv.lock new file mode 100644 index 000000000..75d9ed018 --- /dev/null +++ b/uv.lock @@ -0,0 +1,1842 @@ +version = 1 +requires-python = ">=3.8" +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", + "python_full_version < '3.9'", +] + +[[package]] +name = "alabaster" +version = "0.7.13" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/94/71/a8ee96d1fd95ca04a0d2e2d9c4081dac4c2d2b12f7ddb899c8cb9bfd1532/alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2", size = 11454 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3", size = 13857 }, +] + +[[package]] +name = "alabaster" +version = "0.7.16" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92", size = 13511 }, +] + +[[package]] +name = "alabaster" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + 
"python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, +] + +[[package]] +name = "appnope" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, +] + +[[package]] +name = "astroid" +version = "3.2.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/53/1067e1113ecaf58312357f2cd93063674924119d80d173adc3f6f2387aa2/astroid-3.2.4.tar.gz", hash = "sha256:0e14202810b30da1b735827f78f5157be2bbd4a7a59b7707ca0bfc2fb4c0063a", size = 397576 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/80/96/b32bbbb46170a1c8b8b1f28c794202e25cfe743565e9d3469b8eb1e0cc05/astroid-3.2.4-py3-none-any.whl", hash = "sha256:413658a61eeca6202a59231abb473f932038fbcbf1666587f66d482083413a25", size = 276348 
}, +] + +[[package]] +name = "astroid" +version = "3.3.8" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "typing-extensions", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/28/0bc8a17d6cd4cc3c79ae41b7105a2b9a327c110e5ddd37a8a27b29a5c8a2/astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c", size = 275153 }, +] + +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + +[[package]] +name = "babel" +version = "2.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytz", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, +] + +[[package]] +name = "backcall" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/40/764a663805d84deee23043e1426a9175567db89c8b3287b5c2ad9f71aa93/backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", size = 18041 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/1c/ff6546b6c12603d8dd1070aa3c3d273ad4c07f5771689a7b69a550e8c951/backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255", size = 11157 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, +] + +[[package]] +name = "certifi" +version = "2024.12.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/bd/1d41ee578ce09523c81a15426705dd20969f5abf006d1afe8aeff0dd776a/certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db", size = 166010 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a5/32/8f6669fc4798494966bf446c8c4a162e0b5d893dff088afddf76414f70e1/certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56", size = 164927 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/58/5580c1716040bc89206c77d8f74418caf82ce519aae06450393ca73475d1/charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de", size = 198013 }, + { url = "https://files.pythonhosted.org/packages/d0/11/00341177ae71c6f5159a08168bcb98c6e6d196d372c94511f9f6c9afe0c6/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176", size = 141285 }, + { url = "https://files.pythonhosted.org/packages/01/09/11d684ea5819e5a8f5100fb0b38cf8d02b514746607934134d31233e02c8/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037", size = 151449 }, + { url = "https://files.pythonhosted.org/packages/08/06/9f5a12939db324d905dc1f70591ae7d7898d030d7662f0d426e2286f68c9/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f", size = 143892 }, + { url = 
"https://files.pythonhosted.org/packages/93/62/5e89cdfe04584cb7f4d36003ffa2936681b03ecc0754f8e969c2becb7e24/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a", size = 146123 }, + { url = "https://files.pythonhosted.org/packages/a9/ac/ab729a15c516da2ab70a05f8722ecfccc3f04ed7a18e45c75bbbaa347d61/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a", size = 147943 }, + { url = "https://files.pythonhosted.org/packages/03/d2/3f392f23f042615689456e9a274640c1d2e5dd1d52de36ab8f7955f8f050/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247", size = 142063 }, + { url = "https://files.pythonhosted.org/packages/f2/e3/e20aae5e1039a2cd9b08d9205f52142329f887f8cf70da3650326670bddf/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408", size = 150578 }, + { url = "https://files.pythonhosted.org/packages/8d/af/779ad72a4da0aed925e1139d458adc486e61076d7ecdcc09e610ea8678db/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb", size = 153629 }, + { url = "https://files.pythonhosted.org/packages/c2/b6/7aa450b278e7aa92cf7732140bfd8be21f5f29d5bf334ae987c945276639/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d", size = 150778 }, + { url = "https://files.pythonhosted.org/packages/39/f4/d9f4f712d0951dcbfd42920d3db81b00dd23b6ab520419626f4023334056/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807", size = 146453 }, + { url = "https://files.pythonhosted.org/packages/49/2b/999d0314e4ee0cff3cb83e6bc9aeddd397eeed693edb4facb901eb8fbb69/charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f", size = 95479 }, + { url = "https://files.pythonhosted.org/packages/2d/ce/3cbed41cff67e455a386fb5e5dd8906cdda2ed92fbc6297921f2e4419309/charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f", size = 102790 }, + { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, + { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, + { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, + { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335 }, + { url = 
"https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862 }, + { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673 }, + { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211 }, + { url = "https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039 }, + { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939 }, + { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075 }, + { url = "https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340 }, + { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205 }, + { url = "https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441 }, + { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, + { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, + { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, + { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, + { url = 
"https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, + { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, + { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, + { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, + { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, + { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, + { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, + { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, + { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = 
"https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/10/bd/6517ea94f2672e801011d50b5d06be2a0deaf566aea27bcdcd47e5195357/charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c", size = 195653 }, + { url = "https://files.pythonhosted.org/packages/e5/0d/815a2ba3f283b4eeaa5ece57acade365c5b4135f65a807a083c818716582/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9", size = 140701 }, + { url = "https://files.pythonhosted.org/packages/aa/17/c94be7ee0d142687e047fe1de72060f6d6837f40eedc26e87e6e124a3fc6/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8", size = 150495 }, + { url = "https://files.pythonhosted.org/packages/f7/33/557ac796c47165fc141e4fb71d7b0310f67e05cb420756f3a82e0a0068e0/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6", size = 142946 }, + { url = 
"https://files.pythonhosted.org/packages/1e/0d/38ef4ae41e9248d63fc4998d933cae22473b1b2ac4122cf908d0f5eb32aa/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c", size = 144737 }, + { url = "https://files.pythonhosted.org/packages/43/01/754cdb29dd0560f58290aaaa284d43eea343ad0512e6ad3b8b5c11f08592/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a", size = 147471 }, + { url = "https://files.pythonhosted.org/packages/ba/cd/861883ba5160c7a9bd242c30b2c71074cda2aefcc0addc91118e0d4e0765/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd", size = 140801 }, + { url = "https://files.pythonhosted.org/packages/6f/7f/0c0dad447819e90b93f8ed238cc8f11b91353c23c19e70fa80483a155bed/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd", size = 149312 }, + { url = "https://files.pythonhosted.org/packages/8e/09/9f8abcc6fff60fb727268b63c376c8c79cc37b833c2dfe1f535dfb59523b/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824", size = 152347 }, + { url = "https://files.pythonhosted.org/packages/be/e5/3f363dad2e24378f88ccf63ecc39e817c29f32e308ef21a7a6d9c1201165/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca", size = 149888 }, + { url = "https://files.pythonhosted.org/packages/e4/10/a78c0e91f487b4ad0ef7480ac765e15b774f83de2597f1b6ef0eaf7a2f99/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b", 
size = 145169 }, + { url = "https://files.pythonhosted.org/packages/d3/81/396e7d7f5d7420da8273c91175d2e9a3f569288e3611d521685e4b9ac9cc/charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e", size = 95094 }, + { url = "https://files.pythonhosted.org/packages/40/bb/20affbbd9ea29c71ea123769dc568a6d42052ff5089c5fe23e21e21084a6/charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4", size = 102139 }, + { url = "https://files.pythonhosted.org/packages/7f/c0/b913f8f02836ed9ab32ea643c6fe4d3325c3d8627cf6e78098671cafff86/charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", size = 197867 }, + { url = "https://files.pythonhosted.org/packages/0f/6c/2bee440303d705b6fb1e2ec789543edec83d32d258299b16eed28aad48e0/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", size = 141385 }, + { url = "https://files.pythonhosted.org/packages/3d/04/cb42585f07f6f9fd3219ffb6f37d5a39b4fd2db2355b23683060029c35f7/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", size = 151367 }, + { url = "https://files.pythonhosted.org/packages/54/54/2412a5b093acb17f0222de007cc129ec0e0df198b5ad2ce5699355269dfe/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770", size = 143928 }, + { url = "https://files.pythonhosted.org/packages/5a/6d/e2773862b043dcf8a221342954f375392bb2ce6487bcd9f2c1b34e1d6781/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4", size = 146203 }, + { url = "https://files.pythonhosted.org/packages/b9/f8/ca440ef60d8f8916022859885f231abb07ada3c347c03d63f283bec32ef5/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537", size = 148082 }, + { url = "https://files.pythonhosted.org/packages/04/d2/42fd330901aaa4b805a1097856c2edf5095e260a597f65def493f4b8c833/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496", size = 142053 }, + { url = "https://files.pythonhosted.org/packages/9e/af/3a97a4fa3c53586f1910dadfc916e9c4f35eeada36de4108f5096cb7215f/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78", size = 150625 }, + { url = "https://files.pythonhosted.org/packages/26/ae/23d6041322a3556e4da139663d02fb1b3c59a23ab2e2b56432bd2ad63ded/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7", size = 153549 }, + { url = "https://files.pythonhosted.org/packages/94/22/b8f2081c6a77cb20d97e57e0b385b481887aa08019d2459dc2858ed64871/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6", size = 150945 }, + { url = "https://files.pythonhosted.org/packages/c7/0b/c5ec5092747f801b8b093cdf5610e732b809d6cb11f4c51e35fc28d1d389/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294", size = 146595 }, + { url = "https://files.pythonhosted.org/packages/0c/5a/0b59704c38470df6768aa154cc87b1ac7c9bb687990a1559dc8765e8627e/charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = 
"sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5", size = 95453 }, + { url = "https://files.pythonhosted.org/packages/85/2d/a9790237cb4d01a6d57afadc8573c8b73c609ade20b80f4cda30802009ee/charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765", size = 102811 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = { editable = "." 
} +dependencies = [ + { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "maturin" }, + { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pytest" }, + { name = "ruff" }, + { name = "toml" }, +] +docs = [ + { name = "ipython", version = "8.12.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "ipython", version = "8.31.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "jinja2" }, + { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "myst-parser", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pandas", version = "2.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pandas", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pickleshare" }, + { name = "pydata-sphinx-theme" }, + { name = "setuptools", version = "75.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "setuptools", version = "75.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "sphinx", version = 
"7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinx-autoapi" }, +] + +[package.metadata] +requires-dist = [ + { name = "pyarrow", specifier = ">=11.0.0" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "maturin", specifier = ">=1.8.1" }, + { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "pytest", specifier = ">=7.4.4" }, + { name = "ruff", specifier = ">=0.9.1" }, + { name = "toml", specifier = ">=0.10.2" }, +] +docs = [ + { name = "ipython", specifier = ">=8.12.3" }, + { name = "jinja2", specifier = ">=3.1.5" }, + { name = "myst-parser", specifier = ">=3.0.1" }, + { name = "pandas", specifier = ">=2.0.3" }, + { name = "pickleshare", specifier = ">=0.7.5" }, + { name = "pydata-sphinx-theme", specifier = "==0.8.0" }, + { name = "setuptools", specifier = ">=75.3.0" }, + { name = "sphinx", specifier = ">=7.1.2" }, + { name = "sphinx-autoapi", specifier = ">=3.4.0" }, +] + +[[package]] +name = "decorator" +version = "5.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/0c/8d907af351aa16b42caae42f9d6aa37b900c67308052d10fdce809f8d952/decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", size = 35016 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, +] + +[[package]] +name = 
"docutils" +version = "0.20.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666 }, +] + +[[package]] +name = "docutils" +version = "0.21.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = 
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, +] + +[[package]] +name = "executing" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "imagesize" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769 }, +] + +[[package]] +name = "importlib-metadata" +version = 
"8.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp", version = "3.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "zipp", version = "3.21.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + +[[package]] +name = "ipython" +version = "8.12.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "appnope", marker = "python_full_version < '3.9' and sys_platform == 'darwin'" }, + { name = "backcall", marker = "python_full_version < '3.9'" }, + { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version < '3.9'" }, + { 
name = "jedi", marker = "python_full_version < '3.9'" }, + { name = "matplotlib-inline", marker = "python_full_version < '3.9'" }, + { name = "pexpect", marker = "python_full_version < '3.9' and sys_platform != 'win32'" }, + { name = "pickleshare", marker = "python_full_version < '3.9'" }, + { name = "prompt-toolkit", marker = "python_full_version < '3.9'" }, + { name = "pygments", marker = "python_full_version < '3.9'" }, + { name = "stack-data", marker = "python_full_version < '3.9'" }, + { name = "traitlets", marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/6a/44ef299b1762f5a73841e87fae8a73a8cc8aee538d6dc8c77a5afe1fd2ce/ipython-8.12.3.tar.gz", hash = "sha256:3910c4b54543c2ad73d06579aa771041b7d5707b033bd488669b4cf544e3b363", size = 5470171 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/97/8fe103906cd81bc42d3b0175b5534a9f67dccae47d6451131cf8d0d70bb2/ipython-8.12.3-py3-none-any.whl", hash = "sha256:b0340d46a933d27c657b211a329d0be23793c36595acf9e6ef4164bc01a1804c", size = 798307 }, +] + +[[package]] +name = "ipython" +version = "8.18.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version == '3.9.*'" }, + { name = "exceptiongroup", marker = "python_full_version == '3.9.*'" }, + { name = "jedi", marker = "python_full_version == '3.9.*'" }, + { name = "matplotlib-inline", marker = "python_full_version == '3.9.*'" }, + { name = "pexpect", marker = "python_full_version == '3.9.*' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version == '3.9.*'" }, + { name = "pygments", marker = "python_full_version == '3.9.*'" }, + { name = "stack-data", marker = 
"python_full_version == '3.9.*'" }, + { name = "traitlets", marker = "python_full_version == '3.9.*'" }, + { name = "typing-extensions", marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/b9/3ba6c45a6df813c09a48bac313c22ff83efa26cbb55011218d925a46e2ad/ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", size = 5486330 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/6b/d9fdcdef2eb6a23f391251fde8781c38d42acd82abe84d054cb74f7863b0/ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397", size = 808161 }, +] + +[[package]] +name = "ipython" +version = "8.31.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version >= '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, + { name = "jedi", marker = "python_full_version >= '3.10'" }, + { name = "matplotlib-inline", marker = "python_full_version >= '3.10'" }, + { name = "pexpect", marker = "python_full_version >= '3.10' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version >= '3.10'" }, + { name = "pygments", marker = "python_full_version >= '3.10'" }, + { name = "stack-data", marker = "python_full_version >= '3.10'" }, + { name = "traitlets", marker = "python_full_version >= '3.10'" }, + { name = "typing-extensions", marker = "python_full_version >= '3.10' and python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/35/6f90fdddff7a08b7b715fccbd2427b5212c9525cd043d26fdc45bee0708d/ipython-8.31.0.tar.gz", 
hash = "sha256:b6a2274606bec6166405ff05e54932ed6e5cfecaca1fc05f2cacde7bb074d70b", size = 5501011 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/60/d0feb6b6d9fe4ab89fe8fe5b47cbf6cd936bfd9f1e7ffa9d0015425aeed6/ipython-8.31.0-py3-none-any.whl", hash = "sha256:46ec58f8d3d076a61d128fe517a51eb730e3aaf0c184ea8c17d16e366660c6a6", size = 821583 }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278 }, +] + +[[package]] +name = "jinja2" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe", version = "2.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "markupsafe", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/92/b3130cbbf5591acf9ade8708c365f3238046ac7cb8ccba6e81abccb0ccff/jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", size = 244674 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/0f/2ba5fbcd631e3e88689309dbe978c5769e883e4b84ebfe7da30b43275c5a/jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb", size = 134596 }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, +] + +[[package]] +name = "markupsafe" +version = "2.1.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/87/5b/aae44c6655f3801e81aa3eef09dbbf012431987ba564d7231722f68df02d/MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", size = 19384 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/54/ad5eb37bf9d51800010a74e4665425831a9db4e7c4e0fde4352e391e808e/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", size = 18206 }, + { url = "https://files.pythonhosted.org/packages/6a/4a/a4d49415e600bacae038c67f9fecc1d5433b9d3c71a4de6f33537b89654c/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", size = 14079 }, + { url = "https://files.pythonhosted.org/packages/0a/7b/85681ae3c33c385b10ac0f8dd025c30af83c78cec1c37a6aa3b55e67f5ec/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", size = 26620 }, + { url = 
"https://files.pythonhosted.org/packages/7c/52/2b1b570f6b8b803cef5ac28fdf78c0da318916c7d2fe9402a84d591b394c/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", size = 25818 }, + { url = "https://files.pythonhosted.org/packages/29/fe/a36ba8c7ca55621620b2d7c585313efd10729e63ef81e4e61f52330da781/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", size = 25493 }, + { url = "https://files.pythonhosted.org/packages/60/ae/9c60231cdfda003434e8bd27282b1f4e197ad5a710c14bee8bea8a9ca4f0/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", size = 30630 }, + { url = "https://files.pythonhosted.org/packages/65/dc/1510be4d179869f5dafe071aecb3f1f41b45d37c02329dfba01ff59e5ac5/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", size = 29745 }, + { url = "https://files.pythonhosted.org/packages/30/39/8d845dd7d0b0613d86e0ef89549bfb5f61ed781f59af45fc96496e897f3a/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", size = 30021 }, + { url = "https://files.pythonhosted.org/packages/c7/5c/356a6f62e4f3c5fbf2602b4771376af22a3b16efa74eb8716fb4e328e01e/MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", size = 16659 }, + { url = "https://files.pythonhosted.org/packages/69/48/acbf292615c65f0604a0c6fc402ce6d8c991276e16c80c46a8f758fbd30c/MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", size = 17213 }, + { url = 
"https://files.pythonhosted.org/packages/11/e7/291e55127bb2ae67c64d66cef01432b5933859dfb7d6949daa721b89d0b3/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", size = 18219 }, + { url = "https://files.pythonhosted.org/packages/6b/cb/aed7a284c00dfa7c0682d14df85ad4955a350a21d2e3b06d8240497359bf/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", size = 14098 }, + { url = "https://files.pythonhosted.org/packages/1c/cf/35fe557e53709e93feb65575c93927942087e9b97213eabc3fe9d5b25a55/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", size = 29014 }, + { url = "https://files.pythonhosted.org/packages/97/18/c30da5e7a0e7f4603abfc6780574131221d9148f323752c2755d48abad30/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", size = 28220 }, + { url = "https://files.pythonhosted.org/packages/0c/40/2e73e7d532d030b1e41180807a80d564eda53babaf04d65e15c1cf897e40/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", size = 27756 }, + { url = "https://files.pythonhosted.org/packages/18/46/5dca760547e8c59c5311b332f70605d24c99d1303dd9a6e1fc3ed0d73561/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", size = 33988 }, + { url = "https://files.pythonhosted.org/packages/6d/c5/27febe918ac36397919cd4a67d5579cbbfa8da027fa1238af6285bb368ea/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", size = 32718 }, + { url 
= "https://files.pythonhosted.org/packages/f8/81/56e567126a2c2bc2684d6391332e357589a96a76cb9f8e5052d85cb0ead8/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", size = 33317 }, + { url = "https://files.pythonhosted.org/packages/00/0b/23f4b2470accb53285c613a3ab9ec19dc944eaf53592cb6d9e2af8aa24cc/MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", size = 16670 }, + { url = "https://files.pythonhosted.org/packages/b7/a2/c78a06a9ec6d04b3445a949615c4c7ed86a0b2eb68e44e7541b9d57067cc/MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", size = 17224 }, + { url = "https://files.pythonhosted.org/packages/53/bd/583bf3e4c8d6a321938c13f49d44024dbe5ed63e0a7ba127e454a66da974/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", size = 18215 }, + { url = "https://files.pythonhosted.org/packages/48/d6/e7cd795fc710292c3af3a06d80868ce4b02bfbbf370b7cee11d282815a2a/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", size = 14069 }, + { url = "https://files.pythonhosted.org/packages/51/b5/5d8ec796e2a08fc814a2c7d2584b55f889a55cf17dd1a90f2beb70744e5c/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", size = 29452 }, + { url = "https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", size = 28462 }, + { url = 
"https://files.pythonhosted.org/packages/2d/75/fd6cb2e68780f72d47e6671840ca517bda5ef663d30ada7616b0462ad1e3/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", size = 27869 }, + { url = "https://files.pythonhosted.org/packages/b0/81/147c477391c2750e8fc7705829f7351cf1cd3be64406edcf900dc633feb2/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", size = 33906 }, + { url = "https://files.pythonhosted.org/packages/8b/ff/9a52b71839d7a256b563e85d11050e307121000dcebc97df120176b3ad93/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", size = 32296 }, + { url = "https://files.pythonhosted.org/packages/88/07/2dc76aa51b481eb96a4c3198894f38b480490e834479611a4053fbf08623/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", size = 33038 }, + { url = "https://files.pythonhosted.org/packages/96/0c/620c1fb3661858c0e37eb3cbffd8c6f732a67cd97296f725789679801b31/MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", size = 16572 }, + { url = "https://files.pythonhosted.org/packages/3f/14/c3554d512d5f9100a95e737502f4a2323a1959f6d0d01e0d0997b35f7b10/MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", size = 17127 }, + { url = "https://files.pythonhosted.org/packages/f8/ff/2c942a82c35a49df5de3a630ce0a8456ac2969691b230e530ac12314364c/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", size = 18192 }, + { url = 
"https://files.pythonhosted.org/packages/4f/14/6f294b9c4f969d0c801a4615e221c1e084722ea6114ab2114189c5b8cbe0/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", size = 14072 }, + { url = "https://files.pythonhosted.org/packages/81/d4/fd74714ed30a1dedd0b82427c02fa4deec64f173831ec716da11c51a50aa/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", size = 26928 }, + { url = "https://files.pythonhosted.org/packages/c7/bd/50319665ce81bb10e90d1cf76f9e1aa269ea6f7fa30ab4521f14d122a3df/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", size = 26106 }, + { url = "https://files.pythonhosted.org/packages/4c/6f/f2b0f675635b05f6afd5ea03c094557bdb8622fa8e673387444fe8d8e787/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68", size = 25781 }, + { url = "https://files.pythonhosted.org/packages/51/e0/393467cf899b34a9d3678e78961c2c8cdf49fb902a959ba54ece01273fb1/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", size = 30518 }, + { url = "https://files.pythonhosted.org/packages/f6/02/5437e2ad33047290dafced9df741d9efc3e716b75583bbd73a9984f1b6f7/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", size = 29669 }, + { url = "https://files.pythonhosted.org/packages/0e/7d/968284145ffd9d726183ed6237c77938c021abacde4e073020f920e060b2/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", size = 29933 }, + { url = 
"https://files.pythonhosted.org/packages/bf/f3/ecb00fc8ab02b7beae8699f34db9357ae49d9f21d4d3de6f305f34fa949e/MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", size = 16656 }, + { url = "https://files.pythonhosted.org/packages/92/21/357205f03514a49b293e214ac39de01fadd0970a6e05e4bf1ddd0ffd0881/MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", size = 17206 }, + { url = "https://files.pythonhosted.org/packages/0f/31/780bb297db036ba7b7bbede5e1d7f1e14d704ad4beb3ce53fb495d22bc62/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", size = 18193 }, + { url = "https://files.pythonhosted.org/packages/6c/77/d77701bbef72892affe060cdacb7a2ed7fd68dae3b477a8642f15ad3b132/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", size = 14073 }, + { url = "https://files.pythonhosted.org/packages/d9/a7/1e558b4f78454c8a3a0199292d96159eb4d091f983bc35ef258314fe7269/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", size = 26486 }, + { url = "https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", size = 25685 }, + { url = "https://files.pythonhosted.org/packages/6a/18/ae5a258e3401f9b8312f92b028c54d7026a97ec3ab20bfaddbdfa7d8cce8/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", size = 25338 }, + { url = 
"https://files.pythonhosted.org/packages/0b/cc/48206bd61c5b9d0129f4d75243b156929b04c94c09041321456fd06a876d/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", size = 30439 }, + { url = "https://files.pythonhosted.org/packages/d1/06/a41c112ab9ffdeeb5f77bc3e331fdadf97fa65e52e44ba31880f4e7f983c/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", size = 29531 }, + { url = "https://files.pythonhosted.org/packages/02/8c/ab9a463301a50dab04d5472e998acbd4080597abc048166ded5c7aa768c8/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", size = 29823 }, + { url = "https://files.pythonhosted.org/packages/bc/29/9bc18da763496b055d8e98ce476c8e718dcfd78157e17f555ce6dd7d0895/MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", size = 16658 }, + { url = "https://files.pythonhosted.org/packages/f6/f8/4da07de16f10551ca1f640c92b5f316f9394088b183c6a57183df6de5ae4/MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", size = 17211 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, + { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393 }, + { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732 }, + { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866 }, + { url = "https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964 }, + { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977 }, + { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366 }, + { url 
= "https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091 }, + { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065 }, + { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514 }, + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 }, + { url = 
"https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 }, + { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, + { url = 
"https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 }, + { url = 
"https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352 }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122 }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085 }, + { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978 }, + { url = 
"https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208 }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357 }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344 }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101 }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603 }, + { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510 }, + { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486 }, + { url = 
"https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480 }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914 }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796 }, + { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473 }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114 }, + { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098 }, + { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208 }, + { url = 
"https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 }, + { url = "https://files.pythonhosted.org/packages/a7/ea/9b1530c3fdeeca613faeb0fb5cbcf2389d816072fab72a71b45749ef6062/MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a", size = 14344 }, + { url = "https://files.pythonhosted.org/packages/4b/c2/fbdbfe48848e7112ab05e627e718e854d20192b674952d9042ebd8c9e5de/MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff", size = 12389 }, + { url = "https://files.pythonhosted.org/packages/f0/25/7a7c6e4dbd4f867d95d94ca15449e91e52856f6ed1905d58ef1de5e211d0/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13", size = 21607 }, + { url = "https://files.pythonhosted.org/packages/53/8f/f339c98a178f3c1e545622206b40986a4c3307fe39f70ccd3d9df9a9e425/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144", size = 20728 }, + { url = "https://files.pythonhosted.org/packages/1a/03/8496a1a78308456dbd50b23a385c69b41f2e9661c67ea1329849a598a8f9/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29", size = 20826 }, + { url = "https://files.pythonhosted.org/packages/e6/cf/0a490a4bd363048c3022f2f475c8c05582179bb179defcee4766fb3dcc18/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0", size = 21843 }, + { url = 
"https://files.pythonhosted.org/packages/19/a3/34187a78613920dfd3cdf68ef6ce5e99c4f3417f035694074beb8848cd77/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0", size = 21219 }, + { url = "https://files.pythonhosted.org/packages/17/d8/5811082f85bb88410ad7e452263af048d685669bbbfb7b595e8689152498/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178", size = 20946 }, + { url = "https://files.pythonhosted.org/packages/7c/31/bd635fb5989440d9365c5e3c47556cfea121c7803f5034ac843e8f37c2f2/MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f", size = 15063 }, + { url = "https://files.pythonhosted.org/packages/b3/73/085399401383ce949f727afec55ec3abd76648d04b9f22e1c0e99cb4bec3/MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a", size = 15506 }, +] + +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, +] + +[[package]] +name = "maturin" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9a/08/ccb0f917722a35ab0d758be9bb5edaf645c3a3d6170061f10d396ecd273f/maturin-1.8.1.tar.gz", hash = "sha256:49cd964aabf59f8b0a6969f9860d2cdf194ac331529caae14c884f5659568857", size = 197397 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/00/f34077315f34db8ad2ccf6bfe11b864ca27baab3a1320634da8e3cf89a48/maturin-1.8.1-py3-none-linux_armv6l.whl", hash = "sha256:7e590a23d9076b8a994f2e67bc63dc9a2d1c9a41b1e7b45ac354ba8275254e89", size = 7568415 }, + { url = "https://files.pythonhosted.org/packages/5c/07/9219976135ce0cb32d2fa6ea5c6d0ad709013d9a17967312e149b98153a6/maturin-1.8.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d8251a95682c83ea60988c804b620c181911cd824aa107b4a49ac5333c92968", size = 14527816 }, + { url = "https://files.pythonhosted.org/packages/e6/04/fa009a00903acdd1785d58322193140bfe358595347c39f315112dabdf9e/maturin-1.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b9fc1a4354cac5e32c190410208039812ea88c4a36bd2b6499268ec49ef5de00", size = 7580446 }, + { url = "https://files.pythonhosted.org/packages/9b/d4/414b2aab9bbfe88182b734d3aa1b4fef7d7701e50f6be48500378b8c8721/maturin-1.8.1-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:621e171c6b39f95f1d0df69a118416034fbd59c0f89dcaea8c2ea62019deecba", size = 7650535 }, + { url = "https://files.pythonhosted.org/packages/f0/64/879418a8a0196013ec1fb19eada0781c04a30e8d6d9227e80f91275a4f5b/maturin-1.8.1-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:98f638739a5132962347871b85c91f525c9246ef4d99796ae98a2031e3df029f", size = 8006702 }, + { url = "https://files.pythonhosted.org/packages/39/c2/605829324f8371294f70303aca130682df75318958efed246873d3d604ab/maturin-1.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:f9f5c47521924b6e515cbc652a042fe5f17f8747445be9d931048e5d8ddb50a4", size = 
7368164 }, + { url = "https://files.pythonhosted.org/packages/be/6c/30e136d397bb146b94b628c0ef7f17708281611b97849e2cf37847025ac7/maturin-1.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:0f4407c7353c31bfbb8cdeb82bc2170e474cbfb97b5ba27568f440c9d6c1fdd4", size = 7450889 }, + { url = "https://files.pythonhosted.org/packages/1b/50/e1f5023512696d4e56096f702e2f68d6d9a30afe0a4eec82b0e27b8eb4e4/maturin-1.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:ec49cd70cad3c389946c6e2bc0bd50772a7fcb463040dd800720345897eec9bf", size = 9585819 }, + { url = "https://files.pythonhosted.org/packages/b7/80/b24b5248d89d2e5982553900237a337ea098ca9297b8369ca2aa95549e0f/maturin-1.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08767d794de8f8a11c5c8b1b47a4ff9fb6ae2d2d97679e27030f2f509c8c2a0", size = 10920801 }, + { url = "https://files.pythonhosted.org/packages/6e/f4/8ede7a662fabf93456b44390a5ad22630e25fb5ddaecf787251071b2e143/maturin-1.8.1-py3-none-win32.whl", hash = "sha256:d678407713f3e10df33c5b3d7a343ec0551eb7f14d8ad9ba6febeb96f4e4c75c", size = 6873556 }, + { url = "https://files.pythonhosted.org/packages/9c/22/757f093ed0e319e9648155b8c9d716765442bea5bc98ebc58ad4ad5b0524/maturin-1.8.1-py3-none-win_amd64.whl", hash = "sha256:a526f90fe0e5cb59ffb81f4ff547ddc42e823bbdeae4a31012c0893ca6dcaf46", size = 7823153 }, + { url = "https://files.pythonhosted.org/packages/a4/f5/051413e04f6da25069db5e76759ecdb8cd2a8ab4a94045b5a3bf548c66fa/maturin-1.8.1-py3-none-win_arm64.whl", hash = "sha256:e95f077fd2ddd2f048182880eed458c308571a534be3eb2add4d3dac55bf57f4", size = 6552131 }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "myst-parser" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", + "python_full_version < '3.9'", +] +dependencies = [ + { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "jinja2", marker = "python_full_version < '3.10'" }, + { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, + { name = "mdit-py-plugins", marker = "python_full_version < '3.10'" }, + { name = "pyyaml", marker = "python_full_version < '3.10'" }, + { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/64/e2f13dac02f599980798c01156393b781aec983b52a6e4057ee58f07c43a/myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87", size = 92392 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/de/21aa8394f16add8f7427f0a1326ccd2b3a2a8a3245c9252bc5ac034c6155/myst_parser-3.0.1-py3-none-any.whl", hash = "sha256:6457aaa33a5d474aca678b8ead9b3dc298e89c68e67012e73146ea6fd54babf1", size = 83163 }, +] + +[[package]] +name = "myst-parser" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "jinja2", marker = "python_full_version >= '3.10'" }, + { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, + { name = "mdit-py-plugins", marker = "python_full_version >= '3.10'" }, + { name = "pyyaml", marker = "python_full_version >= '3.10'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/55/6d1741a1780e5e65038b74bce6689da15f620261c490c3511eb4c12bac4b/myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531", size = 93858 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/b4/b036f8fdb667587bb37df29dc6644681dd78b7a2a6321a34684b79412b28/myst_parser-4.0.0-py3-none-any.whl", hash = 
"sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d", size = 84563 }, +] + +[[package]] +name = "numpy" +version = "1.24.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140 }, + { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297 }, + { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611 }, + { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357 }, + { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222 }, + { url = 
"https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514 }, + { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508 }, + { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033 }, + { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951 }, + { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923 }, + { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446 }, + { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466 }, + { url = 
"https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722 }, + { url = "https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102 }, + { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616 }, + { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263 }, + { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660 }, + { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112 }, + { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549 }, + { url = 
"https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950 }, + { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228 }, + { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170 }, + { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918 }, + { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441 }, + { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590 }, + { url = "https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744 }, + { url = 
"https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290 }, +] + +[[package]] +name = "numpy" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/91/3495b3237510f79f5d81f2508f9f13fea78ebfdf07538fc7444badda173d/numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece", size = 21165245 }, + { url = "https://files.pythonhosted.org/packages/05/33/26178c7d437a87082d11019292dce6d3fe6f0e9026b7b2309cbf3e489b1d/numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04", size = 13738540 }, + { url = "https://files.pythonhosted.org/packages/ec/31/cc46e13bf07644efc7a4bf68df2df5fb2a1a88d0cd0da9ddc84dc0033e51/numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66", size = 5300623 }, + { url = "https://files.pythonhosted.org/packages/6e/16/7bfcebf27bb4f9d7ec67332ffebee4d1bf085c84246552d52dbb548600e7/numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b", size = 6901774 }, + { url = "https://files.pythonhosted.org/packages/f9/a3/561c531c0e8bf082c5bef509d00d56f82e0ea7e1e3e3a7fc8fa78742a6e5/numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd", size = 13907081 }, + { url = "https://files.pythonhosted.org/packages/fa/66/f7177ab331876200ac7563a580140643d1179c8b4b6a6b0fc9838de2a9b8/numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318", size = 19523451 }, + { url = "https://files.pythonhosted.org/packages/25/7f/0b209498009ad6453e4efc2c65bcdf0ae08a182b2b7877d7ab38a92dc542/numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8", size = 19927572 }, + { url = "https://files.pythonhosted.org/packages/3e/df/2619393b1e1b565cd2d4c4403bdd979621e2c4dea1f8532754b2598ed63b/numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326", size = 14400722 }, + { url = "https://files.pythonhosted.org/packages/22/ad/77e921b9f256d5da36424ffb711ae79ca3f451ff8489eeca544d0701d74a/numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97", size = 6472170 }, + { url = "https://files.pythonhosted.org/packages/10/05/3442317535028bc29cf0c0dd4c191a4481e8376e9f0db6bcf29703cadae6/numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131", size = 15905558 }, + { url = "https://files.pythonhosted.org/packages/8b/cf/034500fb83041aa0286e0fb16e7c76e5c8b67c0711bb6e9e9737a717d5fe/numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448", size = 21169137 }, + { url = "https://files.pythonhosted.org/packages/4a/d9/32de45561811a4b87fbdee23b5797394e3d1504b4a7cf40c10199848893e/numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195", size = 13703552 }, + { url = 
"https://files.pythonhosted.org/packages/c1/ca/2f384720020c7b244d22508cb7ab23d95f179fcfff33c31a6eeba8d6c512/numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57", size = 5298957 }, + { url = "https://files.pythonhosted.org/packages/0e/78/a3e4f9fb6aa4e6fdca0c5428e8ba039408514388cf62d89651aade838269/numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a", size = 6905573 }, + { url = "https://files.pythonhosted.org/packages/a0/72/cfc3a1beb2caf4efc9d0b38a15fe34025230da27e1c08cc2eb9bfb1c7231/numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669", size = 13914330 }, + { url = "https://files.pythonhosted.org/packages/ba/a8/c17acf65a931ce551fee11b72e8de63bf7e8a6f0e21add4c937c83563538/numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951", size = 19534895 }, + { url = "https://files.pythonhosted.org/packages/ba/86/8767f3d54f6ae0165749f84648da9dcc8cd78ab65d415494962c86fac80f/numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9", size = 19937253 }, + { url = "https://files.pythonhosted.org/packages/df/87/f76450e6e1c14e5bb1eae6836478b1028e096fd02e85c1c37674606ab752/numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15", size = 14414074 }, + { url = "https://files.pythonhosted.org/packages/5c/ca/0f0f328e1e59f73754f06e1adfb909de43726d4f24c6a3f8805f34f2b0fa/numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4", size = 6470640 }, + { url = 
"https://files.pythonhosted.org/packages/eb/57/3a3f14d3a759dcf9bf6e9eda905794726b758819df4663f217d658a58695/numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc", size = 15910230 }, + { url = "https://files.pythonhosted.org/packages/45/40/2e117be60ec50d98fa08c2f8c48e09b3edea93cfcabd5a9ff6925d54b1c2/numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b", size = 20895803 }, + { url = "https://files.pythonhosted.org/packages/46/92/1b8b8dee833f53cef3e0a3f69b2374467789e0bb7399689582314df02651/numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e", size = 13471835 }, + { url = "https://files.pythonhosted.org/packages/7f/19/e2793bde475f1edaea6945be141aef6c8b4c669b90c90a300a8954d08f0a/numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c", size = 5038499 }, + { url = "https://files.pythonhosted.org/packages/e3/ff/ddf6dac2ff0dd50a7327bcdba45cb0264d0e96bb44d33324853f781a8f3c/numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c", size = 6633497 }, + { url = "https://files.pythonhosted.org/packages/72/21/67f36eac8e2d2cd652a2e69595a54128297cdcb1ff3931cfc87838874bd4/numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692", size = 13621158 }, + { url = "https://files.pythonhosted.org/packages/39/68/e9f1126d757653496dbc096cb429014347a36b228f5a991dae2c6b6cfd40/numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a", size = 19236173 }, + { url = 
"https://files.pythonhosted.org/packages/d1/e9/1f5333281e4ebf483ba1c888b1d61ba7e78d7e910fdd8e6499667041cc35/numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c", size = 19634174 }, + { url = "https://files.pythonhosted.org/packages/71/af/a469674070c8d8408384e3012e064299f7a2de540738a8e414dcfd639996/numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded", size = 14099701 }, + { url = "https://files.pythonhosted.org/packages/d0/3d/08ea9f239d0e0e939b6ca52ad403c84a2bce1bde301a8eb4888c1c1543f1/numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5", size = 6174313 }, + { url = "https://files.pythonhosted.org/packages/b2/b5/4ac39baebf1fdb2e72585c8352c56d063b6126be9fc95bd2bb5ef5770c20/numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a", size = 15606179 }, + { url = "https://files.pythonhosted.org/packages/43/c1/41c8f6df3162b0c6ffd4437d729115704bd43363de0090c7f913cfbc2d89/numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c", size = 21169942 }, + { url = "https://files.pythonhosted.org/packages/39/bc/fd298f308dcd232b56a4031fd6ddf11c43f9917fbc937e53762f7b5a3bb1/numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd", size = 13711512 }, + { url = "https://files.pythonhosted.org/packages/96/ff/06d1aa3eeb1c614eda245c1ba4fb88c483bee6520d361641331872ac4b82/numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b", size = 5306976 }, + { url = 
"https://files.pythonhosted.org/packages/2d/98/121996dcfb10a6087a05e54453e28e58694a7db62c5a5a29cee14c6e047b/numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729", size = 6906494 }, + { url = "https://files.pythonhosted.org/packages/15/31/9dffc70da6b9bbf7968f6551967fc21156207366272c2a40b4ed6008dc9b/numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1", size = 13912596 }, + { url = "https://files.pythonhosted.org/packages/b9/14/78635daab4b07c0930c919d451b8bf8c164774e6a3413aed04a6d95758ce/numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd", size = 19526099 }, + { url = "https://files.pythonhosted.org/packages/26/4c/0eeca4614003077f68bfe7aac8b7496f04221865b3a5e7cb230c9d055afd/numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d", size = 19932823 }, + { url = "https://files.pythonhosted.org/packages/f1/46/ea25b98b13dccaebddf1a803f8c748680d972e00507cd9bc6dcdb5aa2ac1/numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d", size = 14404424 }, + { url = "https://files.pythonhosted.org/packages/c8/a6/177dd88d95ecf07e722d21008b1b40e681a929eb9e329684d449c36586b2/numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa", size = 6476809 }, + { url = "https://files.pythonhosted.org/packages/ea/2b/7fc9f4e7ae5b507c1a3a21f0f15ed03e794c1242ea8a242ac158beb56034/numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73", size = 15911314 }, + { url = 
"https://files.pythonhosted.org/packages/8f/3b/df5a870ac6a3be3a86856ce195ef42eec7ae50d2a202be1f5a4b3b340e14/numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8", size = 21025288 }, + { url = "https://files.pythonhosted.org/packages/2c/97/51af92f18d6f6f2d9ad8b482a99fb74e142d71372da5d834b3a2747a446e/numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4", size = 6762793 }, + { url = "https://files.pythonhosted.org/packages/12/46/de1fbd0c1b5ccaa7f9a005b66761533e2f6a3e560096682683a223631fe9/numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c", size = 19334885 }, + { url = "https://files.pythonhosted.org/packages/cc/dc/d330a6faefd92b446ec0f0dfea4c3207bb1fef3c4771d19cf4543efd2c78/numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385", size = 15828784 }, +] + +[[package]] +name = "numpy" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/fdbf6a7871703df6160b5cf3dd774074b086d278172285c52c2758b76305/numpy-2.2.1.tar.gz", hash = "sha256:45681fd7128c8ad1c379f0ca0776a8b0c6583d2f69889ddac01559dfe4390918", size = 20227662 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/c4/5588367dc9f91e1a813beb77de46ea8cab13f778e1b3a0e661ab031aba44/numpy-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5edb4e4caf751c1518e6a26a83501fda79bff41cc59dac48d70e6d65d4ec4440", size = 21213214 }, + { url = 
"https://files.pythonhosted.org/packages/d8/8b/32dd9f08419023a4cf856c5ad0b4eba9b830da85eafdef841a104c4fc05a/numpy-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa3017c40d513ccac9621a2364f939d39e550c542eb2a894b4c8da92b38896ab", size = 14352248 }, + { url = "https://files.pythonhosted.org/packages/84/2d/0e895d02940ba6e12389f0ab5cac5afcf8dc2dc0ade4e8cad33288a721bd/numpy-2.2.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:61048b4a49b1c93fe13426e04e04fdf5a03f456616f6e98c7576144677598675", size = 5391007 }, + { url = "https://files.pythonhosted.org/packages/11/b9/7f1e64a0d46d9c2af6d17966f641fb12d5b8ea3003f31b2308f3e3b9a6aa/numpy-2.2.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7671dc19c7019103ca44e8d94917eba8534c76133523ca8406822efdd19c9308", size = 6926174 }, + { url = "https://files.pythonhosted.org/packages/2e/8c/043fa4418bc9364e364ab7aba8ff6ef5f6b9171ade22de8fbcf0e2fa4165/numpy-2.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4250888bcb96617e00bfa28ac24850a83c9f3a16db471eca2ee1f1714df0f957", size = 14330914 }, + { url = "https://files.pythonhosted.org/packages/f7/b6/d8110985501ca8912dfc1c3bbef99d66e62d487f72e46b2337494df77364/numpy-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7746f235c47abc72b102d3bce9977714c2444bdfaea7888d241b4c4bb6a78bf", size = 16379607 }, + { url = "https://files.pythonhosted.org/packages/e2/57/bdca9fb8bdaa810c3a4ff2eb3231379b77f618a7c0d24be9f7070db50775/numpy-2.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:059e6a747ae84fce488c3ee397cee7e5f905fd1bda5fb18c66bc41807ff119b2", size = 15541760 }, + { url = "https://files.pythonhosted.org/packages/97/55/3b9147b3cbc3b6b1abc2a411dec5337a46c873deca0dd0bf5bef9d0579cc/numpy-2.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f62aa6ee4eb43b024b0e5a01cf65a0bb078ef8c395e8713c6e8a12a697144528", size = 18168476 }, + { url = 
"https://files.pythonhosted.org/packages/00/e7/7c2cde16c9b87a8e14fdd262ca7849c4681cf48c8a774505f7e6f5e3b643/numpy-2.2.1-cp310-cp310-win32.whl", hash = "sha256:48fd472630715e1c1c89bf1feab55c29098cb403cc184b4859f9c86d4fcb6a95", size = 6570985 }, + { url = "https://files.pythonhosted.org/packages/a1/a8/554b0e99fc4ac11ec481254781a10da180d0559c2ebf2c324232317349ee/numpy-2.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:b541032178a718c165a49638d28272b771053f628382d5e9d1c93df23ff58dbf", size = 12913384 }, + { url = "https://files.pythonhosted.org/packages/59/14/645887347124e101d983e1daf95b48dc3e136bf8525cb4257bf9eab1b768/numpy-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40f9e544c1c56ba8f1cf7686a8c9b5bb249e665d40d626a23899ba6d5d9e1484", size = 21217379 }, + { url = "https://files.pythonhosted.org/packages/9f/fd/2279000cf29f58ccfd3778cbf4670dfe3f7ce772df5e198c5abe9e88b7d7/numpy-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9b57eaa3b0cd8db52049ed0330747b0364e899e8a606a624813452b8203d5f7", size = 14388520 }, + { url = "https://files.pythonhosted.org/packages/58/b0/034eb5d5ba12d66ab658ff3455a31f20add0b78df8203c6a7451bd1bee21/numpy-2.2.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bc8a37ad5b22c08e2dbd27df2b3ef7e5c0864235805b1e718a235bcb200cf1cb", size = 5389286 }, + { url = "https://files.pythonhosted.org/packages/5d/69/6f3cccde92e82e7835fdb475c2bf439761cbf8a1daa7c07338e1e132dfec/numpy-2.2.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9036d6365d13b6cbe8f27a0eaf73ddcc070cae584e5ff94bb45e3e9d729feab5", size = 6930345 }, + { url = "https://files.pythonhosted.org/packages/d1/72/1cd38e91ab563e67f584293fcc6aca855c9ae46dba42e6b5ff4600022899/numpy-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51faf345324db860b515d3f364eaa93d0e0551a88d6218a7d61286554d190d73", size = 14335748 }, + { url = 
"https://files.pythonhosted.org/packages/f2/d4/f999444e86986f3533e7151c272bd8186c55dda554284def18557e013a2a/numpy-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38efc1e56b73cc9b182fe55e56e63b044dd26a72128fd2fbd502f75555d92591", size = 16391057 }, + { url = "https://files.pythonhosted.org/packages/99/7b/85cef6a3ae1b19542b7afd97d0b296526b6ef9e3c43ea0c4d9c4404fb2d0/numpy-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:31b89fa67a8042e96715c68e071a1200c4e172f93b0fbe01a14c0ff3ff820fc8", size = 15556943 }, + { url = "https://files.pythonhosted.org/packages/69/7e/b83cc884c3508e91af78760f6b17ab46ad649831b1fa35acb3eb26d9e6d2/numpy-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c86e2a209199ead7ee0af65e1d9992d1dce7e1f63c4b9a616500f93820658d0", size = 18180785 }, + { url = "https://files.pythonhosted.org/packages/b2/9f/eb4a9a38867de059dcd4b6e18d47c3867fbd3795d4c9557bb49278f94087/numpy-2.2.1-cp311-cp311-win32.whl", hash = "sha256:b34d87e8a3090ea626003f87f9392b3929a7bbf4104a05b6667348b6bd4bf1cd", size = 6568983 }, + { url = "https://files.pythonhosted.org/packages/6d/1e/be3b9f3073da2f8c7fa361fcdc231b548266b0781029fdbaf75eeab997fd/numpy-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:360137f8fb1b753c5cde3ac388597ad680eccbbbb3865ab65efea062c4a1fd16", size = 12917260 }, + { url = "https://files.pythonhosted.org/packages/62/12/b928871c570d4a87ab13d2cc19f8817f17e340d5481621930e76b80ffb7d/numpy-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:694f9e921a0c8f252980e85bce61ebbd07ed2b7d4fa72d0e4246f2f8aa6642ab", size = 20909861 }, + { url = "https://files.pythonhosted.org/packages/3d/c3/59df91ae1d8ad7c5e03efd63fd785dec62d96b0fe56d1f9ab600b55009af/numpy-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3683a8d166f2692664262fd4900f207791d005fb088d7fdb973cc8d663626faa", size = 14095776 }, + { url = 
"https://files.pythonhosted.org/packages/af/4e/8ed5868efc8e601fb69419644a280e9c482b75691466b73bfaab7d86922c/numpy-2.2.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:780077d95eafc2ccc3ced969db22377b3864e5b9a0ea5eb347cc93b3ea900315", size = 5126239 }, + { url = "https://files.pythonhosted.org/packages/1a/74/dd0bbe650d7bc0014b051f092f2de65e34a8155aabb1287698919d124d7f/numpy-2.2.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:55ba24ebe208344aa7a00e4482f65742969a039c2acfcb910bc6fcd776eb4355", size = 6659296 }, + { url = "https://files.pythonhosted.org/packages/7f/11/4ebd7a3f4a655764dc98481f97bd0a662fb340d1001be6050606be13e162/numpy-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b1d07b53b78bf84a96898c1bc139ad7f10fda7423f5fd158fd0f47ec5e01ac7", size = 14047121 }, + { url = "https://files.pythonhosted.org/packages/7f/a7/c1f1d978166eb6b98ad009503e4d93a8c1962d0eb14a885c352ee0276a54/numpy-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5062dc1a4e32a10dc2b8b13cedd58988261416e811c1dc4dbdea4f57eea61b0d", size = 16096599 }, + { url = "https://files.pythonhosted.org/packages/3d/6d/0e22afd5fcbb4d8d0091f3f46bf4e8906399c458d4293da23292c0ba5022/numpy-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fce4f615f8ca31b2e61aa0eb5865a21e14f5629515c9151850aa936c02a1ee51", size = 15243932 }, + { url = "https://files.pythonhosted.org/packages/03/39/e4e5832820131ba424092b9610d996b37e5557180f8e2d6aebb05c31ae54/numpy-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:67d4cda6fa6ffa073b08c8372aa5fa767ceb10c9a0587c707505a6d426f4e046", size = 17861032 }, + { url = "https://files.pythonhosted.org/packages/5f/8a/3794313acbf5e70df2d5c7d2aba8718676f8d054a05abe59e48417fb2981/numpy-2.2.1-cp312-cp312-win32.whl", hash = "sha256:32cb94448be47c500d2c7a95f93e2f21a01f1fd05dd2beea1ccd049bb6001cd2", size = 6274018 }, + { url = 
"https://files.pythonhosted.org/packages/17/c1/c31d3637f2641e25c7a19adf2ae822fdaf4ddd198b05d79a92a9ce7cb63e/numpy-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:ba5511d8f31c033a5fcbda22dd5c813630af98c70b2661f2d2c654ae3cdfcfc8", size = 12613843 }, + { url = "https://files.pythonhosted.org/packages/20/d6/91a26e671c396e0c10e327b763485ee295f5a5a7a48c553f18417e5a0ed5/numpy-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1d09e520217618e76396377c81fba6f290d5f926f50c35f3a5f72b01a0da780", size = 20896464 }, + { url = "https://files.pythonhosted.org/packages/8c/40/5792ccccd91d45e87d9e00033abc4f6ca8a828467b193f711139ff1f1cd9/numpy-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3ecc47cd7f6ea0336042be87d9e7da378e5c7e9b3c8ad0f7c966f714fc10d821", size = 14111350 }, + { url = "https://files.pythonhosted.org/packages/c0/2a/fb0a27f846cb857cef0c4c92bef89f133a3a1abb4e16bba1c4dace2e9b49/numpy-2.2.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f419290bc8968a46c4933158c91a0012b7a99bb2e465d5ef5293879742f8797e", size = 5111629 }, + { url = "https://files.pythonhosted.org/packages/eb/e5/8e81bb9d84db88b047baf4e8b681a3e48d6390bc4d4e4453eca428ecbb49/numpy-2.2.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b6c390bfaef8c45a260554888966618328d30e72173697e5cabe6b285fb2348", size = 6645865 }, + { url = "https://files.pythonhosted.org/packages/7a/1a/a90ceb191dd2f9e2897c69dde93ccc2d57dd21ce2acbd7b0333e8eea4e8d/numpy-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:526fc406ab991a340744aad7e25251dd47a6720a685fa3331e5c59fef5282a59", size = 14043508 }, + { url = "https://files.pythonhosted.org/packages/f1/5a/e572284c86a59dec0871a49cd4e5351e20b9c751399d5f1d79628c0542cb/numpy-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74e6fdeb9a265624ec3a3918430205dff1df7e95a230779746a6af78bc615af", size = 16094100 }, + { url = 
"https://files.pythonhosted.org/packages/0c/2c/a79d24f364788386d85899dd280a94f30b0950be4b4a545f4fa4ed1d4ca7/numpy-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:53c09385ff0b72ba79d8715683c1168c12e0b6e84fb0372e97553d1ea91efe51", size = 15239691 }, + { url = "https://files.pythonhosted.org/packages/cf/79/1e20fd1c9ce5a932111f964b544facc5bb9bde7865f5b42f00b4a6a9192b/numpy-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3eac17d9ec51be534685ba877b6ab5edc3ab7ec95c8f163e5d7b39859524716", size = 17856571 }, + { url = "https://files.pythonhosted.org/packages/be/5b/cc155e107f75d694f562bdc84a26cc930569f3dfdfbccb3420b626065777/numpy-2.2.1-cp313-cp313-win32.whl", hash = "sha256:9ad014faa93dbb52c80d8f4d3dcf855865c876c9660cb9bd7553843dd03a4b1e", size = 6270841 }, + { url = "https://files.pythonhosted.org/packages/44/be/0e5cd009d2162e4138d79a5afb3b5d2341f0fe4777ab6e675aa3d4a42e21/numpy-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:164a829b6aacf79ca47ba4814b130c4020b202522a93d7bff2202bfb33b61c60", size = 12606618 }, + { url = "https://files.pythonhosted.org/packages/a8/87/04ddf02dd86fb17c7485a5f87b605c4437966d53de1e3745d450343a6f56/numpy-2.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4dfda918a13cc4f81e9118dea249e192ab167a0bb1966272d5503e39234d694e", size = 20921004 }, + { url = "https://files.pythonhosted.org/packages/6e/3e/d0e9e32ab14005425d180ef950badf31b862f3839c5b927796648b11f88a/numpy-2.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:733585f9f4b62e9b3528dd1070ec4f52b8acf64215b60a845fa13ebd73cd0712", size = 14119910 }, + { url = "https://files.pythonhosted.org/packages/b5/5b/aa2d1905b04a8fb681e08742bb79a7bddfc160c7ce8e1ff6d5c821be0236/numpy-2.2.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:89b16a18e7bba224ce5114db863e7029803c179979e1af6ad6a6b11f70545008", size = 5153612 }, + { url = 
"https://files.pythonhosted.org/packages/ce/35/6831808028df0648d9b43c5df7e1051129aa0d562525bacb70019c5f5030/numpy-2.2.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:676f4eebf6b2d430300f1f4f4c2461685f8269f94c89698d832cdf9277f30b84", size = 6668401 }, + { url = "https://files.pythonhosted.org/packages/b1/38/10ef509ad63a5946cc042f98d838daebfe7eaf45b9daaf13df2086b15ff9/numpy-2.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f5cdf9f493b35f7e41e8368e7d7b4bbafaf9660cba53fb21d2cd174ec09631", size = 14014198 }, + { url = "https://files.pythonhosted.org/packages/df/f8/c80968ae01df23e249ee0a4487fae55a4c0fe2f838dfe9cc907aa8aea0fa/numpy-2.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1ad395cf254c4fbb5b2132fee391f361a6e8c1adbd28f2cd8e79308a615fe9d", size = 16076211 }, + { url = "https://files.pythonhosted.org/packages/09/69/05c169376016a0b614b432967ac46ff14269eaffab80040ec03ae1ae8e2c/numpy-2.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:08ef779aed40dbc52729d6ffe7dd51df85796a702afbf68a4f4e41fafdc8bda5", size = 15220266 }, + { url = "https://files.pythonhosted.org/packages/f1/ff/94a4ce67ea909f41cf7ea712aebbe832dc67decad22944a1020bb398a5ee/numpy-2.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:26c9c4382b19fcfbbed3238a14abf7ff223890ea1936b8890f058e7ba35e8d71", size = 17852844 }, + { url = "https://files.pythonhosted.org/packages/46/72/8a5dbce4020dfc595592333ef2fbb0a187d084ca243b67766d29d03e0096/numpy-2.2.1-cp313-cp313t-win32.whl", hash = "sha256:93cf4e045bae74c90ca833cba583c14b62cb4ba2cba0abd2b141ab52548247e2", size = 6326007 }, + { url = "https://files.pythonhosted.org/packages/7b/9c/4fce9cf39dde2562584e4cfd351a0140240f82c0e3569ce25a250f47037d/numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268", size = 12693107 }, + { url = 
"https://files.pythonhosted.org/packages/f1/65/d36a76b811ffe0a4515e290cb05cb0e22171b1b0f0db6bee9141cf023545/numpy-2.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7ba9cc93a91d86365a5d270dee221fdc04fb68d7478e6bf6af650de78a8339e3", size = 21044672 }, + { url = "https://files.pythonhosted.org/packages/aa/3f/b644199f165063154df486d95198d814578f13dd4d8c1651e075bf1cb8af/numpy-2.2.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3d03883435a19794e41f147612a77a8f56d4e52822337844fff3d4040a142964", size = 6789873 }, + { url = "https://files.pythonhosted.org/packages/d7/df/2adb0bb98a3cbe8a6c3c6d1019aede1f1d8b83927ced228a46cc56c7a206/numpy-2.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4511d9e6071452b944207c8ce46ad2f897307910b402ea5fa975da32e0102800", size = 16194933 }, + { url = "https://files.pythonhosted.org/packages/13/3e/1959d5219a9e6d200638d924cedda6a606392f7186a4ed56478252e70d55/numpy-2.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5c5cc0cbabe9452038ed984d05ac87910f89370b9242371bd9079cb4af61811e", size = 12820057 }, +] + +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + +[[package]] +name = "pandas" +version = "2.0.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, 
marker = "python_full_version < '3.9'" }, + { name = "python-dateutil", marker = "python_full_version < '3.9'" }, + { name = "pytz", marker = "python_full_version < '3.9'" }, + { name = "tzdata", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908 }, + { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486 }, + { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897 }, + { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421 }, + { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792 }, + { url = 
"https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333 }, + { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672 }, + { url = "https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229 }, + { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591 }, + { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370 }, + { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935 }, + { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692 }, + { url = 
"https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303 }, + { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932 }, + { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018 }, + { url = "https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723 }, + { url = "https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403 }, + { url = "https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638 }, + { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160 }, + { url = 
"https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752 }, + { url = "https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852 }, + { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496 }, + { url = "https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766 }, + { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902 }, +] + +[[package]] +name = "pandas" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "python-dateutil", marker = "python_full_version >= '3.9'" }, + { name = 
"pytz", marker = "python_full_version >= '3.9'" }, + { name = "tzdata", marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827 }, + { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897 }, + { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908 }, + { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210 }, + { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", size = 16754292 }, + { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379 }, + { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471 }, + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222 }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274 }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836 }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505 }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420 }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457 }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166 }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445 }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235 }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756 }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643 }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573 }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085 }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809 }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316 }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055 }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175 }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650 }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526 }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013 }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620 }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 }, + { url = "https://files.pythonhosted.org/packages/ca/8c/8848a4c9b8fdf5a534fe2077af948bf53cd713d77ffbcd7bd15710348fd7/pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39", size = 12595535 }, + { url = "https://files.pythonhosted.org/packages/9c/b9/5cead4f63b6d31bdefeb21a679bc5a7f4aaf262ca7e07e2bc1c341b68470/pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30", size = 11319822 }, + { url = "https://files.pythonhosted.org/packages/31/af/89e35619fb573366fa68dc26dad6ad2c08c17b8004aad6d98f1a31ce4bb3/pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c", size = 15625439 }, + { url = "https://files.pythonhosted.org/packages/3d/dd/bed19c2974296661493d7acc4407b1d2db4e2a482197df100f8f965b6225/pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c", size = 13068928 }, + { url = "https://files.pythonhosted.org/packages/31/a3/18508e10a31ea108d746c848b5a05c0711e0278fa0d6f1c52a8ec52b80a5/pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea", size = 16783266 }, + { url = "https://files.pythonhosted.org/packages/c4/a5/3429bd13d82bebc78f4d78c3945efedef63a7cd0c15c17b2eeb838d1121f/pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761", size = 14450871 }, + { url = "https://files.pythonhosted.org/packages/2f/49/5c30646e96c684570925b772eac4eb0a8cb0ca590fa978f56c5d3ae73ea1/pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e", size = 
11618011 }, +] + +[[package]] +name = "parso" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 }, +] + +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, +] + +[[package]] +name = "pickleshare" +version = "0.7.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/b6/df3c1c9b616e9c0edbc4fbab6ddd09df9535849c64ba51fcb6531c32d4d8/pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", size = 6161 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/41/220f49aaea88bc6fa6cba8d05ecf24676326156c23b991e80b3f2fc24c77/pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56", size = 6877 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.48" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2d/4f/feb5e137aff82f7c7f3248267b97451da3644f6cdc218edfe549fb354127/prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90", size = 424684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/6a/fd08d94654f7e67c52ca30523a178b3f8ccc4237fce4be90d39c938a831a/prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e", size = 386595 }, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, +] + +[[package]] +name = "pyarrow" +version = "17.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/27/4e/ea6d43f324169f8aec0e57569443a38bab4b398d09769ca64f7b4d467de3/pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", size = 1112479 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/5d/78d4b040bc5ff2fc6c3d03e80fca396b742f6c125b8af06bcf7427f931bc/pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", size = 28994846 }, + { url = "https://files.pythonhosted.org/packages/3b/73/8ed168db7642e91180330e4ea9f3ff8bab404678f00d32d7df0871a4933b/pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", size = 27165908 }, + { url = "https://files.pythonhosted.org/packages/81/36/e78c24be99242063f6d0590ef68c857ea07bdea470242c361e9a15bd57a4/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", size = 39264209 }, + { url = 
"https://files.pythonhosted.org/packages/18/4c/3db637d7578f683b0a8fb8999b436bdbedd6e3517bd4f90c70853cf3ad20/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", size = 39862883 }, + { url = "https://files.pythonhosted.org/packages/81/3c/0580626896c842614a523e66b351181ed5bb14e5dfc263cd68cea2c46d90/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8", size = 38723009 }, + { url = "https://files.pythonhosted.org/packages/ee/fb/c1b47f0ada36d856a352da261a44d7344d8f22e2f7db3945f8c3b81be5dd/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", size = 39855626 }, + { url = "https://files.pythonhosted.org/packages/19/09/b0a02908180a25d57312ab5919069c39fddf30602568980419f4b02393f6/pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", size = 25147242 }, + { url = "https://files.pythonhosted.org/packages/f9/46/ce89f87c2936f5bb9d879473b9663ce7a4b1f4359acc2f0eb39865eaa1af/pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", size = 29028748 }, + { url = "https://files.pythonhosted.org/packages/8d/8e/ce2e9b2146de422f6638333c01903140e9ada244a2a477918a368306c64c/pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", size = 27190965 }, + { url = "https://files.pythonhosted.org/packages/3b/c8/5675719570eb1acd809481c6d64e2136ffb340bc387f4ca62dce79516cea/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", size = 39269081 }, + { url = 
"https://files.pythonhosted.org/packages/5e/78/3931194f16ab681ebb87ad252e7b8d2c8b23dad49706cadc865dff4a1dd3/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", size = 39864921 }, + { url = "https://files.pythonhosted.org/packages/d8/81/69b6606093363f55a2a574c018901c40952d4e902e670656d18213c71ad7/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", size = 38740798 }, + { url = "https://files.pythonhosted.org/packages/4c/21/9ca93b84b92ef927814cb7ba37f0774a484c849d58f0b692b16af8eebcfb/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", size = 39871877 }, + { url = "https://files.pythonhosted.org/packages/30/d1/63a7c248432c71c7d3ee803e706590a0b81ce1a8d2b2ae49677774b813bb/pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", size = 25151089 }, + { url = "https://files.pythonhosted.org/packages/d4/62/ce6ac1275a432b4a27c55fe96c58147f111d8ba1ad800a112d31859fae2f/pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", size = 29019418 }, + { url = "https://files.pythonhosted.org/packages/8e/0a/dbd0c134e7a0c30bea439675cc120012337202e5fac7163ba839aa3691d2/pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", size = 27152197 }, + { url = "https://files.pythonhosted.org/packages/cb/05/3f4a16498349db79090767620d6dc23c1ec0c658a668d61d76b87706c65d/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", size = 39263026 }, + { url = 
"https://files.pythonhosted.org/packages/c2/0c/ea2107236740be8fa0e0d4a293a095c9f43546a2465bb7df34eee9126b09/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", size = 39880798 }, + { url = "https://files.pythonhosted.org/packages/f6/b0/b9164a8bc495083c10c281cc65064553ec87b7537d6f742a89d5953a2a3e/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", size = 38715172 }, + { url = "https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", size = 39874508 }, + { url = "https://files.pythonhosted.org/packages/ae/49/baafe2a964f663413be3bd1cf5c45ed98c5e42e804e2328e18f4570027c1/pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", size = 25099235 }, + { url = "https://files.pythonhosted.org/packages/8d/bd/8f52c1d7b430260f80a349cffa2df351750a737b5336313d56dcadeb9ae1/pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", size = 28999345 }, + { url = "https://files.pythonhosted.org/packages/64/d9/51e35550f2f18b8815a2ab25948f735434db32000c0e91eba3a32634782a/pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", size = 27168441 }, + { url = "https://files.pythonhosted.org/packages/18/d8/7161d87d07ea51be70c49f615004c1446d5723622a18b2681f7e4b71bf6e/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", size = 39363163 }, + { url = 
"https://files.pythonhosted.org/packages/3f/08/bc497130789833de09e345e3ce4647e3ce86517c4f70f2144f0367ca378b/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", size = 39965253 }, + { url = "https://files.pythonhosted.org/packages/d3/2e/493dd7db889402b4c7871ca7dfdd20f2c5deedbff802d3eb8576359930f9/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", size = 38805378 }, + { url = "https://files.pythonhosted.org/packages/e6/c1/4c6bcdf7a820034aa91a8b4d25fef38809be79b42ca7aaa16d4680b0bbac/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", size = 39958364 }, + { url = "https://files.pythonhosted.org/packages/d1/db/42ac644453cfdfc60fe002b46d647fe7a6dfad753ef7b28e99b4c936ad5d/pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", size = 25229211 }, + { url = "https://files.pythonhosted.org/packages/43/e0/a898096d35be240aa61fb2d54db58b86d664b10e1e51256f9300f47565e8/pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", size = 29007881 }, + { url = "https://files.pythonhosted.org/packages/59/22/f7d14907ed0697b5dd488d393129f2738629fa5bcba863e00931b7975946/pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", size = 27178117 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/661211feac0ed48467b1d5c57298c91403809ec3ab78b1d175e1d6ad03cf/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", size = 39273896 }, + { url = 
"https://files.pythonhosted.org/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", size = 39875438 }, + { url = "https://files.pythonhosted.org/packages/75/63/29d1bfcc57af73cde3fc3baccab2f37548de512dbe0ab294b033cd203516/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", size = 38735092 }, + { url = "https://files.pythonhosted.org/packages/39/f4/90258b4de753df7cc61cefb0312f8abcf226672e96cc64996e66afce817a/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", size = 39867610 }, + { url = "https://files.pythonhosted.org/packages/e7/f6/b75d4816c32f1618ed31a005ee635dd1d91d8164495d94f2ea092f594661/pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", size = 25148611 }, +] + +[[package]] +name = "pyarrow" +version = "18.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size = 29531620 }, + { url = 
"https://files.pythonhosted.org/packages/30/90/893acfad917533b624a97b9e498c0e8393908508a0a72d624fe935e632bf/pyarrow-18.1.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4", size = 30836521 }, + { url = "https://files.pythonhosted.org/packages/a3/2a/526545a7464b5fb2fa6e2c4bad16ca90e59e1843025c534fd907b7f73e5a/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b", size = 39213905 }, + { url = "https://files.pythonhosted.org/packages/8a/77/4b3fab91a30e19e233e738d0c5eca5a8f6dd05758bc349a2ca262c65de79/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71", size = 40128881 }, + { url = "https://files.pythonhosted.org/packages/aa/e2/a88e16c5e45e562449c52305bd3bc2f9d704295322d3434656e7ccac1444/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470", size = 38627517 }, + { url = "https://files.pythonhosted.org/packages/6d/84/8037c20005ccc7b869726465be0957bd9c29cfc88612962030f08292ad06/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56", size = 40060187 }, + { url = "https://files.pythonhosted.org/packages/2a/38/d6435c723ff73df8ae74626ea778262fbcc2b9b0d1a4f3db915b61711b05/pyarrow-18.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812", size = 25118314 }, + { url = "https://files.pythonhosted.org/packages/9e/4d/a4988e7d82f4fbc797715db4185939a658eeffb07a25bab7262bed1ea076/pyarrow-18.1.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854", size = 29554860 }, + { url = 
"https://files.pythonhosted.org/packages/59/03/3a42c5c1e4bd4c900ab62aa1ff6b472bdb159ba8f1c3e5deadab7222244f/pyarrow-18.1.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c", size = 30867076 }, + { url = "https://files.pythonhosted.org/packages/75/7e/332055ac913373e89256dce9d14b7708f55f7bd5be631456c897f0237738/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21", size = 39212135 }, + { url = "https://files.pythonhosted.org/packages/8c/64/5099cdb325828722ef7ffeba9a4696f238eb0cdeae227f831c2d77fcf1bd/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6", size = 40125195 }, + { url = "https://files.pythonhosted.org/packages/83/88/1938d783727db1b178ff71bc6a6143d7939e406db83a9ec23cad3dad325c/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe", size = 38641884 }, + { url = "https://files.pythonhosted.org/packages/5e/b5/9e14e9f7590e0eaa435ecea84dabb137284a4dbba7b3c337b58b65b76d95/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0", size = 40076877 }, + { url = "https://files.pythonhosted.org/packages/4d/a3/817ac7fe0891a2d66e247e223080f3a6a262d8aefd77e11e8c27e6acf4e1/pyarrow-18.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a", size = 25119811 }, + { url = "https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size = 29514620 }, + { url = 
"https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size = 30856494 }, + { url = "https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size = 39203624 }, + { url = "https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size = 40139341 }, + { url = "https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size = 38618629 }, + { url = "https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size = 40078661 }, + { url = "https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size = 25092330 }, + { url = "https://files.pythonhosted.org/packages/cb/87/aa4d249732edef6ad88899399047d7e49311a55749d3c373007d034ee471/pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b", size = 29497406 }, + { url = 
"https://files.pythonhosted.org/packages/3c/c7/ed6adb46d93a3177540e228b5ca30d99fc8ea3b13bdb88b6f8b6467e2cb7/pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2", size = 30835095 }, + { url = "https://files.pythonhosted.org/packages/41/d7/ed85001edfb96200ff606943cff71d64f91926ab42828676c0fc0db98963/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191", size = 39194527 }, + { url = "https://files.pythonhosted.org/packages/59/16/35e28eab126342fa391593415d79477e89582de411bb95232f28b131a769/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa", size = 40131443 }, + { url = "https://files.pythonhosted.org/packages/0c/95/e855880614c8da20f4cd74fa85d7268c725cf0013dc754048593a38896a0/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c", size = 38608750 }, + { url = "https://files.pythonhosted.org/packages/54/9d/f253554b1457d4fdb3831b7bd5f8f00f1795585a606eabf6fec0a58a9c38/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c", size = 40066690 }, + { url = "https://files.pythonhosted.org/packages/2f/58/8912a2563e6b8273e8aa7b605a345bba5a06204549826f6493065575ebc0/pyarrow-18.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181", size = 25081054 }, + { url = "https://files.pythonhosted.org/packages/82/f9/d06ddc06cab1ada0c2f2fd205ac8c25c2701182de1b9c4bf7a0a44844431/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc", size = 29525542 }, + { url = 
"https://files.pythonhosted.org/packages/ab/94/8917e3b961810587ecbdaa417f8ebac0abb25105ae667b7aa11c05876976/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386", size = 30829412 }, + { url = "https://files.pythonhosted.org/packages/5e/e3/3b16c3190f3d71d3b10f6758d2d5f7779ef008c4fd367cedab3ed178a9f7/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324", size = 39119106 }, + { url = "https://files.pythonhosted.org/packages/1d/d6/5d704b0d25c3c79532f8c0639f253ec2803b897100f64bcb3f53ced236e5/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8", size = 40090940 }, + { url = "https://files.pythonhosted.org/packages/37/29/366bc7e588220d74ec00e497ac6710c2833c9176f0372fe0286929b2d64c/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9", size = 38548177 }, + { url = "https://files.pythonhosted.org/packages/c8/11/fabf6ecabb1fe5b7d96889228ca2a9158c4c3bb732e3b8ee3f7f6d40b703/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba", size = 40043567 }, + { url = "https://files.pythonhosted.org/packages/fd/9b/60516e3876ec6f25b0909afa70f90a15de83b48c7c0d8042fac4e64c4411/pyarrow-18.1.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:0b331e477e40f07238adc7ba7469c36b908f07c89b95dd4bd3a0ec84a3d1e21e", size = 29543752 }, + { url = "https://files.pythonhosted.org/packages/14/a7/bd08b6f1a2bd2e71dc6bb0451fc1872607e44c83daf1ee63c82764a2d233/pyarrow-18.1.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c4dd0c9010a25ba03e198fe743b1cc03cd33c08190afff371749c52ccbbaf76", size = 30850753 }, + { url = 
"https://files.pythonhosted.org/packages/84/c9/62ef9c6281c0e5b4ee1afa9d7bd556e72e06da6706b7906c32c15e69b3d6/pyarrow-18.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f97b31b4c4e21ff58c6f330235ff893cc81e23da081b1a4b1c982075e0ed4e9", size = 39226870 }, + { url = "https://files.pythonhosted.org/packages/b2/99/a6e89e71655a38475e76b060777c8bf69c078b772bec3b7daf7361440f05/pyarrow-18.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a4813cb8ecf1809871fd2d64a8eff740a1bd3691bbe55f01a3cf6c5ec869754", size = 40139114 }, + { url = "https://files.pythonhosted.org/packages/64/a9/06d79923890682e4fe7a16524abee307407008a413115354aaf3226b8410/pyarrow-18.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:05a5636ec3eb5cc2a36c6edb534a38ef57b2ab127292a716d00eabb887835f1e", size = 38639231 }, + { url = "https://files.pythonhosted.org/packages/3b/8c/4c3ed19026a00740b81fe1c87f3ff235b2763a0a1ddf5711a9d026b775ce/pyarrow-18.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:73eeed32e724ea3568bb06161cad5fa7751e45bc2228e33dcb10c614044165c7", size = 40070949 }, + { url = "https://files.pythonhosted.org/packages/87/d8/94161a7ca5c55199484e926165e9e33f318ea1d1b0d7cdbcbc3652b933ec/pyarrow-18.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052", size = 25301373 }, +] + +[[package]] +name = "pydata-sphinx-theme" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", 
source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/d6/3921de802cf1ee771f0e76c9068b52498aeb8eeec6b830ff931c81c7ecf3/pydata_sphinx_theme-0.8.0.tar.gz", hash = "sha256:9f72015d9c572ea92e3007ab221a8325767c426783b6b9941813e65fa988dc90", size = 1123746 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/26/0694318d46c7d90ab602ae27b24431e939f1600f9a4c69d1e727ec57289f/pydata_sphinx_theme-0.8.0-py3-none-any.whl", hash = "sha256:fbcbb833a07d3ad8dd997dd40dc94da18d98b41c68123ab0182b58fe92271204", size = 3284997 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "pytest" +version = "8.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = 
"sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pytz" +version = "2024.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3a/31/3c70bf7603cc2dca0f19bdc53b4537a797747a58875b552c8c413d963a3f/pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a", size = 319692 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", 
size = 130631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199 }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758 }, + { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463 }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280 }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239 }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802 }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527 }, + { url = 
"https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052 }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774 }, + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 }, + { url = 
"https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850 }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980 }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = 
"https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 }, + { url = 
"https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 }, + { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, + { url = 
"https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, + { url = "https://files.pythonhosted.org/packages/74/d9/323a59d506f12f498c2097488d80d16f4cf965cee1791eab58b56b19f47a/PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", size = 183218 }, + { url = "https://files.pythonhosted.org/packages/74/cc/20c34d00f04d785f2028737e2e2a8254e1425102e730fee1d6396f832577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", size = 728067 }, + { url = "https://files.pythonhosted.org/packages/20/52/551c69ca1501d21c0de51ddafa8c23a0191ef296ff098e98358f69080577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", size = 757812 }, + { url = "https://files.pythonhosted.org/packages/fd/7f/2c3697bba5d4aa5cc2afe81826d73dfae5f049458e44732c7a0938baa673/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", size = 746531 }, + { url = "https://files.pythonhosted.org/packages/8c/ab/6226d3df99900e580091bb44258fde77a8433511a86883bd4681ea19a858/PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", size = 800820 }, + { url = "https://files.pythonhosted.org/packages/a0/99/a9eb0f3e710c06c5d922026f6736e920d431812ace24aae38228d0d64b04/PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", size = 145514 }, + { url = 
"https://files.pythonhosted.org/packages/75/8a/ee831ad5fafa4431099aa4e078d4c8efd43cd5e48fbc774641d233b683a9/PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", size = 162702 }, + { url = "https://files.pythonhosted.org/packages/65/d8/b7a1db13636d7fb7d4ff431593c510c8b8fca920ade06ca8ef20015493c5/PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", size = 184777 }, + { url = "https://files.pythonhosted.org/packages/0a/02/6ec546cd45143fdf9840b2c6be8d875116a64076218b61d68e12548e5839/PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", size = 172318 }, + { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891 }, + { url = "https://files.pythonhosted.org/packages/e9/6c/6e1b7f40181bc4805e2e07f4abc10a88ce4648e7e95ff1abe4ae4014a9b2/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", size = 722614 }, + { url = "https://files.pythonhosted.org/packages/3d/32/e7bd8535d22ea2874cef6a81021ba019474ace0d13a4819c2a4bce79bd6a/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", size = 737360 }, + { url = "https://files.pythonhosted.org/packages/d7/12/7322c1e30b9be969670b672573d45479edef72c9a0deac3bb2868f5d7469/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", size = 699006 }, + { url = 
"https://files.pythonhosted.org/packages/82/72/04fcad41ca56491995076630c3ec1e834be241664c0c09a64c9a2589b507/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", size = 723577 }, + { url = "https://files.pythonhosted.org/packages/ed/5e/46168b1f2757f1fcd442bc3029cd8767d88a98c9c05770d8b420948743bb/PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", size = 144593 }, + { url = "https://files.pythonhosted.org/packages/19/87/5124b1c1f2412bb95c59ec481eaf936cd32f0fe2a7b16b97b81c4c017a6a/PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", size = 162312 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "ruff" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/67/3e/e89f736f01aa9517a97e2e7e0ce8d34a4d8207087b3cfdec95133fee13b5/ruff-0.9.1.tar.gz", hash = "sha256:fd2b25ecaf907d6458fa842675382c8597b3c746a2dde6717fe3415425df0c17", size = 3498844 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/05/c3a2e0feb3d5d394cdfd552de01df9d3ec8a3a3771bbff247fab7e668653/ruff-0.9.1-py3-none-linux_armv6l.whl", hash = "sha256:84330dda7abcc270e6055551aca93fdde1b0685fc4fd358f26410f9349cf1743", size = 10645241 }, + { url = "https://files.pythonhosted.org/packages/dd/da/59f0a40e5f88ee5c054ad175caaa2319fc96571e1d29ab4730728f2aad4f/ruff-0.9.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3cae39ba5d137054b0e5b472aee3b78a7c884e61591b100aeb544bcd1fc38d4f", size = 10391066 }, + { url = "https://files.pythonhosted.org/packages/b7/fe/85e1c1acf0ba04a3f2d54ae61073da030f7a5dc386194f96f3c6ca444a78/ruff-0.9.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:50c647ff96f4ba288db0ad87048257753733763b409b2faf2ea78b45c8bb7fcb", size = 10012308 }, + { url = "https://files.pythonhosted.org/packages/6f/9b/780aa5d4bdca8dcea4309264b8faa304bac30e1ce0bcc910422bfcadd203/ruff-0.9.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0c8b149e9c7353cace7d698e1656ffcf1e36e50f8ea3b5d5f7f87ff9986a7ca", size = 10881960 }, + { url = "https://files.pythonhosted.org/packages/12/f4/dac4361afbfe520afa7186439e8094e4884ae3b15c8fc75fb2e759c1f267/ruff-0.9.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:beb3298604540c884d8b282fe7625651378e1986c25df51dec5b2f60cafc31ce", size = 10414803 }, + { url = "https://files.pythonhosted.org/packages/f0/a2/057a3cb7999513cb78d6cb33a7d1cc6401c82d7332583786e4dad9e38e44/ruff-0.9.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39d0174ccc45c439093971cc06ed3ac4dc545f5e8bdacf9f067adf879544d969", size = 11464929 }, + { url = 
"https://files.pythonhosted.org/packages/eb/c6/1ccfcc209bee465ced4874dcfeaadc88aafcc1ea9c9f31ef66f063c187f0/ruff-0.9.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:69572926c0f0c9912288915214ca9b2809525ea263603370b9e00bed2ba56dbd", size = 12170717 }, + { url = "https://files.pythonhosted.org/packages/84/97/4a524027518525c7cf6931e9fd3b2382be5e4b75b2b61bec02681a7685a5/ruff-0.9.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:937267afce0c9170d6d29f01fcd1f4378172dec6760a9f4dface48cdabf9610a", size = 11708921 }, + { url = "https://files.pythonhosted.org/packages/a6/a4/4e77cf6065c700d5593b25fca6cf725b1ab6d70674904f876254d0112ed0/ruff-0.9.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:186c2313de946f2c22bdf5954b8dd083e124bcfb685732cfb0beae0c47233d9b", size = 13058074 }, + { url = "https://files.pythonhosted.org/packages/f9/d6/fcb78e0531e863d0a952c4c5600cc5cd317437f0e5f031cd2288b117bb37/ruff-0.9.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f94942a3bb767675d9a051867c036655fe9f6c8a491539156a6f7e6b5f31831", size = 11281093 }, + { url = "https://files.pythonhosted.org/packages/e4/3b/7235bbeff00c95dc2d073cfdbf2b871b5bbf476754c5d277815d286b4328/ruff-0.9.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:728d791b769cc28c05f12c280f99e8896932e9833fef1dd8756a6af2261fd1ab", size = 10882610 }, + { url = "https://files.pythonhosted.org/packages/2a/66/5599d23257c61cf038137f82999ca8f9d0080d9d5134440a461bef85b461/ruff-0.9.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2f312c86fb40c5c02b44a29a750ee3b21002bd813b5233facdaf63a51d9a85e1", size = 10489273 }, + { url = "https://files.pythonhosted.org/packages/78/85/de4aa057e2532db0f9761e2c2c13834991e087787b93e4aeb5f1cb10d2df/ruff-0.9.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ae017c3a29bee341ba584f3823f805abbe5fe9cd97f87ed07ecbf533c4c88366", size = 11003314 }, + { url = 
"https://files.pythonhosted.org/packages/00/42/afedcaa089116d81447347f76041ff46025849fedb0ed2b187d24cf70fca/ruff-0.9.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5dc40a378a0e21b4cfe2b8a0f1812a6572fc7b230ef12cd9fac9161aa91d807f", size = 11342982 }, + { url = "https://files.pythonhosted.org/packages/39/c6/fe45f3eb27e3948b41a305d8b768e949bf6a39310e9df73f6c576d7f1d9f/ruff-0.9.1-py3-none-win32.whl", hash = "sha256:46ebf5cc106cf7e7378ca3c28ce4293b61b449cd121b98699be727d40b79ba72", size = 8819750 }, + { url = "https://files.pythonhosted.org/packages/38/8d/580db77c3b9d5c3d9479e55b0b832d279c30c8f00ab0190d4cd8fc67831c/ruff-0.9.1-py3-none-win_amd64.whl", hash = "sha256:342a824b46ddbcdddd3abfbb332fa7fcaac5488bf18073e841236aadf4ad5c19", size = 9701331 }, + { url = "https://files.pythonhosted.org/packages/b2/94/0498cdb7316ed67a1928300dd87d659c933479f44dec51b4f62bfd1f8028/ruff-0.9.1-py3-none-win_arm64.whl", hash = "sha256:1cd76c7f9c679e6e8f2af8f778367dca82b95009bc7b1a85a47f1521ae524fa7", size = 9145708 }, +] + +[[package]] +name = "setuptools" +version = "75.3.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 }, +] + +[[package]] +name = "setuptools" +version = "75.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url 
= "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "snowballstemmer" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/7b/af302bebf22c749c56c9c3e8ae13190b5b5db37a33d9068652e8f73b7089/snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", size = 86699 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a", size = 93002 }, +] + +[[package]] +name = "soupsieve" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, +] + +[[package]] +name = "sphinx" +version = "7.1.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "alabaster", version = "0.7.13", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "babel", marker = "python_full_version < '3.9'" }, + { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "imagesize", marker = "python_full_version < '3.9'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.9'" }, + { name = "jinja2", marker = "python_full_version < '3.9'" }, + { name = "packaging", marker = "python_full_version < '3.9'" }, + { name = "pygments", marker = "python_full_version < '3.9'" }, + { name = "requests", marker = "python_full_version < '3.9'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-applehelp", version = "1.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-devhelp", version = "1.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-htmlhelp", version = "2.0.1", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.9'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-qthelp", version = "1.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-serializinghtml", version = "1.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/01/688bdf9282241dca09fe6e3a1110eda399fa9b10d0672db609e37c2e7a39/sphinx-7.1.2.tar.gz", hash = "sha256:780f4d32f1d7d1126576e0e5ecc19dc32ab76cd24e950228dcf7b1f6d3d9e22f", size = 6828258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/17/325cf6a257d84751a48ae90752b3d8fe0be8f9535b6253add61c49d0d9bc/sphinx-7.1.2-py3-none-any.whl", hash = "sha256:d170a81825b2fcacb6dfd5a0d7f578a053e45d3f2b153fecc948c37344eb4cbe", size = 3169543 }, +] + +[[package]] +name = "sphinx" +version = "7.4.7" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "babel", marker = "python_full_version == '3.9.*'" }, + { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "imagesize", marker = "python_full_version == '3.9.*'" }, + { name = "importlib-metadata", marker = "python_full_version == '3.9.*'" }, + { name = "jinja2", marker = "python_full_version == '3.9.*'" }, + { name = "packaging", marker = "python_full_version == '3.9.*'" }, + { name = "pygments", marker = "python_full_version == '3.9.*'" }, + { name = "requests", marker = "python_full_version == '3.9.*'" }, + { name = "snowballstemmer", 
marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "tomli", marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/ef/153f6803c5d5f8917dbb7f7fcf6d34a871ede3296fa89c2c703f5f8a6c8e/sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239", size = 3401624 }, +] + +[[package]] +name = "sphinx" +version = "8.1.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "babel", marker = "python_full_version >= '3.10'" }, + { name = "colorama", marker = "python_full_version >= '3.10' and 
sys_platform == 'win32'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "imagesize", marker = "python_full_version >= '3.10'" }, + { name = "jinja2", marker = "python_full_version >= '3.10'" }, + { name = "packaging", marker = "python_full_version >= '3.10'" }, + { name = "pygments", marker = "python_full_version >= '3.10'" }, + { name = "requests", marker = "python_full_version >= '3.10'" }, + { name = "snowballstemmer", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "tomli", marker = "python_full_version == '3.10.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125 }, +] + 
+[[package]] +name = "sphinx-autoapi" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "astroid", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "astroid", version = "3.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "jinja2" }, + { name = "pyyaml" }, + { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "stdlib-list", version = "0.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "stdlib-list", version = "0.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4a/eb/cc243583bb1d518ca3b10998c203d919a8ed90affd4831f2b61ad09043d2/sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c", size = 29292 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/d6/f2acdc2567337fd5f5dc091a4e58d8a0fb14927b9779fc1e5ecee96d9824/sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92", size = 34095 }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/32/df/45e827f4d7e7fcc84e853bcef1d836effd762d63ccb86f43ede4e98b478c/sphinxcontrib-applehelp-1.0.4.tar.gz", hash 
= "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e", size = 24766 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228", size = 120601 }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/98/33/dc28393f16385f722c893cb55539c641c9aaec8d1bc1c15b69ce0ac2dbb3/sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4", size = 17398 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", size = 84690 }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = 
{ registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/47/64cff68ea3aa450c373301e5bebfbb9fce0a3e70aca245fcadd4af06cd75/sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff", size = 27967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903", size = 99833 }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = 
"sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/8e/c4846e59f38a5f2b4a0e3b27af38f2fcf904d4bfd82095bf92de0b114ebd/sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", size = 21658 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6", size = 90609 }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + 
"python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/72/835d6fadb9e5d02304cf39b18f93d227cd93abd3c41ebf58e6853eeb1455/sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952", size = 21019 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", size = 94021 }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, +] + +[[package]] +name = "stdlib-list" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/39/bb/1cdbc326a5ab0026602e0489cbf02357e78140253c4b57cd866d380eb355/stdlib_list-0.10.0.tar.gz", hash = "sha256:6519c50d645513ed287657bfe856d527f277331540691ddeaf77b25459964a14", size = 59447 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/d9/9085375f0d23a4896b307bf14dcc61b49ec8cc67cb33e06cf95bf3af3966/stdlib_list-0.10.0-py3-none-any.whl", hash = "sha256:b3a911bc441d03e0332dd1a9e7d0870ba3bb0a542a74d7524f54fb431256e214", size = 79814 }, +] + +[[package]] +name = "stdlib-list" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/04/6b37a71e92ddca16b190b7df62494ac4779d58ced4787f73584eb32c8f03/stdlib_list-0.11.0.tar.gz", hash 
= "sha256:b74a7b643a77a12637e907f3f62f0ab9f67300bce4014f6b2d3c8b4c8fd63c66", size = 60335 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/fe/e07300c027a868d32d8ed7a425503401e91a03ff90e7ca525c115c634ffb/stdlib_list-0.11.0-py3-none-any.whl", hash = "sha256:8bf8decfffaaf273d4cfeb5bd852b910a00dec1037dcf163576803622bccf597", size = 83617 }, +] + +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + +[[package]] +name = "tomli" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = 
"https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = 
"https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = 
"https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543 }, + { url = 
"https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = 
"https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, +] + +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + +[[package]] +name = "tzdata" +version = "2024.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, +] + +[[package]] +name = "urllib3" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = 
"sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, +] + +[[package]] +name = "zipp" +version = "3.20.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/54/bf/5c0000c44ebc80123ecbdddba1f5dcd94a5ada602a9c225d84b5aaa55e86/zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29", size = 24199 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, +] + +[[package]] +name = "zipp" +version = "3.21.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, +] From dfe4242ce4097a2f923939e443c6686c9d20c0af Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 22 Jan 2025 05:52:32 -0500 Subject: [PATCH 038/248] Update dependencies prior to release (#999) --- Cargo.lock | 247 ++++++++++++++++++++++++++++------------------------- Cargo.toml | 4 +- 2 files changed, 
133 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 105cc30c2..5a74a4839 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" +checksum = "eaf3437355979f1e93ba84ba108c38be5767713051f3c8ffbf07c094e2e61f9f" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" +checksum = "31dce77d2985522288edae7206bffd5fc4996491841dda01a13a58415867e681" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" +checksum = "2d45fe6d3faed0435b7313e59a02583b14c6c6339fa7729e94c32a20af319a79" dependencies = [ "ahash", "arrow-buffer", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" +checksum = "2b02656a35cc103f28084bc80a0159668e0a680d919cef127bd7e0aaccb06ec1" dependencies = [ "bytes", "half", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" +checksum = 
"c73c6233c5b5d635a56f6010e6eb1ab9e30e94707db21cea03da317f67d84cf3" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" +checksum = "ec222848d70fea5a32af9c3602b08f5d740d5e2d33fbd76bf6fd88759b5b13a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -284,9 +284,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" +checksum = "b7f2861ffa86f107b8ab577d86cff7c7a490243eabe961ba1e1af4f27542bb79" dependencies = [ "arrow-buffer", "arrow-schema", @@ -296,9 +296,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" +checksum = "0270dc511f11bb5fa98a25020ad51a99ca5b08d8a8dfbd17503bb9dba0388f0b" dependencies = [ "arrow-array", "arrow-buffer", @@ -311,9 +311,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" +checksum = "0eff38eeb8a971ad3a4caf62c5d57f0cff8a48b64a55e3207c4fd696a9234aad" dependencies = [ "arrow-array", "arrow-buffer", @@ -331,9 +331,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" +checksum = "c6f202a879d287099139ff0d121e7f55ae5e0efe634b8cf2106ebc27a8715dee" dependencies = [ "arrow-array", 
"arrow-buffer", @@ -346,9 +346,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" +checksum = "a8f936954991c360ba762dff23f5dda16300774fafd722353d9683abd97630ae" dependencies = [ "ahash", "arrow-array", @@ -360,18 +360,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" +checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" +checksum = "7471ba126d0b0aaa24b50a36bc6c25e4e74869a1fd1a5553357027a0b1c8d1f1" dependencies = [ "ahash", "arrow-array", @@ -383,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" +checksum = "72993b01cb62507b06f1fb49648d7286c8989ecfabdb7b77a750fcb54410731b" dependencies = [ "arrow-array", "arrow-buffer", @@ -444,7 +444,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -455,7 +455,7 @@ checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -528,9 +528,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = 
"bitflags" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "blake2" @@ -635,9 +635,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.7" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" +checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" dependencies = [ "jobserver", "libc", @@ -671,9 +671,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" dependencies = [ "chrono", "chrono-tz-build", @@ -1189,7 +1189,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1406,7 +1406,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1475,6 +1475,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1540,7 +1546,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1662,6 +1668,11 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -1916,7 +1927,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1994,9 +2005,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", @@ -2152,9 +2163,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" [[package]] name = "lz4_flex" @@ -2218,9 +2229,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" dependencies = [ "adler2", ] @@ -2404,9 +2415,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" +checksum = 
"8957c0c95a6a1804f3e51a18f69df29be53856a8c5768cc9b6d00fcafcd2917c" dependencies = [ "ahash", "arrow-array", @@ -2579,19 +2590,19 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.27" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -2622,7 +2633,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.95", + "syn 2.0.96", "tempfile", ] @@ -2636,7 +2647,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2726,7 +2737,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2739,7 +2750,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2771,7 +2782,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror 2.0.10", + "thiserror 2.0.11", "tokio", "tracing", ] @@ -2790,7 +2801,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.10", + "thiserror 2.0.11", "tinyvec", "tracing", "web-time", @@ -2866,7 +2877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2875,7 +2886,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] @@ -2915,11 +2926,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" +checksum = "4f56e622c2378013c6c61e2bd776604c46dc1087b2dc5293275a0c20a44f0771" dependencies = [ - "hashbrown 0.14.5", + "hashbrown 0.15.2", "memchr", ] @@ -3026,7 +3037,7 @@ version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", @@ -3035,9 +3046,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.20" +version = "0.23.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" +checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" dependencies = [ "once_cell", "ring", @@ -3139,7 +3150,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3154,7 +3165,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "core-foundation", "core-foundation-sys", "libc", @@ -3212,7 +3223,7 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3223,14 +3234,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "336a0c23cf42a38d9eaa7cd22c7040d04e1228a19a933890805ffd00a16437d2" dependencies = [ "itoa", "memchr", @@ -3247,7 +3258,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3331,7 +3342,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3374,7 +3385,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3418,7 +3429,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3442,7 +3453,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.95", + "syn 2.0.96", "typify", "walkdir", ] @@ -3466,9 +3477,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.95" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -3492,7 +3503,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3526,11 +3537,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3ac7f54ca534db81081ef1c1e7f6ea8a3ef428d2fc069097c079443d24124d3" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" dependencies = [ - "thiserror-impl 2.0.10", + 
"thiserror-impl 2.0.11", ] [[package]] @@ -3541,18 +3552,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] name = "thiserror-impl" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9465d30713b56a37ede7185763c3492a91be2f5fa68d958c44e41ab9248beb" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3624,7 +3635,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3696,7 +3707,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3762,7 +3773,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3796,7 +3807,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.95", + "syn 2.0.96", "thiserror 1.0.69", "unicode-ident", ] @@ -3814,7 +3825,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.95", + "syn 2.0.96", "typify-impl", ] @@ -3879,9 +3890,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" dependencies = [ "getrandom", "serde", @@ -3920,34 +3931,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = 
"wasm-bindgen" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.49" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", @@ -3958,9 +3970,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3968,22 +3980,25 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "wasm-bindgen-backend", "wasm-bindgen-shared", ] 
[[package]] name = "wasm-bindgen-shared" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-streams" @@ -4000,9 +4015,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", @@ -4211,7 +4226,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "synstructure", ] @@ -4233,7 +4248,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4253,7 +4268,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "synstructure", ] @@ -4282,7 +4297,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 48219414a..10cffccb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.41", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } 
pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} arrow = { version = "53", features = ["pyarrow"] } @@ -43,7 +43,7 @@ datafusion-substrait = { version = "44.0.0", optional = true } datafusion-proto = { version = "44.0.0" } datafusion-ffi = { version = "44.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` -uuid = { version = "1.11", features = ["v4"] } +uuid = { version = "1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" From 78e72c9445db4e78dcda2562e251beea4f1ad470 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 22 Jan 2025 05:53:13 -0500 Subject: [PATCH 039/248] Apply import ordering in ruff check (#1001) --- benchmarks/db-benchmark/groupby-datafusion.py | 12 +-- benchmarks/db-benchmark/join-datafusion.py | 6 +- benchmarks/tpch/tpch.py | 3 +- dev/release/generate-changelog.py | 5 +- examples/export.py | 1 - .../python/tests/_test_table_provider.py | 2 +- examples/import.py | 3 +- examples/python-udaf.py | 5 +- examples/python-udf-comparisons.py | 6 +- examples/python-udf.py | 3 +- examples/python-udwf.py | 7 +- examples/query-pyarrow-data.py | 3 +- examples/sql-parquet-s3.py | 1 + examples/sql-to-pandas.py | 1 - examples/sql-using-python-udaf.py | 2 +- examples/sql-using-python-udf.py | 2 +- examples/tpch/_tests.py | 6 +- examples/tpch/convert_data_to_parquet.py | 3 +- examples/tpch/q01_pricing_summary_report.py | 3 +- examples/tpch/q02_minimum_cost_supplier.py | 3 +- examples/tpch/q03_shipping_priority.py | 3 +- examples/tpch/q04_order_priority_checking.py | 4 +- examples/tpch/q05_local_supplier_volume.py | 5 +- .../tpch/q06_forecasting_revenue_change.py | 4 +- examples/tpch/q07_volume_shipping.py | 4 +- examples/tpch/q08_market_share.py | 4 +- .../tpch/q09_product_type_profit_measure.py | 3 +- examples/tpch/q10_returned_item_reporting.py | 4 +- .../q11_important_stock_identification.py | 3 +- 
examples/tpch/q12_ship_mode_order_priority.py | 4 +- examples/tpch/q13_customer_distribution.py | 3 +- examples/tpch/q14_promotion_effect.py | 4 +- examples/tpch/q15_top_supplier.py | 4 +- .../tpch/q16_part_supplier_relationship.py | 3 +- examples/tpch/q17_small_quantity_order.py | 3 +- examples/tpch/q18_large_volume_customer.py | 3 +- examples/tpch/q19_discounted_revenue.py | 3 +- examples/tpch/q20_potential_part_promotion.py | 4 +- .../tpch/q21_suppliers_kept_orders_waiting.py | 3 +- examples/tpch/q22_global_sales_opportunity.py | 3 +- pyproject.toml | 4 +- python/datafusion/__init__.py | 30 +++---- python/datafusion/catalog.py | 4 +- python/datafusion/common.py | 3 +- python/datafusion/context.py | 23 ++--- python/datafusion/dataframe.py | 21 +++-- python/datafusion/expr.py | 5 +- python/datafusion/functions.py | 16 ++-- python/datafusion/input/location.py | 2 +- python/datafusion/plan.py | 4 +- python/datafusion/record_batch.py | 3 +- python/datafusion/substrait.py | 8 +- python/datafusion/udf.py | 9 +- python/tests/conftest.py | 2 +- python/tests/test_aggregation.py | 1 - python/tests/test_config.py | 2 +- python/tests/test_context.py | 3 +- python/tests/test_dataframe.py | 5 +- python/tests/test_functions.py | 6 +- python/tests/test_imports.py | 89 +++++++++---------- python/tests/test_indexing.py | 1 - python/tests/test_input.py | 1 + python/tests/test_plans.py | 2 +- python/tests/test_sql.py | 5 +- python/tests/test_store.py | 1 - python/tests/test_substrait.py | 3 +- python/tests/test_udaf.py | 1 - python/tests/test_udf.py | 2 +- python/tests/test_udwf.py | 6 +- 69 files changed, 221 insertions(+), 189 deletions(-) diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 960c8ba9a..04bf7a149 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -15,21 +15,23 @@ # specific language governing permissions and limitations # under the License. 
-import os import gc +import os import timeit + import datafusion as df +import pyarrow from datafusion import ( - col, - functions as f, RuntimeEnvBuilder, SessionConfig, SessionContext, + col, +) +from datafusion import ( + functions as f, ) -import pyarrow from pyarrow import csv as pacsv - print("# groupby-datafusion.py", flush=True) exec(open("./_helpers/helpers.py").read()) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index 811ad8707..b45ebf632 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -15,15 +15,15 @@ # specific language governing permissions and limitations # under the License. -import os import gc +import os import timeit + import datafusion as df -from datafusion import functions as f from datafusion import col +from datafusion import functions as f from pyarrow import csv as pacsv - print("# join-datafusion.py", flush=True) exec(open("./_helpers/helpers.py").read()) diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index daa831b55..fb86b12b6 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -16,9 +16,10 @@ # under the License. import argparse -from datafusion import SessionContext import time +from datafusion import SessionContext + def bench(data_path, query_path): with open("results.csv", "w") as results: diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 0f07457d0..2564eea86 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -16,11 +16,12 @@ # limitations under the License. 
import argparse -import sys -from github import Github import os import re import subprocess +import sys + +from github import Github def print_pulls(repo_name, title, pulls): diff --git a/examples/export.py b/examples/export.py index cc02de52b..c7a387bcb 100644 --- a/examples/export.py +++ b/examples/export.py @@ -17,7 +17,6 @@ import datafusion - # create a context ctx = datafusion.SessionContext() diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/ffi-table-provider/python/tests/_test_table_provider.py index 56c05e4fa..0db3ec561 100644 --- a/examples/ffi-table-provider/python/tests/_test_table_provider.py +++ b/examples/ffi-table-provider/python/tests/_test_table_provider.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa from datafusion import SessionContext from ffi_table_provider import MyTableProvider -import pyarrow as pa def test_table_loading(): diff --git a/examples/import.py b/examples/import.py index c9d2e8cb6..7b5ab5082 100644 --- a/examples/import.py +++ b/examples/import.py @@ -16,10 +16,9 @@ # under the License. import datafusion -import pyarrow as pa import pandas as pd import polars as pl - +import pyarrow as pa # Create a context ctx = datafusion.SessionContext() diff --git a/examples/python-udaf.py b/examples/python-udaf.py index ed705f5a9..538f69571 100644 --- a/examples/python-udaf.py +++ b/examples/python-udaf.py @@ -15,11 +15,10 @@ # specific language governing permissions and limitations # under the License. 
+import datafusion import pyarrow import pyarrow.compute -import datafusion -from datafusion import udaf, Accumulator -from datafusion import col +from datafusion import Accumulator, col, udaf class MyAccumulator(Accumulator): diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py index 9a84dd730..c5d5ec8dd 100644 --- a/examples/python-udf-comparisons.py +++ b/examples/python-udf-comparisons.py @@ -15,11 +15,13 @@ # specific language governing permissions and limitations # under the License. -from datafusion import SessionContext, col, lit, udf, functions as F import os +import time + import pyarrow as pa import pyarrow.compute as pc -import time +from datafusion import SessionContext, col, lit, udf +from datafusion import functions as F path = os.path.dirname(os.path.abspath(__file__)) filepath = os.path.join(path, "./tpch/data/lineitem.parquet") diff --git a/examples/python-udf.py b/examples/python-udf.py index 30edd4198..fb2bc253e 100644 --- a/examples/python-udf.py +++ b/examples/python-udf.py @@ -16,7 +16,8 @@ # under the License. import pyarrow -from datafusion import udf, SessionContext, functions as f +from datafusion import SessionContext, udf +from datafusion import functions as f def is_null(array: pyarrow.Array) -> pyarrow.Array: diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 55de2bdc7..32f8fadaa 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
-import pyarrow as pa import datafusion -from datafusion import udwf, functions as f, col, lit -from datafusion.udf import WindowEvaluator +import pyarrow as pa +from datafusion import col, lit, udwf +from datafusion import functions as f from datafusion.expr import WindowFrame +from datafusion.udf import WindowEvaluator # This example creates five different examples of user defined window functions in order # to demonstrate the variety of ways a user may need to implement. diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py index 83e6884a7..e3456fb5b 100644 --- a/examples/query-pyarrow-data.py +++ b/examples/query-pyarrow-data.py @@ -16,9 +16,8 @@ # under the License. import datafusion -from datafusion import col import pyarrow - +from datafusion import col # create a context ctx = datafusion.SessionContext() diff --git a/examples/sql-parquet-s3.py b/examples/sql-parquet-s3.py index 61f1e0c50..866e2ac68 100644 --- a/examples/sql-parquet-s3.py +++ b/examples/sql-parquet-s3.py @@ -16,6 +16,7 @@ # under the License. import os + import datafusion from datafusion.object_store import AmazonS3 diff --git a/examples/sql-to-pandas.py b/examples/sql-to-pandas.py index 3e99b22de..34f7bde1b 100644 --- a/examples/sql-to-pandas.py +++ b/examples/sql-to-pandas.py @@ -17,7 +17,6 @@ from datafusion import SessionContext - # Create a DataFusion context ctx = SessionContext() diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py index 7ccf5d3cb..60ab8d134 100644 --- a/examples/sql-using-python-udaf.py +++ b/examples/sql-using-python-udaf.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. 
-from datafusion import udaf, SessionContext, Accumulator import pyarrow as pa +from datafusion import Accumulator, SessionContext, udaf # Define a user-defined aggregation function (UDAF) diff --git a/examples/sql-using-python-udf.py b/examples/sql-using-python-udf.py index d6bbe3ab0..2f0a0b67d 100644 --- a/examples/sql-using-python-udf.py +++ b/examples/sql-using-python-udf.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udf, SessionContext import pyarrow as pa +from datafusion import SessionContext, udf # Define a user-defined function (UDF) diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 3ce9cdfe5..c4d872085 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -15,10 +15,12 @@ # specific language governing permissions and limitations # under the License. -import pytest from importlib import import_module + import pyarrow as pa -from datafusion import DataFrame, col, lit, functions as F +import pytest +from datafusion import DataFrame, col, lit +from datafusion import functions as F from util import get_answer_file diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index cb0b2f0bd..73097fac5 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py @@ -23,8 +23,9 @@ """ import os -import pyarrow + import datafusion +import pyarrow ctx = datafusion.SessionContext() diff --git a/examples/tpch/q01_pricing_summary_report.py b/examples/tpch/q01_pricing_summary_report.py index cb9485a7a..3f97f00dc 100644 --- a/examples/tpch/q01_pricing_summary_report.py +++ b/examples/tpch/q01_pricing_summary_report.py @@ -30,7 +30,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path ctx = SessionContext() diff --git 
a/examples/tpch/q02_minimum_cost_supplier.py b/examples/tpch/q02_minimum_cost_supplier.py index c4ccf8ad3..7390d0892 100644 --- a/examples/tpch/q02_minimum_cost_supplier.py +++ b/examples/tpch/q02_minimum_cost_supplier.py @@ -30,7 +30,8 @@ """ import datafusion -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # This is the part we're looking for. Values selected here differ from the spec in order to run diff --git a/examples/tpch/q03_shipping_priority.py b/examples/tpch/q03_shipping_priority.py index 5ebab13c0..fc1231e0a 100644 --- a/examples/tpch/q03_shipping_priority.py +++ b/examples/tpch/q03_shipping_priority.py @@ -27,7 +27,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path SEGMENT_OF_INTEREST = "BUILDING" diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index 8bf02cb83..426338aea 100644 --- a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Ideally we could put 3 months into the interval. See note below. 
diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index 413a4acb9..fa2b01dea 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -30,11 +30,12 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path - DATE_OF_INTEREST = "1994-01-01" INTERVAL_DAYS = 365 REGION_OF_INTEREST = "ASIA" diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py index eaf9b0c29..1de5848b1 100644 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ b/examples/tpch/q06_forecasting_revenue_change.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Variables from the example query diff --git a/examples/tpch/q07_volume_shipping.py b/examples/tpch/q07_volume_shipping.py index 18c290d9c..a84cf728a 100644 --- a/examples/tpch/q07_volume_shipping.py +++ b/examples/tpch/q07_volume_shipping.py @@ -29,8 +29,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Variables of interest to query over diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index 7138ab65a..d46df30f2 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -28,8 +28,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import 
SessionContext, col, lit +from datafusion import functions as F from util import get_data_path supplier_nation = lit("BRAZIL") diff --git a/examples/tpch/q09_product_type_profit_measure.py b/examples/tpch/q09_product_type_profit_measure.py index aa47d76c0..e2abbd095 100644 --- a/examples/tpch/q09_product_type_profit_measure.py +++ b/examples/tpch/q09_product_type_profit_measure.py @@ -30,7 +30,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path part_color = lit("green") diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index 94b398c1d..ed822e264 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path DATE_START_OF_QUARTER = "1993-10-01" diff --git a/examples/tpch/q11_important_stock_identification.py b/examples/tpch/q11_important_stock_identification.py index 707265e16..22829ab7c 100644 --- a/examples/tpch/q11_important_stock_identification.py +++ b/examples/tpch/q11_important_stock_identification.py @@ -27,7 +27,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. 
""" -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path NATION = "GERMANY" diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index def2a6c30..f1d894940 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path SHIP_MODE_1 = "MAIL" diff --git a/examples/tpch/q13_customer_distribution.py b/examples/tpch/q13_customer_distribution.py index 67365a96a..93f082ea3 100644 --- a/examples/tpch/q13_customer_distribution.py +++ b/examples/tpch/q13_customer_distribution.py @@ -28,7 +28,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. 
""" -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path WORD_1 = "special" diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index cd26ee2bd..d62f76e3c 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path DATE = "1995-09-01" diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index 0bc316f7a..c321048f2 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path DATE = "1996-01-01" diff --git a/examples/tpch/q16_part_supplier_relationship.py b/examples/tpch/q16_part_supplier_relationship.py index a6a0c43eb..65043ffda 100644 --- a/examples/tpch/q16_part_supplier_relationship.py +++ b/examples/tpch/q16_part_supplier_relationship.py @@ -29,7 +29,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path BRAND = "Brand#45" diff --git a/examples/tpch/q17_small_quantity_order.py b/examples/tpch/q17_small_quantity_order.py index d7b43d498..6d76fe506 100644 --- a/examples/tpch/q17_small_quantity_order.py +++ b/examples/tpch/q17_small_quantity_order.py @@ -28,7 +28,8 @@ as part of their TPC Benchmark H Specification 
revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path BRAND = "Brand#23" diff --git a/examples/tpch/q18_large_volume_customer.py b/examples/tpch/q18_large_volume_customer.py index 165fce033..834d181c9 100644 --- a/examples/tpch/q18_large_volume_customer.py +++ b/examples/tpch/q18_large_volume_customer.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path QUANTITY = 300 diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index 4aed0cbae..2b87e1120 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -27,7 +27,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, udf, functions as F +from datafusion import SessionContext, col, lit, udf +from datafusion import functions as F from util import get_data_path items_of_interest = { diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index d720cdce6..a25188d31 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -28,8 +28,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path COLOR_OF_INTEREST = "forest" diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 27cf816fa..9bbaad779 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ 
b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path NATION_OF_INTEREST = "SAUDI ARABIA" diff --git a/examples/tpch/q22_global_sales_opportunity.py b/examples/tpch/q22_global_sales_opportunity.py index 72dce5289..c4d115b74 100644 --- a/examples/tpch/q22_global_sales_opportunity.py +++ b/examples/tpch/q22_global_sales_opportunity.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path NATION_CODES = [13, 31, 23, 29, 30, 18, 17] diff --git a/pyproject.toml b/pyproject.toml index 6e8acfe71..32bb28d21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W"] +select = ["E4", "E7", "E9", "F", "D", "W", "I"] [tool.ruff.lint.pydocstyle] convention = "google" @@ -100,4 +100,4 @@ docs = [ "pickleshare>=0.7.5", "sphinx-autoapi>=3.4.0", "setuptools>=75.3.0", -] \ No newline at end of file +] diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 2d8db42c8..85aefcce7 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,36 +26,28 @@ except ImportError: import importlib_metadata -from .context import ( - SessionContext, - SessionConfig, - RuntimeEnvBuilder, - SQLOptions, -) - -from .catalog import Catalog, Database, Table +from . import functions, object_store, substrait # The following imports are okay to remain as opaque to the user. 
from ._internal import Config - -from .record_batch import RecordBatchStream, RecordBatch - -from .udf import ScalarUDF, AggregateUDF, Accumulator, WindowUDF - +from .catalog import Catalog, Database, Table from .common import ( DFSchema, ) - +from .context import ( + RuntimeEnvBuilder, + SessionConfig, + SessionContext, + SQLOptions, +) from .dataframe import DataFrame - from .expr import ( Expr, WindowFrame, ) - -from .plan import LogicalPlan, ExecutionPlan - -from . import functions, object_store, substrait +from .plan import ExecutionPlan, LogicalPlan +from .record_batch import RecordBatch, RecordBatchStream +from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF __version__ = importlib_metadata.version(__name__) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index acd28f33d..703037665 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -19,10 +19,10 @@ from __future__ import annotations -import datafusion._internal as df_internal - from typing import TYPE_CHECKING +import datafusion._internal as df_internal + if TYPE_CHECKING: import pyarrow diff --git a/python/datafusion/common.py b/python/datafusion/common.py index 7db8333f2..a2298c634 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -16,9 +16,10 @@ # under the License. 
"""Common data types used throughout the DataFusion project.""" -from ._internal import common as common_internal from enum import Enum +from ._internal import common as common_internal + # TODO these should all have proper wrapper classes DFSchema = common_internal.DFSchema diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 3c284c9f9..864ef1c8b 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -19,26 +19,29 @@ from __future__ import annotations -from ._internal import SessionConfig as SessionConfigInternal -from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal -from ._internal import SQLOptions as SQLOptionsInternal -from ._internal import SessionContext as SessionContextInternal +from typing import TYPE_CHECKING, Any, Protocol + +from typing_extensions import deprecated from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream -from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF +from datafusion.udf import AggregateUDF, ScalarUDF, WindowUDF -from typing import Any, TYPE_CHECKING, Protocol -from typing_extensions import deprecated +from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal +from ._internal import SessionConfig as SessionConfigInternal +from ._internal import SessionContext as SessionContextInternal +from ._internal import SQLOptions as SQLOptionsInternal if TYPE_CHECKING: - import pyarrow + import pathlib + import pandas import polars - import pathlib - from datafusion.plan import LogicalPlan, ExecutionPlan + import pyarrow + + from datafusion.plan import ExecutionPlan, LogicalPlan class ArrowStreamExportable(Protocol): diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index b0c1abdad..7413a5fa3 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py 
@@ -20,31 +20,36 @@ """ from __future__ import annotations + import warnings from typing import ( + TYPE_CHECKING, Any, Iterable, List, - TYPE_CHECKING, Literal, - overload, Optional, Union, + overload, ) -from datafusion.record_batch import RecordBatchStream + from typing_extensions import deprecated -from datafusion.plan import LogicalPlan, ExecutionPlan + +from datafusion.plan import ExecutionPlan, LogicalPlan +from datafusion.record_batch import RecordBatchStream if TYPE_CHECKING: - import pyarrow as pa - import pandas as pd - import polars as pl import pathlib from typing import Callable, Sequence + import pandas as pd + import polars as pl + import pyarrow as pa + +from enum import Enum + from datafusion._internal import DataFrame as DataFrameInternal from datafusion.expr import Expr, SortExpr, sort_or_default -from enum import Enum # excerpt from deltalake diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 16add16f4..68ddd7c9a 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -22,12 +22,13 @@ from __future__ import annotations -from typing import Any, Optional, Type, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional, Type import pyarrow as pa -from datafusion.common import DataTypeMap, NullTreatment, RexType from typing_extensions import deprecated +from datafusion.common import DataTypeMap, NullTreatment, RexType + from ._internal import expr as expr_internal from ._internal import functions as functions_internal diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index c0097c6ab..7c2fa9a8f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,21 +18,21 @@ from __future__ import annotations +from typing import Any, Optional + +import pyarrow as pa + from datafusion._internal import functions as f +from datafusion.common import NullTreatment +from datafusion.context import SessionContext from datafusion.expr import ( CaseBuilder, Expr, - 
WindowFrame, SortExpr, - sort_list_to_raw_sort_list, + WindowFrame, expr_list_to_raw_expr_list, + sort_list_to_raw_sort_list, ) -from datafusion.context import SessionContext -from datafusion.common import NullTreatment - -from typing import Any, Optional - -import pyarrow as pa __all__ = [ "abs", diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index b274539fc..a8252b53c 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -17,8 +17,8 @@ """The default input source for DataFusion.""" -import os import glob +import os from typing import Any from datafusion.common import DataTypeMap, SqlTable diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index a71965f41..133fc446d 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -19,9 +19,9 @@ from __future__ import annotations -import datafusion._internal as df_internal +from typing import TYPE_CHECKING, Any, List -from typing import List, Any, TYPE_CHECKING +import datafusion._internal as df_internal if TYPE_CHECKING: from datafusion.context import SessionContext diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 75e58998f..772cd9089 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -27,9 +27,10 @@ if TYPE_CHECKING: import pyarrow - import datafusion._internal as df_internal import typing_extensions + import datafusion._internal as df_internal + class RecordBatch: """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index dea47acca..402184d3f 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -23,13 +23,15 @@ from __future__ import annotations -from ._internal import substrait as substrait_internal - +import pathlib from typing import TYPE_CHECKING + from typing_extensions import deprecated -import 
pathlib + from datafusion.plan import LogicalPlan +from ._internal import substrait as substrait_internal + if TYPE_CHECKING: from datafusion.context import SessionContext diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index d9d994b22..c97f453d0 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -19,14 +19,15 @@ from __future__ import annotations -import datafusion._internal as df_internal -from datafusion.expr import Expr -from typing import Callable, TYPE_CHECKING, TypeVar from abc import ABCMeta, abstractmethod -from typing import List, Optional from enum import Enum +from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar + import pyarrow +import datafusion._internal as df_internal +from datafusion.expr import Expr + if TYPE_CHECKING: _R = TypeVar("_R", bound=pyarrow.DataType) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 1cc07e500..9548fbfe4 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa import pytest from datafusion import SessionContext -import pyarrow as pa from pyarrow.csv import write_csv diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 243a8c3c9..5ef46131b 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -18,7 +18,6 @@ import numpy as np import pyarrow as pa import pytest - from datafusion import SessionContext, column, lit from datafusion import functions as f from datafusion.common import NullTreatment diff --git a/python/tests/test_config.py b/python/tests/test_config.py index 12d9fc3ff..c1d7f97e1 100644 --- a/python/tests/test_config.py +++ b/python/tests/test_config.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. 
-from datafusion import Config import pytest +from datafusion import Config @pytest.fixture diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 10e8ad0e9..91046e6b8 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -14,15 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import datetime as dt import gzip import os -import datetime as dt import pathlib import pyarrow as pa import pyarrow.dataset as ds import pytest - from datafusion import ( DataFrame, RuntimeEnvBuilder, diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index a1a871e9a..5bc3fb094 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -18,11 +18,8 @@ from typing import Any import pyarrow as pa -from pyarrow.csv import write_csv import pyarrow.parquet as pq import pytest - -from datafusion import functions as f from datafusion import ( DataFrame, SessionContext, @@ -30,7 +27,9 @@ column, literal, ) +from datafusion import functions as f from datafusion.expr import Window +from pyarrow.csv import write_csv @pytest.fixture diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index add170c17..ad6aa7c0a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,15 +15,13 @@ # specific language governing permissions and limitations # under the License. 
import math +from datetime import datetime import numpy as np import pyarrow as pa import pytest -from datetime import datetime - -from datafusion import SessionContext, column +from datafusion import SessionContext, column, literal, string_literal from datafusion import functions as f -from datafusion import literal, string_literal np.seterr(invalid="ignore") diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 6ea77b15f..0c155cbde 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -15,72 +15,69 @@ # specific language governing permissions and limitations # under the License. -import pytest - import datafusion +import pytest from datafusion import ( AggregateUDF, DataFrame, - SessionContext, ScalarUDF, + SessionContext, functions, ) - from datafusion.common import ( DFSchema, ) - from datafusion.expr import ( - Expr, - Column, - Literal, - BinaryExpr, - AggregateFunction, - Projection, - TableScan, - Filter, - Limit, Aggregate, - Sort, - Analyze, - Join, - JoinType, - JoinConstraint, - Union, - Like, - ILike, - SimilarTo, - ScalarVariable, + AggregateFunction, Alias, - Not, - IsNotNull, - IsTrue, - IsFalse, - IsUnknown, - IsNotTrue, - IsNotFalse, - IsNotUnknown, - Negative, - InList, - Exists, - Subquery, - InSubquery, - ScalarSubquery, - GroupingSet, - Placeholder, + Analyze, + Between, + BinaryExpr, Case, Cast, - TryCast, - SubqueryAlias, - Between, - Explain, - Extension, + Column, CreateMemoryTable, CreateView, Distinct, DropTable, - Repartition, + Exists, + Explain, + Expr, + Extension, + Filter, + GroupingSet, + ILike, + InList, + InSubquery, + IsFalse, + IsNotFalse, + IsNotNull, + IsNotTrue, + IsNotUnknown, + IsTrue, + IsUnknown, + Join, + JoinConstraint, + JoinType, + Like, + Limit, + Literal, + Negative, + Not, Partitioning, + Placeholder, + Projection, + Repartition, + ScalarSubquery, + ScalarVariable, + SimilarTo, + Sort, + Subquery, + SubqueryAlias, + TableScan, + TryCast, + Union, ) diff --git 
a/python/tests/test_indexing.py b/python/tests/test_indexing.py index 8ca3eab19..5b0d08610 100644 --- a/python/tests/test_indexing.py +++ b/python/tests/test_indexing.py @@ -17,7 +17,6 @@ import pyarrow as pa import pytest - from datafusion import SessionContext diff --git a/python/tests/test_input.py b/python/tests/test_input.py index fb53d86e5..806471357 100644 --- a/python/tests/test_input.py +++ b/python/tests/test_input.py @@ -16,6 +16,7 @@ # under the License. import os + from datafusion.input.location import LocationInputPlugin diff --git a/python/tests/test_plans.py b/python/tests/test_plans.py index 0283a4e6a..396acbe97 100644 --- a/python/tests/test_plans.py +++ b/python/tests/test_plans.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import SessionContext, LogicalPlan, ExecutionPlan import pytest +from datafusion import ExecutionPlan, LogicalPlan, SessionContext # Note: We must use CSV because memory tables are currently not supported for diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index a2521dd09..862f745bf 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -19,12 +19,11 @@ import numpy as np import pyarrow as pa -from pyarrow.csv import write_csv import pyarrow.dataset as ds import pytest +from datafusion import col, udf from datafusion.object_store import Http - -from datafusion import udf, col +from pyarrow.csv import write_csv from . import generic as helpers diff --git a/python/tests/test_store.py b/python/tests/test_store.py index f85b28311..53ffc3acf 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -18,7 +18,6 @@ import os import pytest - from datafusion import SessionContext diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index 2071c8f3b..feada7cde 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -16,10 +16,9 @@ # under the License. 
import pyarrow as pa - +import pytest from datafusion import SessionContext from datafusion import substrait as ss -import pytest @pytest.fixture diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 8f31748e0..0005a3da8 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -20,7 +20,6 @@ import pyarrow as pa import pyarrow.compute as pc import pytest - from datafusion import Accumulator, column, udaf diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index 568a66dbb..3a5dce6d6 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udf, column import pyarrow as pa import pytest +from datafusion import column, udf @pytest.fixture diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 2099ac9bc..0ffa04179 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -17,10 +17,10 @@ import pyarrow as pa import pytest - -from datafusion import SessionContext, column, udwf, lit, functions as f -from datafusion.udf import WindowEvaluator +from datafusion import SessionContext, column, lit, udwf +from datafusion import functions as f from datafusion.expr import WindowFrame +from datafusion.udf import WindowEvaluator class ExponentialSmoothDefault(WindowEvaluator): From 8b513906315a0749b9f5cd6f34bf259ab4dd1add Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 1 Feb 2025 08:29:48 -0500 Subject: [PATCH 040/248] feat: remove DataFusion pyarrow feat (#1000) * Add developer instructions to speed up build processes * Remove pyarrow dep from datafusion. Add in PyScalarValue wrapper and rename DataFusionError to PyDataFusionError to be less confusing * Removed unnecessary cloning of scalar value when going from rust to python. 
Also removed the rust unit tests copied over from upstream repo that were failing due to #941 in pyo3 * Change return types to PyDataFusionError to simplify code * Update exception handling to fix build errors in recent rust toolchains --- Cargo.lock | 145 +++++++++++------- Cargo.toml | 2 +- .../source/contributor-guide/introduction.rst | 53 +++++++ python/tests/test_indexing.py | 3 +- src/catalog.rs | 8 +- src/common/data_type.rs | 14 ++ src/config.rs | 11 +- src/context.rs | 136 ++++++++-------- src/dataframe.rs | 119 +++++++------- src/dataset_exec.rs | 6 +- src/errors.rs | 42 ++--- src/expr.rs | 38 ++--- src/expr/conditional_expr.rs | 6 +- src/expr/literal.rs | 4 +- src/expr/window.rs | 13 +- src/functions.rs | 55 ++++--- src/lib.rs | 1 + src/physical_plan.rs | 13 +- src/pyarrow_filter_expression.rs | 24 +-- src/pyarrow_util.rs | 61 ++++++++ src/record_batch.rs | 3 +- src/sql/exceptions.rs | 16 +- src/sql/logical.rs | 14 +- src/substrait.rs | 54 ++++--- src/udaf.rs | 21 ++- src/udwf.rs | 4 +- src/utils.rs | 6 +- 27 files changed, 524 insertions(+), 348 deletions(-) create mode 100644 src/pyarrow_util.rs diff --git a/Cargo.lock b/Cargo.lock index 5a74a4839..c6590fd21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,7 +79,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", - "getrandom", + "getrandom 0.2.15", "once_cell", "version_check", "zerocopy", @@ -449,9 +449,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.85" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", @@ -576,9 +576,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.1" +version = "4.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -586,9 +586,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "byteorder" @@ -635,9 +635,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.10" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" +checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" dependencies = [ "jobserver", "libc", @@ -692,9 +692,9 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.52" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" +checksum = "e24a03c8b52922d68a1589ad61032f2c1aa5a8158d2aa0d93c6e9534944bbad6" dependencies = [ "cc", ] @@ -725,7 +725,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom", + "getrandom 0.2.15", "once_cell", "tiny-keccak", ] @@ -784,9 +784,9 @@ checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" [[package]] name = "cpufeatures" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" 
dependencies = [ "libc", ] @@ -817,9 +817,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -961,7 +961,6 @@ dependencies = [ "object_store", "parquet", "paste", - "pyo3", "recursive", "sqlparser", "tokio", @@ -1411,9 +1410,9 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35" [[package]] name = "either" @@ -1607,10 +1606,22 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets", +] + [[package]] name = "gimli" version = "0.31.1" @@ -1722,9 +1733,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" [[package]] name = "humantime" @@ -1734,9 +1745,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.5.2" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ "bytes", "futures-channel", @@ -1953,9 +1964,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1975,9 +1986,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.10.1" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "itertools" @@ -2243,7 +2254,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -2377,9 +2388,9 @@ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "ordered-float" @@ -2661,9 +2672,9 @@ dependencies = [ [[package]] name = "protobuf-src" -version = "2.1.0+27.1" +version = "2.1.1+27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7edafa3bcc668fa93efafcbdf58d7821bbda0f4b458ac7fae3d57ec0fec8167" +checksum = 
"6217c3504da19b85a3a4b2e9a5183d635822d83507ba0986624b5c05b83bfc40" dependencies = [ "cmake", ] @@ -2794,7 +2805,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", - "getrandom", + "getrandom 0.2.15", "rand", "ring", "rustc-hash", @@ -2857,7 +2868,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -2926,9 +2937,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f56e622c2378013c6c61e2bd776604c46dc1087b2dc5293275a0c20a44f0771" +checksum = "78ef7fa9ed0256d64a688a3747d0fef7a88851c18a5e1d57f115f38ec2e09366" dependencies = [ "hashbrown 0.15.2", "memchr", @@ -2997,7 +3008,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", "cfg-if", - "getrandom", + "getrandom 0.2.15", "libc", "spin", "untrusted", @@ -3033,9 +3044,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags 2.8.0", "errno", @@ -3046,9 +3057,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.21" +version = "0.23.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" +checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" dependencies = [ "once_cell", "ring", @@ -3081,9 
+3092,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" dependencies = [ "web-time", ] @@ -3107,9 +3118,9 @@ checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "same-file" @@ -3184,9 +3195,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" dependencies = [ "serde", ] @@ -3239,9 +3250,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.136" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "336a0c23cf42a38d9eaa7cd22c7040d04e1228a19a933890805ffd00a16437d2" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ "itoa", "memchr", @@ -3514,13 +3525,13 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.15.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" +checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" dependencies = [ "cfg-if", "fastrand", - "getrandom", + "getrandom 0.3.1", "once_cell", 
"rustix", "windows-sys 0.59.0", @@ -3831,9 +3842,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-segmentation" @@ -3890,11 +3901,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ - "getrandom", + "getrandom 0.2.15", "serde", ] @@ -3929,6 +3940,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -4185,6 +4205,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/Cargo.toml b/Cargo.toml index 10cffccb1..003ba36e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ tokio = { version = 
"1.42", features = ["macros", "rt", "rt-multi-thread", "sync pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "44.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion = { version = "44.0.0", features = ["avro", "unicode_expressions"] } datafusion-substrait = { version = "44.0.0", optional = true } datafusion-proto = { version = "44.0.0" } datafusion-ffi = { version = "44.0.0" } diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index fb98cfd1d..25f2c21a4 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -95,3 +95,56 @@ To update dependencies, run .. code-block:: shell uv sync --dev --no-install-package datafusion + +Improving Build Speed +--------------------- + +The `pyo3 `_ dependency of this project contains a ``build.rs`` file which +can cause it to rebuild frequently. You can prevent this from happening by defining a ``PYO3_CONFIG_FILE`` +environment variable that points to a file with your build configuration. Whenever your build configuration +changes, such as during some major version updates, you will need to regenerate this file. This variable +should point to a fully resolved path on your build machine. + +To generate this file, use the following command: + +.. code-block:: shell + + PYO3_PRINT_CONFIG=1 cargo build + +This will generate some output that looks like the following. You will want to copy these contents into +a file. If you place this file in your project directory with filename ``.pyo3_build_config`` it will +be ignored by ``git``. + +.. 
code-block:: + + implementation=CPython + version=3.8 + shared=true + abi3=true + lib_name=python3.12 + lib_dir=/opt/homebrew/opt/python@3.12/Frameworks/Python.framework/Versions/3.12/lib + executable=/Users/myusername/src/datafusion-python/.venv/bin/python + pointer_width=64 + build_flags= + suppress_build_script_link_lines=false + +Add the environment variable to your system. + +.. code-block:: shell + + export PYO3_CONFIG_FILE="/Users/myusername/src/datafusion-python/.pyo3_build_config" + +If you are on a Mac and you use VS Code for your IDE, you will want to add these variables +to your settings. You can find the appropriate rust flags by looking in the +``.cargo/config.toml`` file. + +.. code-block:: + + "rust-analyzer.cargo.extraEnv": { + "RUSTFLAGS": "-C link-arg=-undefined -C link-arg=dynamic_lookup", + "PYO3_CONFIG_FILE": "/Users/myusername/src/datafusion-python/.pyo3_build_config" + }, + "rust-analyzer.runnables.extraEnv": { + "RUSTFLAGS": "-C link-arg=-undefined -C link-arg=dynamic_lookup", + "PYO3_CONFIG_FILE": "/Users/myusername/src/datafusion-python/.pyo3_build_config" + } diff --git a/python/tests/test_indexing.py b/python/tests/test_indexing.py index 5b0d08610..327decd2f 100644 --- a/python/tests/test_indexing.py +++ b/python/tests/test_indexing.py @@ -43,7 +43,8 @@ def test_err(df): with pytest.raises(Exception) as e_info: df["c"] - assert "Schema error: No field named c." 
in e_info.value.args[0] + for e in ["SchemaError", "FieldNotFound", 'name: "c"']: + assert e in e_info.value.args[0] with pytest.raises(Exception) as e_info: df[1] diff --git a/src/catalog.rs b/src/catalog.rs index 1ce66a4dc..1e189a5aa 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use pyo3::exceptions::PyKeyError; use pyo3::prelude::*; -use crate::errors::DataFusionError; +use crate::errors::{PyDataFusionError, PyDataFusionResult}; use crate::utils::wait_for_future; use datafusion::{ arrow::pyarrow::ToPyArrow, @@ -96,11 +96,13 @@ impl PyDatabase { self.database.table_names().into_iter().collect() } - fn table(&self, name: &str, py: Python) -> PyResult { + fn table(&self, name: &str, py: Python) -> PyDataFusionResult { if let Some(table) = wait_for_future(py, self.database.table(name))? { Ok(PyTable::new(table)) } else { - Err(DataFusionError::Common(format!("Table not found: {name}")).into()) + Err(PyDataFusionError::Common(format!( + "Table not found: {name}" + ))) } } diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 7f9c75bfd..f5f8a6b06 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -23,6 +23,20 @@ use pyo3::{exceptions::PyValueError, prelude::*}; use crate::errors::py_datafusion_err; +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub struct PyScalarValue(pub ScalarValue); + +impl From for PyScalarValue { + fn from(value: ScalarValue) -> Self { + Self(value) + } +} +impl From for ScalarValue { + fn from(value: PyScalarValue) -> Self { + value.0 + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[pyclass(eq, eq_int, name = "RexType", module = "datafusion.common")] pub enum RexType { diff --git a/src/config.rs b/src/config.rs index 3f2a05580..cc725b9a3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -21,6 +21,8 @@ use pyo3::types::*; use datafusion::common::ScalarValue; use datafusion::config::ConfigOptions; +use crate::errors::PyDataFusionResult; 
+ #[pyclass(name = "Config", module = "datafusion", subclass)] #[derive(Clone)] pub(crate) struct PyConfig { @@ -38,7 +40,7 @@ impl PyConfig { /// Get configurations from environment variables #[staticmethod] - pub fn from_env() -> PyResult { + pub fn from_env() -> PyDataFusionResult { Ok(Self { config: ConfigOptions::from_env()?, }) @@ -56,11 +58,10 @@ impl PyConfig { } /// Set a configuration option - pub fn set(&mut self, key: &str, value: PyObject, py: Python) -> PyResult<()> { + pub fn set(&mut self, key: &str, value: PyObject, py: Python) -> PyDataFusionResult<()> { let scalar_value = py_obj_to_scalar_value(py, value); - self.config - .set(key, scalar_value.to_string().as_str()) - .map_err(|e| e.into()) + self.config.set(key, scalar_value.to_string().as_str())?; + Ok(()) } /// Get all configuration options diff --git a/src/context.rs b/src/context.rs index bab7fd42a..f53b15576 100644 --- a/src/context.rs +++ b/src/context.rs @@ -28,16 +28,17 @@ use object_store::ObjectStore; use url::Url; use uuid::Uuid; -use pyo3::exceptions::{PyKeyError, PyNotImplementedError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; use crate::catalog::{PyCatalog, PyTable}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; -use crate::errors::{py_datafusion_err, DataFusionError}; +use crate::errors::{py_datafusion_err, PyDataFusionResult}; use crate::expr::sort_expr::PySortExpr; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; +use crate::sql::exceptions::py_value_err; use crate::sql::logical::PyLogicalPlan; use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; @@ -277,7 +278,7 @@ impl PySessionContext { pub fn new( config: Option, runtime: Option, - ) -> PyResult { + ) -> PyDataFusionResult { let config = if let Some(c) = config { c.config } else { @@ -348,7 +349,7 @@ impl PySessionContext { schema: Option>, file_sort_order: Option>>, py: Python, - ) -> 
PyResult<()> { + ) -> PyDataFusionResult<()> { let options = ListingOptions::new(Arc::new(ParquetFormat::new())) .with_file_extension(file_extension) .with_table_partition_cols(convert_table_partition_cols(table_partition_cols)?) @@ -365,7 +366,7 @@ impl PySessionContext { None => { let state = self.ctx.state(); let schema = options.infer_schema(&state, &table_path); - wait_for_future(py, schema).map_err(DataFusionError::from)? + wait_for_future(py, schema)? } }; let config = ListingTableConfig::new(table_path) @@ -382,9 +383,9 @@ impl PySessionContext { } /// Returns a PyDataFrame whose plan corresponds to the SQL statement. - pub fn sql(&mut self, query: &str, py: Python) -> PyResult { + pub fn sql(&mut self, query: &str, py: Python) -> PyDataFusionResult { let result = self.ctx.sql(query); - let df = wait_for_future(py, result).map_err(DataFusionError::from)?; + let df = wait_for_future(py, result)?; Ok(PyDataFrame::new(df)) } @@ -394,14 +395,14 @@ impl PySessionContext { query: &str, options: Option, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let options = if let Some(options) = options { options.options } else { SQLOptions::new() }; let result = self.ctx.sql_with_options(query, options); - let df = wait_for_future(py, result).map_err(DataFusionError::from)?; + let df = wait_for_future(py, result)?; Ok(PyDataFrame::new(df)) } @@ -412,14 +413,14 @@ impl PySessionContext { name: Option<&str>, schema: Option>, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let schema = if let Some(schema) = schema { SchemaRef::from(schema.0) } else { partitions.0[0][0].schema() }; - let table = MemTable::try_new(schema, partitions.0).map_err(DataFusionError::from)?; + let table = MemTable::try_new(schema, partitions.0)?; // generate a random (unique) name for this table if none is provided // table name cannot start with numeric digit @@ -433,11 +434,9 @@ impl PySessionContext { } }; - self.ctx - .register_table(&*table_name, Arc::new(table)) - 
.map_err(DataFusionError::from)?; + self.ctx.register_table(&*table_name, Arc::new(table))?; - let table = wait_for_future(py, self._table(&table_name)).map_err(DataFusionError::from)?; + let table = wait_for_future(py, self._table(&table_name))?; let df = PyDataFrame::new(table); Ok(df) @@ -495,15 +494,14 @@ impl PySessionContext { data: Bound<'_, PyAny>, name: Option<&str>, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let (schema, batches) = if let Ok(stream_reader) = ArrowArrayStreamReader::from_pyarrow_bound(&data) { // Works for any object that implements __arrow_c_stream__ in pycapsule. let schema = stream_reader.schema().as_ref().to_owned(); let batches = stream_reader - .collect::, arrow::error::ArrowError>>() - .map_err(DataFusionError::from)?; + .collect::, arrow::error::ArrowError>>()?; (schema, batches) } else if let Ok(array) = RecordBatch::from_pyarrow_bound(&data) { @@ -512,8 +510,8 @@ impl PySessionContext { (array.schema().as_ref().to_owned(), vec![array]) } else { - return Err(PyTypeError::new_err( - "Expected either a Arrow Array or Arrow Stream in from_arrow().", + return Err(crate::errors::PyDataFusionError::Common( + "Expected either a Arrow Array or Arrow Stream in from_arrow().".to_string(), )); }; @@ -559,17 +557,13 @@ impl PySessionContext { Ok(df) } - pub fn register_table(&mut self, name: &str, table: &PyTable) -> PyResult<()> { - self.ctx - .register_table(name, table.table()) - .map_err(DataFusionError::from)?; + pub fn register_table(&mut self, name: &str, table: &PyTable) -> PyDataFusionResult<()> { + self.ctx.register_table(name, table.table())?; Ok(()) } - pub fn deregister_table(&mut self, name: &str) -> PyResult<()> { - self.ctx - .deregister_table(name) - .map_err(DataFusionError::from)?; + pub fn deregister_table(&mut self, name: &str) -> PyDataFusionResult<()> { + self.ctx.deregister_table(name)?; Ok(()) } @@ -578,10 +572,10 @@ impl PySessionContext { &mut self, name: &str, provider: Bound<'_, PyAny>, - ) -> 
PyResult<()> { + ) -> PyDataFusionResult<()> { if provider.hasattr("__datafusion_table_provider__")? { let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; - let capsule = capsule.downcast::()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; validate_pycapsule(capsule, "datafusion_table_provider")?; let provider = unsafe { capsule.reference::() }; @@ -591,8 +585,9 @@ impl PySessionContext { Ok(()) } else { - Err(PyNotImplementedError::new_err( - "__datafusion_table_provider__ does not exist on Table Provider object.", + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_table_provider__ does not exist on Table Provider object." + .to_string(), )) } } @@ -601,12 +596,10 @@ impl PySessionContext { &mut self, name: &str, partitions: PyArrowType>>, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let schema = partitions.0[0][0].schema(); let table = MemTable::try_new(schema, partitions.0)?; - self.ctx - .register_table(name, Arc::new(table)) - .map_err(DataFusionError::from)?; + self.ctx.register_table(name, Arc::new(table))?; Ok(()) } @@ -628,7 +621,7 @@ impl PySessionContext { schema: Option>, file_sort_order: Option>>, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let mut options = ParquetReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
.parquet_pruning(parquet_pruning) @@ -642,7 +635,7 @@ impl PySessionContext { .collect(); let result = self.ctx.register_parquet(name, path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; Ok(()) } @@ -666,12 +659,12 @@ impl PySessionContext { file_extension: &str, file_compression_type: Option, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let delimiter = delimiter.as_bytes(); if delimiter.len() != 1 { - return Err(PyValueError::new_err( + return Err(crate::errors::PyDataFusionError::PythonError(py_value_err( "Delimiter must be a single character", - )); + ))); } let mut options = CsvReadOptions::new() @@ -685,11 +678,11 @@ impl PySessionContext { if path.is_instance_of::() { let paths = path.extract::>()?; let result = self.register_csv_from_multiple_paths(name, paths, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; } else { let path = path.extract::()?; let result = self.ctx.register_csv(name, &path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; } Ok(()) @@ -713,7 +706,7 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_compression_type: Option, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; @@ -726,7 +719,7 @@ impl PySessionContext { options.schema = schema.as_ref().map(|x| &x.0); let result = self.ctx.register_json(name, path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; Ok(()) } @@ -745,7 +738,7 @@ impl PySessionContext { file_extension: &str, table_partition_cols: Vec<(String, String)>, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; @@ -756,7 
+749,7 @@ impl PySessionContext { options.schema = schema.as_ref().map(|x| &x.0); let result = self.ctx.register_avro(name, path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; Ok(()) } @@ -767,12 +760,10 @@ impl PySessionContext { name: &str, dataset: &Bound<'_, PyAny>, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let table: Arc = Arc::new(Dataset::new(dataset, py)?); - self.ctx - .register_table(name, table) - .map_err(DataFusionError::from)?; + self.ctx.register_table(name, table)?; Ok(()) } @@ -824,11 +815,11 @@ impl PySessionContext { Ok(PyDataFrame::new(x)) } - pub fn table_exist(&self, name: &str) -> PyResult { + pub fn table_exist(&self, name: &str) -> PyDataFusionResult { Ok(self.ctx.table_exist(name)?) } - pub fn empty_table(&self) -> PyResult { + pub fn empty_table(&self) -> PyDataFusionResult { Ok(PyDataFrame::new(self.ctx.read_empty()?)) } @@ -847,7 +838,7 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_compression_type: Option, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; @@ -859,10 +850,10 @@ impl PySessionContext { let df = if let Some(schema) = schema { options.schema = Some(&schema.0); let result = self.ctx.read_json(path, options); - wait_for_future(py, result).map_err(DataFusionError::from)? + wait_for_future(py, result)? } else { let result = self.ctx.read_json(path, options); - wait_for_future(py, result).map_err(DataFusionError::from)? + wait_for_future(py, result)? 
}; Ok(PyDataFrame::new(df)) } @@ -888,12 +879,12 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_compression_type: Option, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let delimiter = delimiter.as_bytes(); if delimiter.len() != 1 { - return Err(PyValueError::new_err( + return Err(crate::errors::PyDataFusionError::PythonError(py_value_err( "Delimiter must be a single character", - )); + ))); }; let mut options = CsvReadOptions::new() @@ -909,12 +900,12 @@ impl PySessionContext { let paths = path.extract::>()?; let paths = paths.iter().map(|p| p as &str).collect::>(); let result = self.ctx.read_csv(paths, options); - let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?); + let df = PyDataFrame::new(wait_for_future(py, result)?); Ok(df) } else { let path = path.extract::()?; let result = self.ctx.read_csv(path, options); - let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?); + let df = PyDataFrame::new(wait_for_future(py, result)?); Ok(df) } } @@ -938,7 +929,7 @@ impl PySessionContext { schema: Option>, file_sort_order: Option>>, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let mut options = ParquetReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
.parquet_pruning(parquet_pruning) @@ -952,7 +943,7 @@ impl PySessionContext { .collect(); let result = self.ctx.read_parquet(path, options); - let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?); + let df = PyDataFrame::new(wait_for_future(py, result)?); Ok(df) } @@ -965,26 +956,23 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_extension: &str, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let mut options = AvroReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?); options.file_extension = file_extension; let df = if let Some(schema) = schema { options.schema = Some(&schema.0); let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future).map_err(DataFusionError::from)? + wait_for_future(py, read_future)? } else { let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future).map_err(DataFusionError::from)? + wait_for_future(py, read_future)? 
}; Ok(PyDataFrame::new(df)) } - pub fn read_table(&self, table: &PyTable) -> PyResult { - let df = self - .ctx - .read_table(table.table()) - .map_err(DataFusionError::from)?; + pub fn read_table(&self, table: &PyTable) -> PyDataFusionResult { + let df = self.ctx.read_table(table.table())?; Ok(PyDataFrame::new(df)) } @@ -1011,7 +999,7 @@ impl PySessionContext { plan: PyExecutionPlan, part: usize, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let ctx: TaskContext = TaskContext::from(&self.ctx.state()); // create a Tokio runtime to run the async code let rt = &get_tokio_runtime().0; @@ -1071,13 +1059,13 @@ impl PySessionContext { pub fn convert_table_partition_cols( table_partition_cols: Vec<(String, String)>, -) -> Result, DataFusionError> { +) -> PyDataFusionResult> { table_partition_cols .into_iter() .map(|(name, ty)| match ty.as_str() { "string" => Ok((name, DataType::Utf8)), "int" => Ok((name, DataType::Int32)), - _ => Err(DataFusionError::Common(format!( + _ => Err(crate::errors::PyDataFusionError::Common(format!( "Unsupported data type '{ty}' for partition column. 
Supported types are 'string' and 'int'" ))), }) diff --git a/src/dataframe.rs b/src/dataframe.rs index b875480a7..6fb08ba25 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -33,20 +33,20 @@ use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; -use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; use pyo3::types::{PyCapsule, PyTuple, PyTupleMethods}; use tokio::task::JoinHandle; -use crate::errors::py_datafusion_err; +use crate::errors::{py_datafusion_err, PyDataFusionError}; use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; use crate::{ - errors::DataFusionError, + errors::PyDataFusionResult, expr::{sort_expr::PySortExpr, PyExpr}, }; @@ -69,7 +69,7 @@ impl PyDataFrame { #[pymethods] impl PyDataFrame { /// Enable selection for `df[col]`, `df[col1, col2, col3]`, and `df[[col1, col2, col3]]` - fn __getitem__(&self, key: Bound<'_, PyAny>) -> PyResult { + fn __getitem__(&self, key: Bound<'_, PyAny>) -> PyDataFusionResult { if let Ok(key) = key.extract::() { // df[col] self.select_columns(vec![key]) @@ -84,12 +84,12 @@ impl PyDataFrame { // df[[col1, col2, col3]] self.select_columns(keys) } else { - let message = "DataFrame can only be indexed by string index or indices"; - Err(PyTypeError::new_err(message)) + let message = "DataFrame can only be indexed by string index or indices".to_string(); + Err(PyDataFusionError::Common(message)) } } - fn __repr__(&self, py: Python) -> PyResult { + fn __repr__(&self, py: Python) -> PyDataFusionResult { let df = self.df.as_ref().clone().limit(0, Some(10))?; let 
batches = wait_for_future(py, df.collect())?; let batches_as_string = pretty::pretty_format_batches(&batches); @@ -99,7 +99,7 @@ impl PyDataFrame { } } - fn _repr_html_(&self, py: Python) -> PyResult { + fn _repr_html_(&self, py: Python) -> PyDataFusionResult { let mut html_str = "\n".to_string(); let df = self.df.as_ref().clone().limit(0, Some(10))?; @@ -145,7 +145,7 @@ impl PyDataFrame { } /// Calculate summary statistics for a DataFrame - fn describe(&self, py: Python) -> PyResult { + fn describe(&self, py: Python) -> PyDataFusionResult { let df = self.df.as_ref().clone(); let stat_df = wait_for_future(py, df.describe())?; Ok(Self::new(stat_df)) @@ -157,37 +157,37 @@ impl PyDataFrame { } #[pyo3(signature = (*args))] - fn select_columns(&self, args: Vec) -> PyResult { + fn select_columns(&self, args: Vec) -> PyDataFusionResult { let args = args.iter().map(|s| s.as_ref()).collect::>(); let df = self.df.as_ref().clone().select_columns(&args)?; Ok(Self::new(df)) } #[pyo3(signature = (*args))] - fn select(&self, args: Vec) -> PyResult { + fn select(&self, args: Vec) -> PyDataFusionResult { let expr = args.into_iter().map(|e| e.into()).collect(); let df = self.df.as_ref().clone().select(expr)?; Ok(Self::new(df)) } #[pyo3(signature = (*args))] - fn drop(&self, args: Vec) -> PyResult { + fn drop(&self, args: Vec) -> PyDataFusionResult { let cols = args.iter().map(|s| s.as_ref()).collect::>(); let df = self.df.as_ref().clone().drop_columns(&cols)?; Ok(Self::new(df)) } - fn filter(&self, predicate: PyExpr) -> PyResult { + fn filter(&self, predicate: PyExpr) -> PyDataFusionResult { let df = self.df.as_ref().clone().filter(predicate.into())?; Ok(Self::new(df)) } - fn with_column(&self, name: &str, expr: PyExpr) -> PyResult { + fn with_column(&self, name: &str, expr: PyExpr) -> PyDataFusionResult { let df = self.df.as_ref().clone().with_column(name, expr.into())?; Ok(Self::new(df)) } - fn with_columns(&self, exprs: Vec) -> PyResult { + fn with_columns(&self, exprs: Vec) -> 
PyDataFusionResult { let mut df = self.df.as_ref().clone(); for expr in exprs { let expr: Expr = expr.into(); @@ -199,7 +199,7 @@ impl PyDataFrame { /// Rename one column by applying a new projection. This is a no-op if the column to be /// renamed does not exist. - fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyResult { + fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyDataFusionResult { let df = self .df .as_ref() @@ -208,7 +208,7 @@ impl PyDataFrame { Ok(Self::new(df)) } - fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyResult { + fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyDataFusionResult { let group_by = group_by.into_iter().map(|e| e.into()).collect(); let aggs = aggs.into_iter().map(|e| e.into()).collect(); let df = self.df.as_ref().clone().aggregate(group_by, aggs)?; @@ -216,14 +216,14 @@ impl PyDataFrame { } #[pyo3(signature = (*exprs))] - fn sort(&self, exprs: Vec) -> PyResult { + fn sort(&self, exprs: Vec) -> PyDataFusionResult { let exprs = to_sort_expressions(exprs); let df = self.df.as_ref().clone().sort(exprs)?; Ok(Self::new(df)) } #[pyo3(signature = (count, offset=0))] - fn limit(&self, count: usize, offset: usize) -> PyResult { + fn limit(&self, count: usize, offset: usize) -> PyDataFusionResult { let df = self.df.as_ref().clone().limit(offset, Some(count))?; Ok(Self::new(df)) } @@ -232,14 +232,15 @@ impl PyDataFrame { /// Unless some order is specified in the plan, there is no /// guarantee of the order of the result. fn collect(&self, py: Python) -> PyResult> { - let batches = wait_for_future(py, self.df.as_ref().clone().collect())?; + let batches = wait_for_future(py, self.df.as_ref().clone().collect()) + .map_err(PyDataFusionError::from)?; // cannot use PyResult> return type due to // https://github.com/PyO3/pyo3/issues/1813 batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect() } /// Cache DataFrame. 
- fn cache(&self, py: Python) -> PyResult { + fn cache(&self, py: Python) -> PyDataFusionResult { let df = wait_for_future(py, self.df.as_ref().clone().cache())?; Ok(Self::new(df)) } @@ -247,7 +248,8 @@ impl PyDataFrame { /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch /// maintaining the input partitioning. fn collect_partitioned(&self, py: Python) -> PyResult>> { - let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned())?; + let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned()) + .map_err(PyDataFusionError::from)?; batches .into_iter() @@ -257,13 +259,13 @@ impl PyDataFrame { /// Print the result, 20 lines by default #[pyo3(signature = (num=20))] - fn show(&self, py: Python, num: usize) -> PyResult<()> { + fn show(&self, py: Python, num: usize) -> PyDataFusionResult<()> { let df = self.df.as_ref().clone().limit(0, Some(num))?; print_dataframe(py, df) } /// Filter out duplicate rows - fn distinct(&self) -> PyResult { + fn distinct(&self) -> PyDataFusionResult { let df = self.df.as_ref().clone().distinct()?; Ok(Self::new(df)) } @@ -274,7 +276,7 @@ impl PyDataFrame { how: &str, left_on: Vec, right_on: Vec, - ) -> PyResult { + ) -> PyDataFusionResult { let join_type = match how { "inner" => JoinType::Inner, "left" => JoinType::Left, @@ -283,10 +285,9 @@ impl PyDataFrame { "semi" => JoinType::LeftSemi, "anti" => JoinType::LeftAnti, how => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "The join type {how} does not exist or is not implemented" - )) - .into()); + ))); } }; @@ -303,7 +304,12 @@ impl PyDataFrame { Ok(Self::new(df)) } - fn join_on(&self, right: PyDataFrame, on_exprs: Vec, how: &str) -> PyResult { + fn join_on( + &self, + right: PyDataFrame, + on_exprs: Vec, + how: &str, + ) -> PyDataFusionResult { let join_type = match how { "inner" => JoinType::Inner, "left" => JoinType::Left, @@ -312,10 +318,9 @@ impl 
PyDataFrame { "semi" => JoinType::LeftSemi, "anti" => JoinType::LeftAnti, how => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "The join type {how} does not exist or is not implemented" - )) - .into()); + ))); } }; let exprs: Vec = on_exprs.into_iter().map(|e| e.into()).collect(); @@ -330,7 +335,7 @@ impl PyDataFrame { /// Print the query plan #[pyo3(signature = (verbose=false, analyze=false))] - fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { + fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyDataFusionResult<()> { let df = self.df.as_ref().clone().explain(verbose, analyze)?; print_dataframe(py, df) } @@ -341,18 +346,18 @@ impl PyDataFrame { } /// Get the optimized logical plan for this `DataFrame` - fn optimized_logical_plan(&self) -> PyResult { + fn optimized_logical_plan(&self) -> PyDataFusionResult { Ok(self.df.as_ref().clone().into_optimized_plan()?.into()) } /// Get the execution plan for this `DataFrame` - fn execution_plan(&self, py: Python) -> PyResult { + fn execution_plan(&self, py: Python) -> PyDataFusionResult { let plan = wait_for_future(py, self.df.as_ref().clone().create_physical_plan())?; Ok(plan.into()) } /// Repartition a `DataFrame` based on a logical partitioning scheme. - fn repartition(&self, num: usize) -> PyResult { + fn repartition(&self, num: usize) -> PyDataFusionResult { let new_df = self .df .as_ref() @@ -363,7 +368,7 @@ impl PyDataFrame { /// Repartition a `DataFrame` based on a logical partitioning scheme. 
#[pyo3(signature = (*args, num))] - fn repartition_by_hash(&self, args: Vec, num: usize) -> PyResult { + fn repartition_by_hash(&self, args: Vec, num: usize) -> PyDataFusionResult { let expr = args.into_iter().map(|py_expr| py_expr.into()).collect(); let new_df = self .df @@ -376,7 +381,7 @@ impl PyDataFrame { /// Calculate the union of two `DataFrame`s, preserving duplicate rows.The /// two `DataFrame`s must have exactly the same schema #[pyo3(signature = (py_df, distinct=false))] - fn union(&self, py_df: PyDataFrame, distinct: bool) -> PyResult { + fn union(&self, py_df: PyDataFrame, distinct: bool) -> PyDataFusionResult { let new_df = if distinct { self.df .as_ref() @@ -391,7 +396,7 @@ impl PyDataFrame { /// Calculate the distinct union of two `DataFrame`s. The /// two `DataFrame`s must have exactly the same schema - fn union_distinct(&self, py_df: PyDataFrame) -> PyResult { + fn union_distinct(&self, py_df: PyDataFrame) -> PyDataFusionResult { let new_df = self .df .as_ref() @@ -401,7 +406,7 @@ impl PyDataFrame { } #[pyo3(signature = (column, preserve_nulls=true))] - fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyResult { + fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyDataFusionResult { // TODO: expose RecursionUnnestOptions // REF: https://github.com/apache/datafusion/pull/11577 let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); @@ -414,7 +419,11 @@ impl PyDataFrame { } #[pyo3(signature = (columns, preserve_nulls=true))] - fn unnest_columns(&self, columns: Vec, preserve_nulls: bool) -> PyResult { + fn unnest_columns( + &self, + columns: Vec, + preserve_nulls: bool, + ) -> PyDataFusionResult { // TODO: expose RecursionUnnestOptions // REF: https://github.com/apache/datafusion/pull/11577 let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); @@ -428,7 +437,7 @@ impl PyDataFrame { } /// Calculate the intersection of two `DataFrame`s. 
The two `DataFrame`s must have exactly the same schema - fn intersect(&self, py_df: PyDataFrame) -> PyResult { + fn intersect(&self, py_df: PyDataFrame) -> PyDataFusionResult { let new_df = self .df .as_ref() @@ -438,13 +447,13 @@ impl PyDataFrame { } /// Calculate the exception of two `DataFrame`s. The two `DataFrame`s must have exactly the same schema - fn except_all(&self, py_df: PyDataFrame) -> PyResult { + fn except_all(&self, py_df: PyDataFrame) -> PyDataFusionResult { let new_df = self.df.as_ref().clone().except(py_df.df.as_ref().clone())?; Ok(Self::new(new_df)) } /// Write a `DataFrame` to a CSV file. - fn write_csv(&self, path: &str, with_header: bool, py: Python) -> PyResult<()> { + fn write_csv(&self, path: &str, with_header: bool, py: Python) -> PyDataFusionResult<()> { let csv_options = CsvOptions { has_header: Some(with_header), ..Default::default() @@ -472,7 +481,7 @@ impl PyDataFrame { compression: &str, compression_level: Option, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { fn verify_compression_level(cl: Option) -> Result { cl.ok_or(PyValueError::new_err("compression_level is not defined")) } @@ -496,7 +505,7 @@ impl PyDataFrame { "lz4_raw" => Compression::LZ4_RAW, "uncompressed" => Compression::UNCOMPRESSED, _ => { - return Err(PyValueError::new_err(format!( + return Err(PyDataFusionError::Common(format!( "Unrecognized compression type {compression}" ))); } @@ -522,7 +531,7 @@ impl PyDataFrame { } /// Executes a query and writes the results to a partitioned JSON file. 
- fn write_json(&self, path: &str, py: Python) -> PyResult<()> { + fn write_json(&self, path: &str, py: Python) -> PyDataFusionResult<()> { wait_for_future( py, self.df @@ -551,7 +560,7 @@ impl PyDataFrame { &'py mut self, py: Python<'py>, requested_schema: Option>, - ) -> PyResult> { + ) -> PyDataFusionResult> { let mut batches = wait_for_future(py, self.df.as_ref().clone().collect())?; let mut schema: Schema = self.df.schema().to_owned().into(); @@ -559,15 +568,14 @@ impl PyDataFrame { validate_pycapsule(&schema_capsule, "arrow_schema")?; let schema_ptr = unsafe { schema_capsule.reference::() }; - let desired_schema = Schema::try_from(schema_ptr).map_err(DataFusionError::from)?; + let desired_schema = Schema::try_from(schema_ptr)?; - schema = project_schema(schema, desired_schema).map_err(DataFusionError::ArrowError)?; + schema = project_schema(schema, desired_schema)?; batches = batches .into_iter() .map(|record_batch| record_batch_into_schema(record_batch, &schema)) - .collect::, ArrowError>>() - .map_err(DataFusionError::ArrowError)?; + .collect::, ArrowError>>()?; } let batches_wrapped = batches.into_iter().map(Ok); @@ -578,9 +586,10 @@ impl PyDataFrame { let ffi_stream = FFI_ArrowArrayStream::new(reader); let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name)) + .map_err(PyDataFusionError::from) } - fn execute_stream(&self, py: Python) -> PyResult { + fn execute_stream(&self, py: Python) -> PyDataFusionResult { // create a Tokio runtime to run the async code let rt = &get_tokio_runtime().0; let df = self.df.as_ref().clone(); @@ -647,13 +656,13 @@ impl PyDataFrame { } // Executes this DataFrame to get the total number of rows. - fn count(&self, py: Python) -> PyResult { + fn count(&self, py: Python) -> PyDataFusionResult { Ok(wait_for_future(py, self.df.as_ref().clone().count())?) 
} } /// Print DataFrame -fn print_dataframe(py: Python, df: DataFrame) -> PyResult<()> { +fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { // Get string representation of record batches let batches = wait_for_future(py, df.collect())?; let batches_as_string = pretty::pretty_format_batches(&batches); diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 9d2559429..ace42115b 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -42,7 +42,7 @@ use datafusion::physical_plan::{ SendableRecordBatchStream, Statistics, }; -use crate::errors::DataFusionError; +use crate::errors::PyDataFusionResult; use crate::pyarrow_filter_expression::PyArrowFilterExpression; struct PyArrowBatchesAdapter { @@ -83,8 +83,8 @@ impl DatasetExec { dataset: &Bound<'_, PyAny>, projection: Option>, filters: &[Expr], - ) -> Result { - let columns: Option, DataFusionError>> = projection.map(|p| { + ) -> PyDataFusionResult { + let columns: Option>> = projection.map(|p| { p.iter() .map(|index| { let name: String = dataset diff --git a/src/errors.rs b/src/errors.rs index d12b6ade1..b02b754a2 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -24,10 +24,10 @@ use datafusion::error::DataFusionError as InnerDataFusionError; use prost::EncodeError; use pyo3::{exceptions::PyException, PyErr}; -pub type Result = std::result::Result; +pub type PyDataFusionResult = std::result::Result; #[derive(Debug)] -pub enum DataFusionError { +pub enum PyDataFusionError { ExecutionError(InnerDataFusionError), ArrowError(ArrowError), Common(String), @@ -35,46 +35,46 @@ pub enum DataFusionError { EncodeError(EncodeError), } -impl fmt::Display for DataFusionError { +impl fmt::Display for PyDataFusionError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e:?}"), - DataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), - DataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), 
- DataFusionError::Common(e) => write!(f, "{e}"), - DataFusionError::EncodeError(e) => write!(f, "Failed to encode substrait plan: {e}"), + PyDataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e:?}"), + PyDataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), + PyDataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), + PyDataFusionError::Common(e) => write!(f, "{e}"), + PyDataFusionError::EncodeError(e) => write!(f, "Failed to encode substrait plan: {e}"), } } } -impl From for DataFusionError { - fn from(err: ArrowError) -> DataFusionError { - DataFusionError::ArrowError(err) +impl From for PyDataFusionError { + fn from(err: ArrowError) -> PyDataFusionError { + PyDataFusionError::ArrowError(err) } } -impl From for DataFusionError { - fn from(err: InnerDataFusionError) -> DataFusionError { - DataFusionError::ExecutionError(err) +impl From for PyDataFusionError { + fn from(err: InnerDataFusionError) -> PyDataFusionError { + PyDataFusionError::ExecutionError(err) } } -impl From for DataFusionError { - fn from(err: PyErr) -> DataFusionError { - DataFusionError::PythonError(err) +impl From for PyDataFusionError { + fn from(err: PyErr) -> PyDataFusionError { + PyDataFusionError::PythonError(err) } } -impl From for PyErr { - fn from(err: DataFusionError) -> PyErr { +impl From for PyErr { + fn from(err: PyDataFusionError) -> PyErr { match err { - DataFusionError::PythonError(py_err) => py_err, + PyDataFusionError::PythonError(py_err) => py_err, _ => PyException::new_err(err.to_string()), } } } -impl Error for DataFusionError {} +impl Error for PyDataFusionError {} pub fn py_type_err(e: impl Debug) -> PyErr { PyErr::new::(format!("{e:?}")) diff --git a/src/expr.rs b/src/expr.rs index bca0cd3fa..1e9983d42 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -24,7 +24,6 @@ use std::convert::{From, Into}; use std::sync::Arc; use window::PyWindowFrame; -use arrow::pyarrow::ToPyArrow; use datafusion::arrow::datatypes::{DataType, Field}; use 
datafusion::arrow::pyarrow::PyArrowType; use datafusion::functions::core::expr_ext::FieldAccessor; @@ -33,15 +32,17 @@ use datafusion::logical_expr::{ expr::{AggregateFunction, InList, InSubquery, ScalarFunction, WindowFunction}, lit, Between, BinaryExpr, Case, Cast, Expr, Like, Operator, TryCast, }; -use datafusion::scalar::ScalarValue; -use crate::common::data_type::{DataTypeMap, NullTreatment, RexType}; -use crate::errors::{py_runtime_err, py_type_err, py_unsupported_variant_err, DataFusionError}; +use crate::common::data_type::{DataTypeMap, NullTreatment, PyScalarValue, RexType}; +use crate::errors::{ + py_runtime_err, py_type_err, py_unsupported_variant_err, PyDataFusionError, PyDataFusionResult, +}; use crate::expr::aggregate_expr::PyAggregateFunction; use crate::expr::binary_expr::PyBinaryExpr; use crate::expr::column::PyColumn; use crate::expr::literal::PyLiteral; use crate::functions::add_builder_fns_to_window; +use crate::pyarrow_util::scalar_to_pyarrow; use crate::sql::logical::PyLogicalPlan; use self::alias::PyAlias; @@ -261,8 +262,8 @@ impl PyExpr { } #[staticmethod] - pub fn literal(value: ScalarValue) -> PyExpr { - lit(value).into() + pub fn literal(value: PyScalarValue) -> PyExpr { + lit(value.0).into() } #[staticmethod] @@ -356,7 +357,7 @@ impl PyExpr { /// Extracts the Expr value into a PyObject that can be shared with Python pub fn python_value(&self, py: Python) -> PyResult { match &self.expr { - Expr::Literal(scalar_value) => Ok(scalar_value.to_pyarrow(py)?), + Expr::Literal(scalar_value) => scalar_to_pyarrow(scalar_value, py), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", &self.expr @@ -568,7 +569,7 @@ impl PyExpr { window_frame: Option, order_by: Option>, null_treatment: Option, - ) -> PyResult { + ) -> PyDataFusionResult { match &self.expr { Expr::AggregateFunction(agg_fn) => { let window_fn = Expr::WindowFunction(WindowFunction::new( @@ -592,10 +593,9 @@ impl PyExpr { null_treatment, ), _ => Err( - 
DataFusionError::ExecutionError(datafusion::error::DataFusionError::Plan( + PyDataFusionError::ExecutionError(datafusion::error::DataFusionError::Plan( format!("Using {} with `over` is not allowed. Must use an aggregate or window function.", self.expr.variant_name()), )) - .into(), ), } } @@ -649,34 +649,26 @@ impl PyExprFuncBuilder { .into() } - pub fn build(&self) -> PyResult { - self.builder - .clone() - .build() - .map(|expr| expr.into()) - .map_err(|err| err.into()) + pub fn build(&self) -> PyDataFusionResult { + Ok(self.builder.clone().build().map(|expr| expr.into())?) } } impl PyExpr { - pub fn _column_name(&self, plan: &LogicalPlan) -> Result { + pub fn _column_name(&self, plan: &LogicalPlan) -> PyDataFusionResult { let field = Self::expr_to_field(&self.expr, plan)?; Ok(field.name().to_owned()) } /// Create a [Field] representing an [Expr], given an input [LogicalPlan] to resolve against - pub fn expr_to_field( - expr: &Expr, - input_plan: &LogicalPlan, - ) -> Result, DataFusionError> { + pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> PyDataFusionResult> { match expr { Expr::Wildcard { .. } => { // Since * could be any of the valid column names just return the first one Ok(Arc::new(input_plan.schema().field(0).clone())) } _ => { - let fields = - exprlist_to_fields(&[expr.clone()], input_plan).map_err(PyErr::from)?; + let fields = exprlist_to_fields(&[expr.clone()], input_plan)?; Ok(fields[0].1.clone()) } } diff --git a/src/expr/conditional_expr.rs b/src/expr/conditional_expr.rs index a8a885c54..fe3af2e25 100644 --- a/src/expr/conditional_expr.rs +++ b/src/expr/conditional_expr.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::expr::PyExpr; +use crate::{errors::PyDataFusionResult, expr::PyExpr}; use datafusion::logical_expr::conditional_expressions::CaseBuilder; use pyo3::prelude::*; @@ -44,11 +44,11 @@ impl PyCaseBuilder { } } - fn otherwise(&mut self, else_expr: PyExpr) -> PyResult { + fn otherwise(&mut self, else_expr: PyExpr) -> PyDataFusionResult { Ok(self.case_builder.otherwise(else_expr.expr)?.clone().into()) } - fn end(&mut self) -> PyResult { + fn end(&mut self) -> PyDataFusionResult { Ok(self.case_builder.end()?.clone().into()) } } diff --git a/src/expr/literal.rs b/src/expr/literal.rs index 43084ba4b..2cb2079f1 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::errors::DataFusionError; +use crate::errors::PyDataFusionError; use datafusion::common::ScalarValue; use pyo3::prelude::*; @@ -154,5 +154,5 @@ impl PyLiteral { } fn unexpected_literal_value(value: &ScalarValue) -> PyErr { - DataFusionError::Common(format!("getValue() - Unexpected value: {value}")).into() + PyDataFusionError::Common(format!("getValue() - Unexpected value: {value}")).into() } diff --git a/src/expr/window.rs b/src/expr/window.rs index 6486dbb32..4dc6cb9c9 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -21,8 +21,9 @@ use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, Wind use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; +use crate::common::data_type::PyScalarValue; use crate::common::df_schema::PyDFSchema; -use crate::errors::py_type_err; +use crate::errors::{py_type_err, PyDataFusionResult}; use crate::expr::logical_node::LogicalNode; use crate::expr::sort_expr::{py_sort_expr_list, PySortExpr}; use crate::expr::PyExpr; @@ -171,8 +172,8 @@ impl PyWindowFrame { #[pyo3(signature=(unit, start_bound, end_bound))] pub fn new( unit: &str, - start_bound: Option, - end_bound: Option, + start_bound: Option, + end_bound: Option, ) -> 
PyResult { let units = unit.to_ascii_lowercase(); let units = match units.as_str() { @@ -187,7 +188,7 @@ impl PyWindowFrame { } }; let start_bound = match start_bound { - Some(start_bound) => WindowFrameBound::Preceding(start_bound), + Some(start_bound) => WindowFrameBound::Preceding(start_bound.0), None => match units { WindowFrameUnits::Range => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -200,7 +201,7 @@ impl PyWindowFrame { }, }; let end_bound = match end_bound { - Some(end_bound) => WindowFrameBound::Following(end_bound), + Some(end_bound) => WindowFrameBound::Following(end_bound.0), None => match units { WindowFrameUnits::Rows => WindowFrameBound::Following(ScalarValue::UInt64(None)), WindowFrameUnits::Range => WindowFrameBound::Following(ScalarValue::UInt64(None)), @@ -253,7 +254,7 @@ impl PyWindowFrameBound { matches!(self.frame_bound, WindowFrameBound::Following(_)) } /// Returns the offset of the window frame - pub fn get_offset(&self) -> PyResult> { + pub fn get_offset(&self) -> PyDataFusionResult> { match &self.frame_bound { WindowFrameBound::Preceding(val) | WindowFrameBound::Following(val) => match val { x if x.is_null() => Ok(None), diff --git a/src/functions.rs b/src/functions.rs index ae032d702..46c748cf8 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -22,8 +22,10 @@ use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; use crate::common::data_type::NullTreatment; +use crate::common::data_type::PyScalarValue; use crate::context::PySessionContext; -use crate::errors::DataFusionError; +use crate::errors::PyDataFusionError; +use crate::errors::PyDataFusionResult; use crate::expr::conditional_expr::PyCaseBuilder; use crate::expr::sort_expr::to_sort_expressions; use crate::expr::sort_expr::PySortExpr; @@ -44,7 +46,7 @@ fn add_builder_fns_to_aggregate( filter: Option, order_by: Option>, null_treatment: Option, -) -> 
PyResult { +) -> PyDataFusionResult { // Since ExprFuncBuilder::new() is private, we can guarantee initializing // a builder with an `null_treatment` with option None let mut builder = agg_fn.null_treatment(None); @@ -228,7 +230,10 @@ fn when(when: PyExpr, then: PyExpr) -> PyResult { /// 1) If no function has been found, search default aggregate functions. /// /// NOTE: we search the built-ins first because the `UDAF` versions currently do not have the same behavior. -fn find_window_fn(name: &str, ctx: Option) -> PyResult { +fn find_window_fn( + name: &str, + ctx: Option, +) -> PyDataFusionResult { if let Some(ctx) = ctx { // search UDAFs let udaf = ctx @@ -284,7 +289,9 @@ fn find_window_fn(name: &str, ctx: Option) -> PyResult, order_by: Option>, null_treatment: Option - ) -> PyResult { + ) -> PyDataFusionResult { let agg_fn = functions_aggregate::expr_fn::$NAME($($arg.into()),*); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) @@ -362,7 +369,7 @@ macro_rules! 
aggregate_function_vec_args { filter: Option, order_by: Option>, null_treatment: Option - ) -> PyResult { + ) -> PyDataFusionResult { let agg_fn = functions_aggregate::expr_fn::$NAME(vec![$($arg.into()),*]); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) @@ -642,7 +649,7 @@ pub fn approx_percentile_cont( percentile: f64, num_centroids: Option, // enforces optional arguments at the end, currently filter: Option, -) -> PyResult { +) -> PyDataFusionResult { let args = if let Some(num_centroids) = num_centroids { vec![expression.expr, lit(percentile), lit(num_centroids)] } else { @@ -661,7 +668,7 @@ pub fn approx_percentile_cont_with_weight( weight: PyExpr, percentile: f64, filter: Option, -) -> PyResult { +) -> PyDataFusionResult { let agg_fn = functions_aggregate::expr_fn::approx_percentile_cont_with_weight( expression.expr, weight.expr, @@ -683,7 +690,7 @@ pub fn first_value( filter: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { // If we initialize the UDAF with order_by directly, then it gets over-written by the builder let agg_fn = functions_aggregate::expr_fn::first_value(expr.expr, None); @@ -700,7 +707,7 @@ pub fn nth_value( filter: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(expr.expr, n, vec![]); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) } @@ -715,7 +722,7 @@ pub fn string_agg( filter: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { let agg_fn = datafusion::functions_aggregate::string_agg::string_agg(expr.expr, lit(delimiter)); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) } @@ -726,7 +733,7 @@ pub(crate) fn add_builder_fns_to_window( window_frame: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { 
let null_treatment = null_treatment.map(|n| n.into()); let mut builder = window_fn.null_treatment(null_treatment); @@ -748,7 +755,7 @@ pub(crate) fn add_builder_fns_to_window( builder = builder.window_frame(window_frame.into()); } - builder.build().map(|e| e.into()).map_err(|err| err.into()) + Ok(builder.build().map(|e| e.into())?) } #[pyfunction] @@ -756,10 +763,11 @@ pub(crate) fn add_builder_fns_to_window( pub fn lead( arg: PyExpr, shift_offset: i64, - default_value: Option, + default_value: Option, partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { + let default_value = default_value.map(|v| v.into()); let window_fn = functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -770,10 +778,11 @@ pub fn lead( pub fn lag( arg: PyExpr, shift_offset: i64, - default_value: Option, + default_value: Option, partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { + let default_value = default_value.map(|v| v.into()); let window_fn = functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -784,7 +793,7 @@ pub fn lag( pub fn row_number( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::row_number(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -795,7 +804,7 @@ pub fn row_number( pub fn rank( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -806,7 +815,7 @@ pub fn rank( pub fn dense_rank( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::dense_rank(); 
add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -817,7 +826,7 @@ pub fn dense_rank( pub fn percent_rank( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::percent_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -828,7 +837,7 @@ pub fn percent_rank( pub fn cume_dist( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::cume_dist(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -840,7 +849,7 @@ pub fn ntile( arg: PyExpr, partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::ntile(arg.into()); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) diff --git a/src/lib.rs b/src/lib.rs index 1111d5d06..317c3a49a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,6 +48,7 @@ pub mod expr; mod functions; pub mod physical_plan; mod pyarrow_filter_expression; +pub mod pyarrow_util; mod record_batch; pub mod sql; pub mod store; diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 9ef2f0ebb..295908dc7 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyBytes}; -use crate::{context::PySessionContext, errors::DataFusionError}; +use crate::{context::PySessionContext, errors::PyDataFusionResult}; #[pyclass(name = "ExecutionPlan", module = "datafusion", subclass)] #[derive(Debug, Clone)] @@ -58,7 +58,7 @@ impl PyExecutionPlan { format!("{}", d.indent(false)) } - pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyResult> { + pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyDataFusionResult> { let codec = DefaultPhysicalExtensionCodec {}; let proto = datafusion_proto::protobuf::PhysicalPlanNode::try_from_physical_plan( 
self.plan.clone(), @@ -70,7 +70,10 @@ impl PyExecutionPlan { } #[staticmethod] - pub fn from_proto(ctx: PySessionContext, proto_msg: Bound<'_, PyBytes>) -> PyResult { + pub fn from_proto( + ctx: PySessionContext, + proto_msg: Bound<'_, PyBytes>, + ) -> PyDataFusionResult { let bytes: &[u8] = proto_msg.extract()?; let proto_plan = datafusion_proto::protobuf::PhysicalPlanNode::decode(bytes).map_err(|e| { @@ -81,9 +84,7 @@ impl PyExecutionPlan { })?; let codec = DefaultPhysicalExtensionCodec {}; - let plan = proto_plan - .try_into_physical_plan(&ctx.ctx, &ctx.ctx.runtime_env(), &codec) - .map_err(DataFusionError::from)?; + let plan = proto_plan.try_into_physical_plan(&ctx.ctx, &ctx.ctx.runtime_env(), &codec)?; Ok(Self::new(plan)) } diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 0f97ea442..314eebf4f 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -21,11 +21,11 @@ use pyo3::prelude::*; use std::convert::TryFrom; use std::result::Result; -use arrow::pyarrow::ToPyArrow; use datafusion::common::{Column, ScalarValue}; use datafusion::logical_expr::{expr::InList, Between, BinaryExpr, Expr, Operator}; -use crate::errors::DataFusionError; +use crate::errors::{PyDataFusionError, PyDataFusionResult}; +use crate::pyarrow_util::scalar_to_pyarrow; #[derive(Debug)] #[repr(transparent)] @@ -34,7 +34,7 @@ pub(crate) struct PyArrowFilterExpression(PyObject); fn operator_to_py<'py>( operator: &Operator, op: &Bound<'py, PyModule>, -) -> Result, DataFusionError> { +) -> PyDataFusionResult> { let py_op: Bound<'_, PyAny> = match operator { Operator::Eq => op.getattr("eq")?, Operator::NotEq => op.getattr("ne")?, @@ -45,7 +45,7 @@ fn operator_to_py<'py>( Operator::And => op.getattr("and_")?, Operator::Or => op.getattr("or_")?, _ => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "Unsupported operator {operator:?}" ))) } @@ -53,8 +53,8 @@ fn operator_to_py<'py>( 
Ok(py_op) } -fn extract_scalar_list(exprs: &[Expr], py: Python) -> Result, DataFusionError> { - let ret: Result, DataFusionError> = exprs +fn extract_scalar_list(exprs: &[Expr], py: Python) -> PyDataFusionResult> { + let ret = exprs .iter() .map(|expr| match expr { // TODO: should we also leverage `ScalarValue::to_pyarrow` here? @@ -71,11 +71,11 @@ fn extract_scalar_list(exprs: &[Expr], py: Python) -> Result, Data ScalarValue::Float32(Some(f)) => Ok(f.into_py(py)), ScalarValue::Float64(Some(f)) => Ok(f.into_py(py)), ScalarValue::Utf8(Some(s)) => Ok(s.into_py(py)), - _ => Err(DataFusionError::Common(format!( + _ => Err(PyDataFusionError::Common(format!( "PyArrow can't handle ScalarValue: {v:?}" ))), }, - _ => Err(DataFusionError::Common(format!( + _ => Err(PyDataFusionError::Common(format!( "Only a list of Literals are supported got {expr:?}" ))), }) @@ -90,7 +90,7 @@ impl PyArrowFilterExpression { } impl TryFrom<&Expr> for PyArrowFilterExpression { - type Error = DataFusionError; + type Error = PyDataFusionError; // Converts a Datafusion filter Expr into an expression string that can be evaluated by Python // Note that pyarrow.compute.{field,scalar} are put into Python globals() when evaluated @@ -100,9 +100,9 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { Python::with_gil(|py| { let pc = Python::import_bound(py, "pyarrow.compute")?; let op_module = Python::import_bound(py, "operator")?; - let pc_expr: Result, DataFusionError> = match expr { + let pc_expr: PyDataFusionResult> = match expr { Expr::Column(Column { name, .. 
}) => Ok(pc.getattr("field")?.call1((name,))?), - Expr::Literal(scalar) => Ok(scalar.to_pyarrow(py)?.into_bound(py)), + Expr::Literal(scalar) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), Expr::BinaryExpr(BinaryExpr { left, op, right }) => { let operator = operator_to_py(op, &op_module)?; let left = PyArrowFilterExpression::try_from(left.as_ref())?.0; @@ -167,7 +167,7 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { Ok(if *negated { invert.call1((ret,))? } else { ret }) } - _ => Err(DataFusionError::Common(format!( + _ => Err(PyDataFusionError::Common(format!( "Unsupported Datafusion expression {expr:?}" ))), }; diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs new file mode 100644 index 000000000..2b31467f8 --- /dev/null +++ b/src/pyarrow_util.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Conversions between PyArrow and DataFusion types + +use arrow::array::{Array, ArrayData}; +use arrow::pyarrow::{FromPyArrow, ToPyArrow}; +use datafusion::scalar::ScalarValue; +use pyo3::types::{PyAnyMethods, PyList}; +use pyo3::{Bound, FromPyObject, PyAny, PyObject, PyResult, Python}; + +use crate::common::data_type::PyScalarValue; +use crate::errors::PyDataFusionError; + +impl FromPyArrow for PyScalarValue { + fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> PyResult { + let py = value.py(); + let typ = value.getattr("type")?; + let val = value.call_method0("as_py")?; + + // construct pyarrow array from the python value and pyarrow type + let factory = py.import_bound("pyarrow")?.getattr("array")?; + let args = PyList::new_bound(py, [val]); + let array = factory.call1((args, typ))?; + + // convert the pyarrow array to rust array using C data interface + let array = arrow::array::make_array(ArrayData::from_pyarrow_bound(&array)?); + let scalar = ScalarValue::try_from_array(&array, 0).map_err(PyDataFusionError::from)?; + + Ok(PyScalarValue(scalar)) + } +} + +impl<'source> FromPyObject<'source> for PyScalarValue { + fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult { + Self::from_pyarrow_bound(value) + } +} + +pub fn scalar_to_pyarrow(scalar: &ScalarValue, py: Python) -> PyResult { + let array = scalar.to_array().map_err(PyDataFusionError::from)?; + // convert to pyarrow array using C data interface + let pyarray = array.to_data().to_pyarrow(py)?; + let pyscalar = pyarray.call_method1(py, "__getitem__", (0,))?; + + Ok(pyscalar) +} diff --git a/src/record_batch.rs b/src/record_batch.rs index eacdb5867..ec61c263f 100644 --- a/src/record_batch.rs +++ b/src/record_batch.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::errors::PyDataFusionError; use crate::utils::wait_for_future; use datafusion::arrow::pyarrow::ToPyArrow; use datafusion::arrow::record_batch::RecordBatch; @@ -90,7 +91,7 @@ async fn next_stream( let mut stream = stream.lock().await; match 
stream.next().await { Some(Ok(batch)) => Ok(batch.into()), - Some(Err(e)) => Err(e.into()), + Some(Err(e)) => Err(PyDataFusionError::from(e))?, None => { // Depending on whether the iteration is sync or not, we raise either a // StopIteration or a StopAsyncIteration diff --git a/src/sql/exceptions.rs b/src/sql/exceptions.rs index c458402a0..cfb02274b 100644 --- a/src/sql/exceptions.rs +++ b/src/sql/exceptions.rs @@ -17,13 +17,7 @@ use std::fmt::{Debug, Display}; -use pyo3::{create_exception, PyErr}; - -// Identifies exceptions that occur while attempting to generate a `LogicalPlan` from a SQL string -create_exception!(rust, ParsingException, pyo3::exceptions::PyException); - -// Identifies exceptions that occur during attempts to optimization an existing `LogicalPlan` -create_exception!(rust, OptimizationException, pyo3::exceptions::PyException); +use pyo3::PyErr; pub fn py_type_err(e: impl Debug + Display) -> PyErr { PyErr::new::(format!("{e}")) @@ -33,10 +27,6 @@ pub fn py_runtime_err(e: impl Debug + Display) -> PyErr { PyErr::new::(format!("{e}")) } -pub fn py_parsing_exp(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) -} - -pub fn py_optimization_exp(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) +pub fn py_value_err(e: impl Debug + Display) -> PyErr { + PyErr::new::(format!("{e}")) } diff --git a/src/sql/logical.rs b/src/sql/logical.rs index a541889c7..1be33b75f 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::errors::PyDataFusionResult; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; use crate::expr::distinct::PyDistinct; @@ -34,7 +35,7 @@ use crate::expr::table_scan::PyTableScan; use crate::expr::unnest::PyUnnest; use crate::expr::window::PyWindowExpr; use crate::{context::PySessionContext, errors::py_unsupported_variant_err}; -use datafusion::{error::DataFusionError, logical_expr::LogicalPlan}; +use 
datafusion::logical_expr::LogicalPlan; use datafusion_proto::logical_plan::{AsLogicalPlan, DefaultLogicalExtensionCodec}; use prost::Message; use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyBytes}; @@ -125,7 +126,7 @@ impl PyLogicalPlan { format!("{}", self.plan.display_graphviz()) } - pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyResult> { + pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyDataFusionResult> { let codec = DefaultLogicalExtensionCodec {}; let proto = datafusion_proto::protobuf::LogicalPlanNode::try_from_logical_plan(&self.plan, &codec)?; @@ -135,7 +136,10 @@ impl PyLogicalPlan { } #[staticmethod] - pub fn from_proto(ctx: PySessionContext, proto_msg: Bound<'_, PyBytes>) -> PyResult { + pub fn from_proto( + ctx: PySessionContext, + proto_msg: Bound<'_, PyBytes>, + ) -> PyDataFusionResult { let bytes: &[u8] = proto_msg.extract()?; let proto_plan = datafusion_proto::protobuf::LogicalPlanNode::decode(bytes).map_err(|e| { @@ -146,9 +150,7 @@ impl PyLogicalPlan { })?; let codec = DefaultLogicalExtensionCodec {}; - let plan = proto_plan - .try_into_logical_plan(&ctx.ctx, &codec) - .map_err(DataFusionError::from)?; + let plan = proto_plan.try_into_logical_plan(&ctx.ctx, &codec)?; Ok(Self::new(plan)) } } diff --git a/src/substrait.rs b/src/substrait.rs index 16e8c9507..8dcf3e8a7 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -18,7 +18,7 @@ use pyo3::{prelude::*, types::PyBytes}; use crate::context::PySessionContext; -use crate::errors::{py_datafusion_err, DataFusionError}; +use crate::errors::{py_datafusion_err, PyDataFusionError, PyDataFusionResult}; use crate::sql::logical::PyLogicalPlan; use crate::utils::wait_for_future; @@ -39,7 +39,7 @@ impl PyPlan { let mut proto_bytes = Vec::::new(); self.plan .encode(&mut proto_bytes) - .map_err(DataFusionError::EncodeError)?; + .map_err(PyDataFusionError::EncodeError)?; Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) } } @@ -66,41 +66,47 @@ pub struct PySubstraitSerializer; 
#[pymethods] impl PySubstraitSerializer { #[staticmethod] - pub fn serialize(sql: &str, ctx: PySessionContext, path: &str, py: Python) -> PyResult<()> { - wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path)) - .map_err(DataFusionError::from)?; + pub fn serialize( + sql: &str, + ctx: PySessionContext, + path: &str, + py: Python, + ) -> PyDataFusionResult<()> { + wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))?; Ok(()) } #[staticmethod] - pub fn serialize_to_plan(sql: &str, ctx: PySessionContext, py: Python) -> PyResult { - match PySubstraitSerializer::serialize_bytes(sql, ctx, py) { - Ok(proto_bytes) => { - let proto_bytes = proto_bytes.bind(py).downcast::().unwrap(); - PySubstraitSerializer::deserialize_bytes(proto_bytes.as_bytes().to_vec(), py) - } - Err(e) => Err(py_datafusion_err(e)), - } + pub fn serialize_to_plan( + sql: &str, + ctx: PySessionContext, + py: Python, + ) -> PyDataFusionResult { + PySubstraitSerializer::serialize_bytes(sql, ctx, py).and_then(|proto_bytes| { + let proto_bytes = proto_bytes.bind(py).downcast::().unwrap(); + PySubstraitSerializer::deserialize_bytes(proto_bytes.as_bytes().to_vec(), py) + }) } #[staticmethod] - pub fn serialize_bytes(sql: &str, ctx: PySessionContext, py: Python) -> PyResult { - let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx)) - .map_err(DataFusionError::from)?; + pub fn serialize_bytes( + sql: &str, + ctx: PySessionContext, + py: Python, + ) -> PyDataFusionResult { + let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?; Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) } #[staticmethod] - pub fn deserialize(path: &str, py: Python) -> PyResult { - let plan = - wait_for_future(py, serializer::deserialize(path)).map_err(DataFusionError::from)?; + pub fn deserialize(path: &str, py: Python) -> PyDataFusionResult { + let plan = wait_for_future(py, serializer::deserialize(path))?; Ok(PyPlan { plan: *plan }) } 
#[staticmethod] - pub fn deserialize_bytes(proto_bytes: Vec, py: Python) -> PyResult { - let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes)) - .map_err(DataFusionError::from)?; + pub fn deserialize_bytes(proto_bytes: Vec, py: Python) -> PyDataFusionResult { + let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))?; Ok(PyPlan { plan: *plan }) } } @@ -134,10 +140,10 @@ impl PySubstraitConsumer { ctx: &mut PySessionContext, plan: PyPlan, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let session_state = ctx.ctx.state(); let result = consumer::from_substrait_plan(&session_state, &plan.plan); - let logical_plan = wait_for_future(py, result).map_err(DataFusionError::from)?; + let logical_plan = wait_for_future(py, result)?; Ok(PyLogicalPlan::new(logical_plan)) } } diff --git a/src/udaf.rs b/src/udaf.rs index a6aa59ac3..5f21533e0 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -28,6 +28,7 @@ use datafusion::logical_expr::{ create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, }; +use crate::common::data_type::PyScalarValue; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -44,13 +45,25 @@ impl RustAccumulator { impl Accumulator for RustAccumulator { fn state(&mut self) -> Result> { - Python::with_gil(|py| self.accum.bind(py).call_method0("state")?.extract()) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) + Python::with_gil(|py| { + self.accum + .bind(py) + .call_method0("state")? + .extract::>() + }) + .map(|v| v.into_iter().map(|x| x.0).collect()) + .map_err(|e| DataFusionError::Execution(format!("{e}"))) } fn evaluate(&mut self) -> Result { - Python::with_gil(|py| self.accum.bind(py).call_method0("evaluate")?.extract()) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) + Python::with_gil(|py| { + self.accum + .bind(py) + .call_method0("evaluate")? 
+ .extract::() + }) + .map(|v| v.0) + .map_err(|e| DataFusionError::Execution(format!("{e}"))) } fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { diff --git a/src/udwf.rs b/src/udwf.rs index 689eb79e3..04a4a1640 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -26,6 +26,7 @@ use datafusion::scalar::ScalarValue; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use crate::common::data_type::PyScalarValue; use crate::expr::PyExpr; use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; @@ -133,7 +134,8 @@ impl PartitionEvaluator for RustPartitionEvaluator { self.evaluator .bind(py) .call_method1("evaluate", py_args) - .and_then(|v| v.extract()) + .and_then(|v| v.extract::()) + .map(|v| v.0) .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) } diff --git a/src/utils.rs b/src/utils.rs index 795589752..ed224b364 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::errors::DataFusionError; +use crate::errors::{PyDataFusionError, PyDataFusionResult}; use crate::TokioRuntime; use datafusion::logical_expr::Volatility; use pyo3::exceptions::PyValueError; @@ -47,13 +47,13 @@ where py.allow_threads(|| runtime.block_on(f)) } -pub(crate) fn parse_volatility(value: &str) -> Result { +pub(crate) fn parse_volatility(value: &str) -> PyDataFusionResult { Ok(match value { "immutable" => Volatility::Immutable, "stable" => Volatility::Stable, "volatile" => Volatility::Volatile, value => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "Unsupportad volatility type: `{value}`, supported \ values are: immutable, stable and volatile." 
))) From d3c4dabe3c24d419911106bdde3dfe1244e1224c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 09:42:03 -0500 Subject: [PATCH 041/248] Fix verify-release-candidate script by removing reference to requirements-310.txt (#1012) * Fix verify-release-candidate script by removing requirements.txt * Update dev/release/verify-release-candidate.sh Co-authored-by: Kevin Liu --------- Co-authored-by: Kevin Liu --- dev/release/verify-release-candidate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 1a9104b55..2bfce0e2d 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -128,7 +128,7 @@ test_source_distribution() { python3 -m venv .venv source .venv/bin/activate python3 -m pip install -U pip - python3 -m pip install -r requirements-310.txt + python3 -m pip install -U maturin maturin develop #TODO: we should really run tests here as well From 93ac6a820353b3ddea014be1eddad8bd004b0fce Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 7 Feb 2025 10:39:51 -0500 Subject: [PATCH 042/248] Prepare release 44.0.0 (#1009) --- Cargo.lock | 2 +- Cargo.toml | 2 +- dev/changelog/44.0.0.md | 58 ++++++++ pyproject.toml | 1 + uv.lock | 301 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 dev/changelog/44.0.0.md diff --git a/Cargo.lock b/Cargo.lock index c6590fd21..50809696b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1327,7 +1327,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 003ba36e5..44e6e2244 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "43.0.0" +version = "44.0.0" homepage = "https://datafusion.apache.org/python" repository = 
"https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/44.0.0.md b/dev/changelog/44.0.0.md new file mode 100644 index 000000000..c5ed4bdb0 --- /dev/null +++ b/dev/changelog/44.0.0.md @@ -0,0 +1,58 @@ + + +# Apache DataFusion Python 44.0.0 Changelog + +This release consists of 12 commits from 5 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: support enable_url_table config [#980](https://github.com/apache/datafusion-python/pull/980) (chenkovsky) +- feat: remove DataFusion pyarrow feat [#1000](https://github.com/apache/datafusion-python/pull/1000) (timsaucer) + +**Fixed bugs:** + +- fix: correct LZ0 to LZO in compression options [#995](https://github.com/apache/datafusion-python/pull/995) (kosiew) + +**Other:** + +- Add arrow cast [#962](https://github.com/apache/datafusion-python/pull/962) (kosiew) +- Fix small issues in pyproject.toml [#976](https://github.com/apache/datafusion-python/pull/976) (kylebarron) +- chore: set validation and type hint for ffi tableprovider [#983](https://github.com/apache/datafusion-python/pull/983) (ion-elgreco) +- Support async iteration of RecordBatchStream [#975](https://github.com/apache/datafusion-python/pull/975) (kylebarron) +- Chore/upgrade datafusion 44 [#973](https://github.com/apache/datafusion-python/pull/973) (timsaucer) +- Default to ZSTD compression when writing Parquet [#981](https://github.com/apache/datafusion-python/pull/981) (kosiew) +- Feat/use uv python management [#994](https://github.com/apache/datafusion-python/pull/994) (timsaucer) +- minor: Update dependencies prior to release [#999](https://github.com/apache/datafusion-python/pull/999) (timsaucer) +- Apply import ordering in ruff check [#1001](https://github.com/apache/datafusion-python/pull/1001) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. 
Here is a breakdown of commits (PRs merged) per contributor. + +``` + 5 Tim Saucer + 3 kosiew + 2 Kyle Barron + 1 Chongchen Chen + 1 Ion Koutsouris +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/pyproject.toml b/pyproject.toml index 32bb28d21..f416e02a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,7 @@ dev = [ "pytest>=7.4.4", "ruff>=0.9.1", "toml>=0.10.2", + "pygithub==2.5.0", ] docs = [ "sphinx>=7.1.2", diff --git a/uv.lock b/uv.lock index 75d9ed018..587ddc8b7 100644 --- a/uv.lock +++ b/uv.lock @@ -139,6 +139,83 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a5/32/8f6669fc4798494966bf446c8c4a162e0b5d893dff088afddf76414f70e1/certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56", size = 164927 }, ] +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/07/f44ca684db4e4f08a3fdc6eeb9a0d15dc6883efc7b8c90357fdbf74e186c/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", size = 182191 }, + { url = "https://files.pythonhosted.org/packages/08/fd/cc2fedbd887223f9f5d170c96e57cbf655df9831a6546c1727ae13fa977a/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", size = 178592 }, + { url = 
"https://files.pythonhosted.org/packages/de/cc/4635c320081c78d6ffc2cab0a76025b691a91204f4aa317d568ff9280a2d/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", size = 426024 }, + { url = "https://files.pythonhosted.org/packages/b6/7b/3b2b250f3aab91abe5f8a51ada1b717935fdaec53f790ad4100fe2ec64d1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", size = 448188 }, + { url = "https://files.pythonhosted.org/packages/d3/48/1b9283ebbf0ec065148d8de05d647a986c5f22586b18120020452fff8f5d/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", size = 455571 }, + { url = "https://files.pythonhosted.org/packages/40/87/3b8452525437b40f39ca7ff70276679772ee7e8b394934ff60e63b7b090c/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", size = 436687 }, + { url = "https://files.pythonhosted.org/packages/8d/fb/4da72871d177d63649ac449aec2e8a29efe0274035880c7af59101ca2232/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", size = 446211 }, + { url = "https://files.pythonhosted.org/packages/ab/a0/62f00bcb411332106c02b663b26f3545a9ef136f80d5df746c05878f8c4b/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", size = 461325 }, + { url = "https://files.pythonhosted.org/packages/36/83/76127035ed2e7e27b0787604d99da630ac3123bfb02d8e80c633f218a11d/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", size 
= 438784 }, + { url = "https://files.pythonhosted.org/packages/21/81/a6cd025db2f08ac88b901b745c163d884641909641f9b826e8cb87645942/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", size = 461564 }, + { url = "https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804 }, + { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299 }, + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264 }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651 }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259 }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200 }, + { url = 
"https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235 }, + { url = "https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721 }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242 }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999 }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242 }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604 }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727 }, + { url = 
"https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400 }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 }, + { url = 
"https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424 }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736 }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, + { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989 }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802 }, + { url = 
"https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792 }, + { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893 }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810 }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200 }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447 }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358 }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", 
size = 488469 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, + { url = "https://files.pythonhosted.org/packages/48/08/15bf6b43ae9bd06f6b00ad8a91f5a8fe1069d4c9fab550a866755402724e/cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", size = 182457 }, + { url = "https://files.pythonhosted.org/packages/c2/5b/f1523dd545f92f7df468e5f653ffa4df30ac222f3c884e51e139878f1cb5/cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", size = 425932 }, + { url = "https://files.pythonhosted.org/packages/53/93/7e547ab4105969cc8c93b38a667b82a835dd2cc78f3a7dad6130cfd41e1d/cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", size = 448585 }, + { url = "https://files.pythonhosted.org/packages/56/c4/a308f2c332006206bb511de219efeff090e9d63529ba0a77aae72e82248b/cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", size = 456268 }, + { url = "https://files.pythonhosted.org/packages/ca/5b/b63681518265f2f4060d2b60755c1c77ec89e5e045fc3773b72735ddaad5/cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", size = 436592 }, + { url = 
"https://files.pythonhosted.org/packages/bb/19/b51af9f4a4faa4a8ac5a0e5d5c2522dcd9703d07fac69da34a36c4d960d3/cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", size = 446512 }, + { url = "https://files.pythonhosted.org/packages/e2/63/2bed8323890cb613bbecda807688a31ed11a7fe7afe31f8faaae0206a9a3/cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", size = 171576 }, + { url = "https://files.pythonhosted.org/packages/2f/70/80c33b044ebc79527447fd4fbc5455d514c3bb840dede4455de97da39b4d/cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", size = 181229 }, + { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220 }, + { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605 }, + { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910 }, + { url = "https://files.pythonhosted.org/packages/42/7a/9d086fab7c66bd7c4d0f27c57a1b6b068ced810afc498cc8c49e0088661c/cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", size = 447200 }, + { url = 
"https://files.pythonhosted.org/packages/da/63/1785ced118ce92a993b0ec9e0d0ac8dc3e5dbfbcaa81135be56c69cabbb6/cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", size = 454565 }, + { url = "https://files.pythonhosted.org/packages/74/06/90b8a44abf3556599cdec107f7290277ae8901a58f75e6fe8f970cd72418/cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", size = 435635 }, + { url = "https://files.pythonhosted.org/packages/bd/62/a1f468e5708a70b1d86ead5bab5520861d9c7eacce4a885ded9faa7729c3/cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", size = 445218 }, + { url = "https://files.pythonhosted.org/packages/5b/95/b34462f3ccb09c2594aa782d90a90b045de4ff1f70148ee79c69d37a0a5a/cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", size = 460486 }, + { url = "https://files.pythonhosted.org/packages/fc/fc/a1e4bebd8d680febd29cf6c8a40067182b64f00c7d105f8f26b5bc54317b/cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", size = 437911 }, + { url = "https://files.pythonhosted.org/packages/e6/c3/21cab7a6154b6a5ea330ae80de386e7665254835b9e98ecc1340b3a7de9a/cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", size = 460632 }, + { url = "https://files.pythonhosted.org/packages/cb/b5/fd9f8b5a84010ca169ee49f4e4ad6f8c05f4e3545b72ee041dbbcb159882/cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", size = 171820 }, + { url = 
"https://files.pythonhosted.org/packages/8c/52/b08750ce0bce45c143e1b5d7357ee8c55341b52bdef4b0f081af1eb248c2/cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", size = 181290 }, +] + [[package]] name = "charset-normalizer" version = "3.4.1" @@ -235,9 +312,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "cryptography" +version = "44.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/4c/45dfa6829acffa344e3967d6006ee4ae8be57af746ae2eba1c431949b32c/cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02", size = 710657 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/09/8cc67f9b84730ad330b3b72cf867150744bf07ff113cda21a15a1c6d2c7c/cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123", size = 6541833 }, + { url = "https://files.pythonhosted.org/packages/7e/5b/3759e30a103144e29632e7cb72aec28cedc79e514b2ea8896bb17163c19b/cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092", size = 3922710 }, + { url = "https://files.pythonhosted.org/packages/5f/58/3b14bf39f1a0cfd679e753e8647ada56cddbf5acebffe7db90e184c76168/cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f", size = 4137546 }, + { url = 
"https://files.pythonhosted.org/packages/98/65/13d9e76ca19b0ba5603d71ac8424b5694415b348e719db277b5edc985ff5/cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb", size = 3915420 }, + { url = "https://files.pythonhosted.org/packages/b1/07/40fe09ce96b91fc9276a9ad272832ead0fddedcba87f1190372af8e3039c/cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b", size = 4154498 }, + { url = "https://files.pythonhosted.org/packages/75/ea/af65619c800ec0a7e4034207aec543acdf248d9bffba0533342d1bd435e1/cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543", size = 3932569 }, + { url = "https://files.pythonhosted.org/packages/c7/af/d1deb0c04d59612e3d5e54203159e284d3e7a6921e565bb0eeb6269bdd8a/cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e", size = 4016721 }, + { url = "https://files.pythonhosted.org/packages/bd/69/7ca326c55698d0688db867795134bdfac87136b80ef373aaa42b225d6dd5/cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e", size = 4240915 }, + { url = "https://files.pythonhosted.org/packages/ef/d4/cae11bf68c0f981e0413906c6dd03ae7fa864347ed5fac40021df1ef467c/cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053", size = 2757925 }, + { url = "https://files.pythonhosted.org/packages/64/b1/50d7739254d2002acae64eed4fc43b24ac0cc44bf0a0d388d1ca06ec5bb1/cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd", size = 3202055 }, + { url = 
"https://files.pythonhosted.org/packages/11/18/61e52a3d28fc1514a43b0ac291177acd1b4de00e9301aaf7ef867076ff8a/cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591", size = 6542801 }, + { url = "https://files.pythonhosted.org/packages/1a/07/5f165b6c65696ef75601b781a280fc3b33f1e0cd6aa5a92d9fb96c410e97/cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7", size = 3922613 }, + { url = "https://files.pythonhosted.org/packages/28/34/6b3ac1d80fc174812486561cf25194338151780f27e438526f9c64e16869/cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc", size = 4137925 }, + { url = "https://files.pythonhosted.org/packages/d0/c7/c656eb08fd22255d21bc3129625ed9cd5ee305f33752ef2278711b3fa98b/cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289", size = 3915417 }, + { url = "https://files.pythonhosted.org/packages/ef/82/72403624f197af0db6bac4e58153bc9ac0e6020e57234115db9596eee85d/cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7", size = 4155160 }, + { url = "https://files.pythonhosted.org/packages/a2/cd/2f3c440913d4329ade49b146d74f2e9766422e1732613f57097fea61f344/cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c", size = 3932331 }, + { url = "https://files.pythonhosted.org/packages/7f/df/8be88797f0a1cca6e255189a57bb49237402b1880d6e8721690c5603ac23/cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64", size = 4017372 }, + { url = 
"https://files.pythonhosted.org/packages/af/36/5ccc376f025a834e72b8e52e18746b927f34e4520487098e283a719c205e/cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285", size = 4239657 }, + { url = "https://files.pythonhosted.org/packages/46/b0/f4f7d0d0bcfbc8dd6296c1449be326d04217c57afb8b2594f017eed95533/cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417", size = 2758672 }, + { url = "https://files.pythonhosted.org/packages/97/9b/443270b9210f13f6ef240eff73fd32e02d381e7103969dc66ce8e89ee901/cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede", size = 3202071 }, + { url = "https://files.pythonhosted.org/packages/77/d4/fea74422326388bbac0c37b7489a0fcb1681a698c3b875959430ba550daa/cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731", size = 3338857 }, + { url = "https://files.pythonhosted.org/packages/1a/aa/ba8a7467c206cb7b62f09b4168da541b5109838627f582843bbbe0235e8e/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4", size = 3850615 }, + { url = "https://files.pythonhosted.org/packages/89/fa/b160e10a64cc395d090105be14f399b94e617c879efd401188ce0fea39ee/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756", size = 4081622 }, + { url = "https://files.pythonhosted.org/packages/47/8f/20ff0656bb0cf7af26ec1d01f780c5cfbaa7666736063378c5f48558b515/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c", size = 3867546 }, + { url = 
"https://files.pythonhosted.org/packages/38/d9/28edf32ee2fcdca587146bcde90102a7319b2f2c690edfa627e46d586050/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa", size = 4090937 }, + { url = "https://files.pythonhosted.org/packages/cc/9d/37e5da7519de7b0b070a3fedd4230fe76d50d2a21403e0f2153d70ac4163/cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c", size = 3128774 }, +] + [[package]] name = "datafusion" -version = "43.0.0" +version = "44.0.0" source = { editable = "." } dependencies = [ { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, @@ -249,6 +363,7 @@ dependencies = [ dev = [ { name = "maturin" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pygithub" }, { name = "pytest" }, { name = "ruff" }, { name = "toml" }, @@ -282,6 +397,7 @@ requires-dist = [ dev = [ { name = "maturin", specifier = ">=1.8.1" }, { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "pygithub", specifier = "==2.5.0" }, { name = "pytest", specifier = ">=7.4.4" }, { name = "ruff", specifier = ">=0.9.1" }, { name = "toml", specifier = ">=0.10.2" }, @@ -307,6 +423,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, ] +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, +] + [[package]] name = "docutils" version = "0.20.1" @@ -1189,6 +1317,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/d8/94161a7ca5c55199484e926165e9e33f318ea1d1b0d7cdbcbc3652b933ec/pyarrow-18.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052", size = 25301373 }, ] +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.8.0" @@ -1206,6 +1343,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/26/0694318d46c7d90ab602ae27b24431e939f1600f9a4c69d1e727ec57289f/pydata_sphinx_theme-0.8.0-py3-none-any.whl", hash = "sha256:fbcbb833a07d3ad8dd997dd40dc94da18d98b41c68123ab0182b58fe92271204", size = 3284997 }, ] +[[package]] +name = "pygithub" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "pyjwt", version = 
"2.9.0", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version < '3.9'" }, + { name = "pyjwt", version = "2.10.1", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version >= '3.9'" }, + { name = "pynacl" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/05/bfbdbbc5d8aafd8dae9b3b6877edca561fccd8528ef5edc4e7b6d23721b5/PyGithub-2.5.0-py3-none-any.whl", hash = "sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2", size = 375935 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -1215,6 +1371,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pyjwt" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/68/ce067f09fca4abeca8771fe667d89cc347d1e99da3e093112ac329c6020e/pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c", size = 78825 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/79/84/0fdf9b18ba31d69877bd39c9cd6052b47f3761e9910c15de788e519f079f/PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850", size = 22344 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography", marker = "python_full_version < '3.9'" }, +] + +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography", marker = "python_full_version >= '3.9'" }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = 
"https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = 
"https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + [[package]] name = "pytest" version = "8.3.4" @@ -1817,6 +2030,92 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486 }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777 }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314 }, + { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947 }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778 }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716 }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548 }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334 }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427 }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774 }, + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488 }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776 }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420 }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199 }, + { url = 
"https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307 }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025 }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879 }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419 }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773 }, + { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799 }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821 }, + { url = 
"https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919 }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721 }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899 }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222 }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707 }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685 }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567 }, 
+ { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672 }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865 }, + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = 
"https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = 
"https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size 
= 100476 }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/0c/66/95b9e90e6e1274999b183c9c3f984996d870e933ca9560115bd1cd1d6f77/wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9", size = 53234 }, + { url = "https://files.pythonhosted.org/packages/a4/b6/6eced5e2db5924bf6d9223d2bb96b62e00395aae77058e6a9e11bf16b3bd/wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119", size = 38462 }, + { url = "https://files.pythonhosted.org/packages/5d/a4/c8472fe2568978b5532df84273c53ddf713f689d408a4335717ab89547e0/wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6", size = 38730 }, + { url = "https://files.pythonhosted.org/packages/3c/70/1d259c6b1ad164eb23ff70e3e452dd1950f96e6473f72b7207891d0fd1f0/wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9", size = 86225 }, + { url = 
"https://files.pythonhosted.org/packages/a9/68/6b83367e1afb8de91cbea4ef8e85b58acdf62f034f05d78c7b82afaa23d8/wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a", size = 78055 }, + { url = "https://files.pythonhosted.org/packages/0d/21/09573d2443916705c57fdab85d508f592c0a58d57becc53e15755d67fba2/wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2", size = 85592 }, + { url = "https://files.pythonhosted.org/packages/45/ce/700e17a852dd5dec894e241c72973ea82363486bcc1fb05d47b4fbd1d683/wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a", size = 83906 }, + { url = "https://files.pythonhosted.org/packages/37/14/bd210faf0a66faeb8529d42b6b45a25d6aa6ce25ddfc19168e4161aed227/wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04", size = 76763 }, + { url = "https://files.pythonhosted.org/packages/34/0c/85af70d291f44659c422416f0272046109e785bf6db8c081cfeeae5715c5/wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f", size = 83573 }, + { url = "https://files.pythonhosted.org/packages/f8/1e/b215068e824878f69ea945804fa26c176f7c2735a3ad5367d78930bd076a/wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7", size = 36408 }, + { url = "https://files.pythonhosted.org/packages/52/27/3dd9ad5f1097b33c95d05929e409cc86d7c765cb5437b86694dc8f8e9af0/wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3", size = 38737 }, + { url = 
"https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, + { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/cf/cb/7a07b51762dcd59bdbe07aa97f87b3169766cadf240f48d1cbe70a1be9db/wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9", size = 83050 }, + { url = "https://files.pythonhosted.org/packages/a5/51/a42757dd41032afd6d8037617aa3bc6803ba971850733b24dfb7d5c627c4/wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f", size = 74718 }, + { url = "https://files.pythonhosted.org/packages/bf/bb/d552bfe47db02fcfc950fc563073a33500f8108efa5f7b41db2f83a59028/wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b", size = 82590 }, + { url = "https://files.pythonhosted.org/packages/77/99/77b06b3c3c410dbae411105bf22496facf03a5496bfaca8fbcf9da381889/wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f", size = 81462 }, + { url = 
"https://files.pythonhosted.org/packages/2d/21/cf0bd85ae66f92600829ea1de8e1da778e5e9f6e574ccbe74b66db0d95db/wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8", size = 74309 }, + { url = "https://files.pythonhosted.org/packages/6d/16/112d25e9092398a0dd6fec50ab7ac1b775a0c19b428f049785096067ada9/wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9", size = 81081 }, + { url = "https://files.pythonhosted.org/packages/2b/49/364a615a0cc0872685646c495c7172e4fc7bf1959e3b12a1807a03014e05/wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb", size = 36423 }, + { url = "https://files.pythonhosted.org/packages/00/ad/5d2c1b34ba3202cd833d9221833e74d6500ce66730974993a8dc9a94fb8c/wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb", size = 38772 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +] + [[package]] name = "zipp" version = "3.20.2" From d635d56ecdc0cf2667c01cfcc51f26733ec796dc Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 7 Feb 2025 14:36:54 -0500 Subject: [PATCH 043/248] Chore/upgrade datafusion 45 (#1010) * upgrade dep * resolve errors * match new pyo3 version * upgrade dep * back to 43 * use released v45 * remove unnecessary pyarrow feature * Update unit test return type * fix test_relational_expr --------- Co-authored-by: Tim Saucer --- Cargo.lock | 400 +++++++++++--------- Cargo.toml | 16 +- examples/ffi-table-provider/Cargo.lock | 493 ++++++++++++++----------- examples/ffi-table-provider/Cargo.toml | 14 +- examples/ffi-table-provider/src/lib.rs | 2 +- python/tests/test_expr.py | 3 
+- python/tests/test_functions.py | 2 +- src/context.rs | 2 +- src/dataframe.rs | 2 +- 9 files changed, 514 insertions(+), 420 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50809696b..f1b1ed50a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf3437355979f1e93ba84ba108c38be5767713051f3c8ffbf07c094e2e61f9f" +checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" dependencies = [ "arrow-arith", "arrow-array", @@ -201,24 +201,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31dce77d2985522288edae7206bffd5fc4996491841dda01a13a58415867e681" +checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half", "num", ] [[package]] name = "arrow-array" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d45fe6d3faed0435b7313e59a02583b14c6c6339fa7729e94c32a20af319a79" +checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" dependencies = [ "ahash", "arrow-buffer", @@ -233,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b02656a35cc103f28084bc80a0159668e0a680d919cef127bd7e0aaccb06ec1" +checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" dependencies = [ "bytes", "half", @@ -244,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.4.0" +version = "54.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c73c6233c5b5d635a56f6010e6eb1ab9e30e94707db21cea03da317f67d84cf3" +checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,28 +264,25 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec222848d70fea5a32af9c3602b08f5d740d5e2d33fbd76bf6fd88759b5b13a7" +checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" dependencies = [ "arrow-array", - "arrow-buffer", "arrow-cast", - "arrow-data", "arrow-schema", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", "regex", ] [[package]] name = "arrow-data" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f2861ffa86f107b8ab577d86cff7c7a490243eabe961ba1e1af4f27542bb79" +checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" dependencies = [ "arrow-buffer", "arrow-schema", @@ -296,13 +292,12 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0270dc511f11bb5fa98a25020ad51a99ca5b08d8a8dfbd17503bb9dba0388f0b" +checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" dependencies = [ "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-schema", "flatbuffers", @@ -311,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eff38eeb8a971ad3a4caf62c5d57f0cff8a48b64a55e3207c4fd696a9234aad" +checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" dependencies = [ "arrow-array", "arrow-buffer", @@ -331,26 +326,23 @@ dependencies = [ [[package]] name = "arrow-ord" 
-version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6f202a879d287099139ff0d121e7f55ae5e0efe634b8cf2106ebc27a8715dee" +checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half", - "num", ] [[package]] name = "arrow-row" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f936954991c360ba762dff23f5dda16300774fafd722353d9683abd97630ae" +checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" dependencies = [ - "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -360,18 +352,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e" +checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7471ba126d0b0aaa24b50a36bc6c25e4e74869a1fd1a5553357027a0b1c8d1f1" +checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" dependencies = [ "ahash", "arrow-array", @@ -383,9 +375,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72993b01cb62507b06f1fb49648d7286c8989ecfabdb7b77a750fcb54410731b" +checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" dependencies = [ "arrow-array", "arrow-buffer", @@ -444,7 +436,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 
2.0.98", ] [[package]] @@ -455,7 +447,7 @@ checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -598,9 +590,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "bzip2" @@ -635,9 +627,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" +checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" dependencies = [ "jobserver", "libc", @@ -874,9 +866,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ "apache-avro", "arrow", @@ -888,7 +880,6 @@ dependencies = [ "bytes", "bzip2 0.5.0", "chrono", - "dashmap", "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", @@ -908,7 +899,7 @@ dependencies = [ "flate2", "futures", "glob", - "itertools", + "itertools 0.14.0", "log", "num-traits", "object_store", @@ -928,31 +919,39 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" 
dependencies = [ - "arrow-schema", + "arrow", "async-trait", + "dashmap", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", "parking_lot", + "sqlparser", ] [[package]] name = "datafusion-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b" +checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ "ahash", "apache-avro", "arrow", "arrow-array", "arrow-buffer", + "arrow-ipc", "arrow-schema", + "base64 0.22.1", "half", "hashbrown 0.14.5", "indexmap", @@ -969,9 +968,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72fbf14d4079f7ce5306393084fe5057dddfdc2113577e0049310afa12e94281" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" dependencies = [ "log", "tokio", @@ -979,15 +978,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" [[package]] name = "datafusion-execution" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" dependencies = [ "arrow", "dashmap", @@ -1004,9 +1003,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" dependencies = [ "arrow", "chrono", @@ -1025,23 +1024,26 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" dependencies = [ "arrow", "datafusion-common", - "itertools", + "itertools 0.14.0", + "paste", ] [[package]] name = "datafusion-ffi" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "114e944790756b84c2cc5971eae24f5430980149345601939ac222885d4db5f7" +checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" dependencies = [ "abi_stable", "arrow", + "arrow-array", + "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1049,13 +1051,15 @@ dependencies = [ "futures", "log", "prost", + "semver", + "tokio", ] [[package]] name = "datafusion-functions" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" dependencies = [ "arrow", "arrow-buffer", @@ -1071,7 +1075,7 @@ dependencies = [ "datafusion-macros", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.14.0", "log", "md-5", "rand", @@ -1083,12 +1087,13 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" dependencies 
= [ "ahash", "arrow", + "arrow-buffer", "arrow-schema", "datafusion-common", "datafusion-doc", @@ -1105,9 +1110,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" dependencies = [ "ahash", "arrow", @@ -1118,9 +1123,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5" +checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" dependencies = [ "arrow", "arrow-array", @@ -1128,21 +1133,23 @@ dependencies = [ "arrow-ord", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-macros", "datafusion-physical-expr-common", - "itertools", + "itertools 0.14.0", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" dependencies = [ "arrow", "async-trait", @@ -1156,9 +1163,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1173,9 +1180,9 @@ 
dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1183,19 +1190,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" dependencies = [ + "datafusion-expr", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "datafusion-optimizer" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" dependencies = [ "arrow", "chrono", @@ -1203,7 +1211,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "indexmap", - "itertools", + "itertools 0.14.0", "log", "recursive", "regex", @@ -1212,9 +1220,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" dependencies = [ "ahash", "arrow", @@ -1229,48 +1237,54 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.7.1", ] [[package]] name = "datafusion-physical-expr-common" -version = "44.0.0" +version = "45.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" dependencies = [ "ahash", "arrow", + "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "itertools", + "itertools 0.14.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" dependencies = [ "arrow", + "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools", + "futures", + "itertools 0.14.0", "log", "recursive", + "url", ] [[package]] name = "datafusion-physical-plan" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" dependencies = [ "ahash", "arrow", @@ -1291,7 +1305,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", @@ -1300,9 +1314,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e23b0998195e495bfa7b37cdceb317129a6c40522219f6872d2e0c9ae9f4fcb" +checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" dependencies = [ "arrow", "chrono", @@ -1316,9 +1330,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "44.0.0" +version = "45.0.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfc59992a29eed2d2c1dd779deac99083b217774ebcf90ee121840607a4d866f" +checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" dependencies = [ "arrow", "datafusion-common", @@ -1350,9 +1364,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" dependencies = [ "arrow", "arrow-array", @@ -1369,16 +1383,16 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ec36dd38512b1ecc7a3bb92e72046b944611b2f0d709445c1e51b0143bffd4" +checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944" dependencies = [ "arrow-buffer", "async-recursion", "async-trait", "chrono", "datafusion", - "itertools", + "itertools 0.14.0", "object_store", "pbjson-types", "prost", @@ -1405,7 +1419,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1448,6 +1462,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flatbuffers" version = "24.12.23" @@ -1545,7 +1565,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1938,7 +1958,7 @@ 
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1999,6 +2019,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.14" @@ -2362,7 +2391,7 @@ dependencies = [ "httparse", "humantime", "hyper", - "itertools", + "itertools 0.13.0", "md-5", "parking_lot", "percent-encoding", @@ -2382,9 +2411,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" [[package]] name = "openssl-probe" @@ -2426,9 +2455,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8957c0c95a6a1804f3e51a18f69df29be53856a8c5768cc9b6d00fcafcd2917c" +checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" dependencies = [ "ahash", "arrow-array", @@ -2452,6 +2481,7 @@ dependencies = [ "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", @@ -2492,7 +2522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ "heck", - "itertools", + "itertools 0.13.0", "prost", "prost-types", ] @@ -2524,7 +2554,17 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset", + "fixedbitset 0.4.2", 
+ "indexmap", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset 0.5.7", "indexmap", ] @@ -2606,7 +2646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2635,16 +2675,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" dependencies = [ "heck", - "itertools", + "itertools 0.13.0", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.6.5", "prettyplease", "prost", "prost-types", "regex", - "syn 2.0.96", + "syn 2.0.98", "tempfile", ] @@ -2655,10 +2695,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2690,9 +2730,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" dependencies = [ "cfg-if", "indoc", @@ -2708,9 +2748,9 @@ dependencies = [ [[package]] name = "pyo3-async-runtimes" -version = "0.22.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2529f0be73ffd2be0cc43c013a640796558aa12d7ca0aab5cc14f375b4733031" +checksum = "977dc837525cfd22919ba6a831413854beb7c99a256c03bf8624ad707e45810e" dependencies = [ "futures", "once_cell", @@ -2721,9 +2761,9 @@ dependencies = [ 
[[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" dependencies = [ "once_cell", "target-lexicon", @@ -2731,9 +2771,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" dependencies = [ "libc", "pyo3-build-config", @@ -2741,27 +2781,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2888,7 +2928,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3029,9 +3069,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" -version = "2.1.0" +version = "2.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustc_version" @@ -3161,7 +3201,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3234,7 +3274,7 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3245,7 +3285,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3269,7 +3309,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3314,6 +3354,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "1.0.1" @@ -3353,7 +3399,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3396,7 +3442,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3440,14 +3486,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "substrait" -version = "0.50.4" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1772d041c37cc7e6477733c76b2acf4ee36bd52b2ae4d9ea0ec9c87d003db32" +checksum = 
"5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756" dependencies = [ "heck", "pbjson", @@ -3464,7 +3510,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.96", + "syn 2.0.98", "typify", "walkdir", ] @@ -3488,9 +3534,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.96" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -3514,7 +3560,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3563,7 +3609,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3574,7 +3620,7 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3646,7 +3692,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3718,7 +3764,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3784,7 +3830,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3795,9 +3841,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" +checksum = "e03ba3643450cfd95a1aca2e1938fef63c1c1994489337998aff4ad771f21ef8" dependencies = [ "typify-impl", "typify-macro", @@ -3805,9 +3851,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" +checksum = "bce48219a2f3154aaa2c56cbf027728b24a3c8fe0a47ed6399781de2b3f3eeaf" dependencies = [ "heck", "log", @@ -3818,16 +3864,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.96", - "thiserror 1.0.69", + "syn 2.0.98", + "thiserror 2.0.11", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" +checksum = "68b5780d745920ed73c5b7447496a9b5c42ed2681a9b70859377aec423ecf02b" dependencies = [ "proc-macro2", "quote", @@ -3836,7 +3882,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.96", + "syn 2.0.98", "typify-impl", ] @@ -3901,11 +3947,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.12.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" +checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.3.1", "serde", ] @@ -3971,7 +4017,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "wasm-bindgen-shared", ] @@ -4006,7 +4052,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "wasm-bindgen-backend", 
"wasm-bindgen-shared", ] @@ -4255,7 +4301,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "synstructure", ] @@ -4277,7 +4323,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4297,7 +4343,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "synstructure", ] @@ -4326,7 +4372,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 44e6e2244..d18e0e8f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,13 +35,13 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } -pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} -arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "44.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "44.0.0", optional = true } -datafusion-proto = { version = "44.0.0" } -datafusion-ffi = { version = "44.0.0" } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} +arrow = { version = "54", features = ["pyarrow"] } +datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "45.0.0", optional = true } +datafusion-proto = { version = "45.0.0" } +datafusion-ffi = { version = "45.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = 
"1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } @@ -52,7 +52,7 @@ url = "2" [build-dependencies] prost-types = "0.13" # keep in line with `datafusion-substrait` -pyo3-build-config = "0.22" +pyo3-build-config = "0.23" [lib] name = "datafusion_python" diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock index 3b57cac75..32af85180 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/ffi-table-provider/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "abi_stable" @@ -144,9 +144,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" dependencies = [ "arrow-arith", "arrow-array", @@ -165,24 +165,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half", "num", ] [[package]] name = "arrow-array" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" dependencies = [ "ahash", "arrow-buffer", @@ -191,15 +190,15 @@ dependencies = [ "chrono", "chrono-tz", 
"half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "num", ] [[package]] name = "arrow-buffer" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" dependencies = [ "bytes", "half", @@ -208,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" dependencies = [ "arrow-array", "arrow-buffer", @@ -229,28 +228,25 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" dependencies = [ "arrow-array", - "arrow-buffer", "arrow-cast", - "arrow-data", "arrow-schema", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", "regex", ] [[package]] name = "arrow-data" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" dependencies = [ "arrow-buffer", "arrow-schema", @@ -260,13 +256,12 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" dependencies = [ "arrow-array", "arrow-buffer", 
- "arrow-cast", "arrow-data", "arrow-schema", "flatbuffers", @@ -275,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" dependencies = [ "arrow-array", "arrow-buffer", @@ -295,26 +290,23 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half", - "num", ] [[package]] name = "arrow-row" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" dependencies = [ - "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -324,18 +316,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" dependencies = [ "ahash", "arrow-array", @@ -347,9 +339,9 @@ dependencies 
= [ [[package]] name = "arrow-string" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" dependencies = [ "arrow-array", "arrow-buffer", @@ -380,10 +372,9 @@ version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ - "bzip2", + "bzip2 0.4.4", "flate2", "futures-core", - "futures-io", "memchr", "pin-project-lite", "tokio", @@ -448,6 +439,19 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bigdecimal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -540,6 +544,16 @@ dependencies = [ "libc", ] +[[package]] +name = "bzip2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -751,11 +765,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", @@ -763,9 +775,8 @@ dependencies = [ 
"async-compression", "async-trait", "bytes", - "bzip2", + "bzip2 0.5.0", "chrono", - "dashmap", "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", @@ -774,6 +785,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -784,18 +796,13 @@ dependencies = [ "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", - "itertools", + "itertools 0.14.0", "log", - "num_cpus", "object_store", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", + "regex", "sqlparser", "tempfile", "tokio", @@ -808,67 +815,74 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ - "arrow-schema", + "arrow", "async-trait", + "dashmap", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", "parking_lot", + "sqlparser", ] [[package]] name = "datafusion-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", + "arrow-ipc", "arrow-schema", - "chrono", + "base64", "half", "hashbrown 0.14.5", "indexmap", - "instant", "libc", - "num_cpus", + "log", "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = 
"datafusion-common-runtime" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" + [[package]] name = "datafusion-execution" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store", "parking_lot", @@ -879,63 +893,59 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", - "strum", - "strum_macros", ] [[package]] name = "datafusion-expr-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +version = "45.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "datafusion-common", - "itertools", + "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "abi_stable", "arrow", + "arrow-array", + "arrow-schema", "async-ffi", "async-trait", "datafusion", "datafusion-proto", - "doc-comment", "futures", "log", "prost", + "semver", + "tokio", ] [[package]] name = "datafusion-functions" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-buffer", @@ -944,11 +954,14 @@ dependencies = [ "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.14.0", "log", "md-5", "rand", @@ -960,44 +973,42 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", + "arrow-buffer", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", 
"datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap", "log", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-array", @@ -1005,26 +1016,43 @@ dependencies = [ "arrow-ord", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-macros", "datafusion-physical-expr-common", - "itertools", + "itertools 0.14.0", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +version = "45.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", "datafusion-functions-window-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "log", @@ -1033,48 +1061,51 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.87", +] + [[package]] name = "datafusion-optimizer" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", - "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", "arrow-array", 
"arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1083,7 +1114,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "paste", "petgraph", @@ -1091,39 +1122,43 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", + "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.14.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools", + "futures", + "itertools 0.14.0", + "log", + "recursive", + "url", ] [[package]] name = "datafusion-physical-plan" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", @@ -1137,7 +1172,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - 
"datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1145,20 +1179,17 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-proto" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "chrono", @@ -1172,33 +1203,30 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", - "chrono", "datafusion-common", - "object_store", "prost", ] [[package]] name = "datafusion-sql" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-array", "arrow-schema", + "bigdecimal", "datafusion-common", "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", - "strum", ] [[package]] @@ -1223,12 +1251,6 @@ dependencies = [ "syn 2.0.87", ] -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "either" version = 
"1.13.0" @@ -1272,15 +1294,15 @@ dependencies = [ [[package]] name = "fixedbitset" -version = "0.4.2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "24.3.25" +version = "24.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1469,12 +1491,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hex" version = "0.4.3" @@ -1651,9 +1667,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.1", @@ -1665,18 +1681,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" 
version = "3.0.4" @@ -1692,6 +1696,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -1964,16 +1977,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.36.5" @@ -1994,7 +1997,7 @@ dependencies = [ "chrono", "futures", "humantime", - "itertools", + "itertools 0.13.0", "parking_lot", "percent-encoding", "snafu", @@ -2044,9 +2047,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" dependencies = [ "ahash", "arrow-array", @@ -2063,13 +2066,14 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "lz4_flex", "num", "num-bigint", "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", @@ -2101,9 +2105,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.6.5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", "indexmap", @@ -2206,17 +2210,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.87", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "pyo3" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" dependencies = [ "cfg-if", "indoc", @@ -2232,9 +2245,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" dependencies = [ "once_cell", "target-lexicon", @@ -2242,9 +2255,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" dependencies = [ "libc", "pyo3-build-config", @@ -2252,9 +2265,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2264,9 +2277,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.23.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" dependencies = [ "heck", "proc-macro2", @@ -2314,6 +2327,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.87", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -2418,9 +2451,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.23" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] name = "seq-macro" @@ -2477,6 +2510,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "0.3.11" @@ -2527,9 +2566,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.51.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -2537,9 +2576,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", @@ -2552,6 +2591,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -2563,9 +2615,6 @@ name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros", -] [[package]] name = "strum_macros" @@ -2798,9 +2847,9 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.5.3" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -2906,10 +2955,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] -name = "web-sys" -version 
= "0.3.72" +name = "web-time" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 4e6f91f33..0e558fdd0 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -21,15 +21,15 @@ version = "0.1.0" edition = "2021" [dependencies] -datafusion = { version = "44.0.0" } -datafusion-ffi = { version = "44.0.0" } -pyo3 = { version = "0.22.6", features = ["extension-module", "abi3", "abi3-py38"] } -arrow = { version = "53.2.0" } -arrow-array = { version = "53.2.0" } -arrow-schema = { version = "53.2.0" } +datafusion = { version = "45.0.0" } +datafusion-ffi = { version = "45.0.0" } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +arrow = { version = "54" } +arrow-array = { version = "54" } +arrow-schema = { version = "54" } [build-dependencies] -pyo3-build-config = "0.22.6" +pyo3-build-config = "0.23" [lib] name = "ffi_table_provider" diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/ffi-table-provider/src/lib.rs index 473244d88..88deeece2 100644 --- a/examples/ffi-table-provider/src/lib.rs +++ b/examples/ffi-table-provider/src/lib.rs @@ -102,7 +102,7 @@ impl MyTableProvider { let provider = self .create_table() .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; - let provider = FFI_TableProvider::new(Arc::new(provider), false); + let provider = FFI_TableProvider::new(Arc::new(provider), false, None); PyCapsule::new_bound(py, provider, Some(name.clone())) } diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 77f88aa44..354c7e180 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -148,8 +148,7 @@ def 
test_relational_expr(test_ctx): assert df.filter(col("b") == "beta").count() == 1 assert df.filter(col("b") != "beta").count() == 2 - with pytest.raises(Exception): - df.filter(col("a") == "beta").count() + assert df.filter(col("a") == "beta").count() == 0 def test_expr_to_variant(): diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index ad6aa7c0a..796b1f76e 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -732,7 +732,7 @@ def test_array_function_obj_tests(stmt, py_expr): ), ( f.regexp_match(column("a"), literal("(ell|orl)")), - pa.array([["ell"], ["orl"], None]), + pa.array([["ell"], ["orl"], None], type=pa.list_(pa.string_view())), ), ( f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), diff --git a/src/context.rs b/src/context.rs index f53b15576..ebe7db230 100644 --- a/src/context.rs +++ b/src/context.rs @@ -48,7 +48,7 @@ use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog_common::TableReference; +use datafusion::common::TableReference; use datafusion::common::{exec_err, ScalarValue}; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::file_format::parquet::ParquetFormat; diff --git a/src/dataframe.rs b/src/dataframe.rs index 6fb08ba25..13d7ae838 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -546,7 +546,7 @@ impl PyDataFrame { /// Collect the batches and pass to Arrow Table fn to_arrow_table(&self, py: Python<'_>) -> PyResult { let batches = self.collect(py)?.to_object(py); - let schema: PyObject = self.schema().into_py(py); + let schema: PyObject = self.schema().into_pyobject(py)?.to_object(py); // Instantiate pyarrow Table object and use its from_batches method let table_class = 
py.import_bound("pyarrow")?.getattr("Table")?; From 40a61c150adee6beb9961302fece81c33639082e Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 16 Feb 2025 02:31:00 +0800 Subject: [PATCH 044/248] add to_timestamp_nanos (#1020) --- python/datafusion/functions.py | 1 + python/tests/test_functions.py | 4 ++++ src/functions.rs | 2 ++ 3 files changed, 7 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 7c2fa9a8f..5c260aade 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -252,6 +252,7 @@ "to_hex", "to_timestamp", "to_timestamp_micros", + "to_timestamp_nanos", "to_timestamp_millis", "to_timestamp_seconds", "to_unixtime", diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 796b1f76e..b1a739b49 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -871,6 +871,7 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), ) result = df.collect() assert len(result) == 1 @@ -909,6 +910,9 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) + assert result.column(11) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): diff --git a/src/functions.rs b/src/functions.rs index 46c748cf8..6a8abb18d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -553,6 +553,7 @@ expr_fn!( expr_fn!(now); expr_fn_vec!(to_timestamp); expr_fn_vec!(to_timestamp_millis); +expr_fn_vec!(to_timestamp_nanos); expr_fn_vec!(to_timestamp_micros); expr_fn_vec!(to_timestamp_seconds); expr_fn_vec!(to_unixtime); @@ -977,6 +978,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) 
-> PyResult<()> { m.add_wrapped(wrap_pyfunction!(to_hex))?; m.add_wrapped(wrap_pyfunction!(to_timestamp))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; + m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_micros))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_seconds))?; m.add_wrapped(wrap_pyfunction!(to_unixtime))?; From 3584bec8900bcfb33bcae4b85a3c47a46b82c72e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Wed, 19 Feb 2025 20:50:31 -0500 Subject: [PATCH 045/248] [infra] Fail Clippy on rust build warnings (#1029) * pyo3 update required changes to deprecated interfaces * Substrait feature clippy updates * PyTuple was called twice * add -D warnings option --------- Co-authored-by: Tim Saucer --- .github/workflows/test.yaml | 2 +- .pre-commit-config.yaml | 2 +- src/config.rs | 10 +++--- src/context.rs | 12 +++---- src/dataframe.rs | 17 +++++---- src/dataset.rs | 2 +- src/dataset_exec.rs | 8 ++--- src/errors.rs | 4 +++ src/expr.rs | 61 ++++++++++++++++---------------- src/expr/aggregate.rs | 6 ++-- src/expr/analyze.rs | 6 ++-- src/expr/create_memory_table.rs | 6 ++-- src/expr/create_view.rs | 6 ++-- src/expr/distinct.rs | 6 ++-- src/expr/drop_table.rs | 6 ++-- src/expr/empty_relation.rs | 6 ++-- src/expr/explain.rs | 6 ++-- src/expr/extension.rs | 6 ++-- src/expr/filter.rs | 6 ++-- src/expr/join.rs | 6 ++-- src/expr/limit.rs | 6 ++-- src/expr/literal.rs | 6 ++-- src/expr/logical_node.rs | 4 +-- src/expr/projection.rs | 6 ++-- src/expr/repartition.rs | 6 ++-- src/expr/sort.rs | 6 ++-- src/expr/subquery.rs | 6 ++-- src/expr/subquery_alias.rs | 6 ++-- src/expr/table_scan.rs | 6 ++-- src/expr/union.rs | 6 ++-- src/expr/unnest.rs | 6 ++-- src/expr/window.rs | 6 ++-- src/lib.rs | 10 +++--- src/physical_plan.rs | 2 +- src/pyarrow_filter_expression.rs | 36 ++++++++++--------- src/pyarrow_util.rs | 4 +-- src/sql/logical.rs | 4 +-- src/substrait.rs | 4 +-- src/udaf.rs | 5 +-- src/udf.rs | 5 +-- src/udwf.rs | 44 
+++++++++++------------ 41 files changed, 188 insertions(+), 180 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c93d4c06f..c1d9ac838 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -71,7 +71,7 @@ jobs: - name: Run Clippy if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - run: cargo clippy --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure + run: cargo clippy --all-targets --all-features -- -D clippy::all -D warnings -A clippy::redundant_closure - name: Install dependencies and build uses: astral-sh/setup-uv@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e20fedf5c..b548ff18f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: - id: rust-clippy name: Rust clippy description: Run cargo clippy on files included in the commit. clippy should be installed before-hand. - entry: cargo clippy --all-targets --all-features -- -Dclippy::all -Aclippy::redundant_closure + entry: cargo clippy --all-targets --all-features -- -Dclippy::all -D warnings -Aclippy::redundant_closure pass_filenames: false types: [file, rust] language: system diff --git a/src/config.rs b/src/config.rs index cc725b9a3..667d5c590 100644 --- a/src/config.rs +++ b/src/config.rs @@ -47,14 +47,14 @@ impl PyConfig { } /// Get a configuration option - pub fn get(&mut self, key: &str, py: Python) -> PyResult { + pub fn get<'py>(&mut self, key: &str, py: Python<'py>) -> PyResult> { let options = self.config.to_owned(); for entry in options.entries() { if entry.key == key { - return Ok(entry.value.into_py(py)); + return Ok(entry.value.into_pyobject(py)?); } } - Ok(None::.into_py(py)) + Ok(None::.into_pyobject(py)?) 
} /// Set a configuration option @@ -66,10 +66,10 @@ impl PyConfig { /// Get all configuration options pub fn get_all(&mut self, py: Python) -> PyResult { - let dict = PyDict::new_bound(py); + let dict = PyDict::new(py); let options = self.config.to_owned(); for entry in options.entries() { - dict.set_item(entry.key, entry.value.clone().into_py(py))?; + dict.set_item(entry.key, entry.value.clone().into_pyobject(py)?)?; } Ok(dict.into()) } diff --git a/src/context.rs b/src/context.rs index ebe7db230..0f962638e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -458,8 +458,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pylist", args)?; // Convert Arrow Table to datafusion DataFrame @@ -478,8 +478,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pydict", args)?; // Convert Arrow Table to datafusion DataFrame @@ -533,8 +533,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pandas", args)?; // Convert Arrow Table to datafusion DataFrame diff --git a/src/dataframe.rs b/src/dataframe.rs index 13d7ae838..ed9578a71 100644 --- 
a/src/dataframe.rs +++ b/src/dataframe.rs @@ -545,12 +545,12 @@ impl PyDataFrame { /// Convert to Arrow Table /// Collect the batches and pass to Arrow Table fn to_arrow_table(&self, py: Python<'_>) -> PyResult { - let batches = self.collect(py)?.to_object(py); - let schema: PyObject = self.schema().into_pyobject(py)?.to_object(py); + let batches = self.collect(py)?.into_pyobject(py)?; + let schema = self.schema().into_pyobject(py)?; // Instantiate pyarrow Table object and use its from_batches method - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[batches, schema]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[batches, schema])?; let table: PyObject = table_class.call_method1("from_batches", args)?.into(); Ok(table) } @@ -585,8 +585,7 @@ impl PyDataFrame { let ffi_stream = FFI_ArrowArrayStream::new(reader); let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); - PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name)) - .map_err(PyDataFusionError::from) + PyCapsule::new(py, ffi_stream, Some(stream_capsule_name)).map_err(PyDataFusionError::from) } fn execute_stream(&self, py: Python) -> PyDataFusionResult { @@ -649,8 +648,8 @@ impl PyDataFrame { /// Collect the batches, pass to Arrow Table & then convert to polars DataFrame fn to_polars(&self, py: Python<'_>) -> PyResult { let table = self.to_arrow_table(py)?; - let dataframe = py.import_bound("polars")?.getattr("DataFrame")?; - let args = PyTuple::new_bound(py, &[table]); + let dataframe = py.import("polars")?.getattr("DataFrame")?; + let args = PyTuple::new(py, &[table])?; let result: PyObject = dataframe.call1(args)?.into(); Ok(result) } @@ -673,7 +672,7 @@ fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { // Import the Python 'builtins' module to access the print function // Note that println! 
does not print to the Python debug console and is not visible in notebooks for instance - let print = py.import_bound("builtins")?.getattr("print")?; + let print = py.import("builtins")?.getattr("print")?; print.call1((result,))?; Ok(()) } diff --git a/src/dataset.rs b/src/dataset.rs index a8fa21ec5..0baf4da2a 100644 --- a/src/dataset.rs +++ b/src/dataset.rs @@ -48,7 +48,7 @@ impl Dataset { // Creates a Python PyArrow.Dataset pub fn new(dataset: &Bound<'_, PyAny>, py: Python) -> PyResult { // Ensure that we were passed an instance of pyarrow.dataset.Dataset - let ds = PyModule::import_bound(py, "pyarrow.dataset")?; + let ds = PyModule::import(py, "pyarrow.dataset")?; let ds_attr = ds.getattr("Dataset")?; let ds_type = ds_attr.downcast::()?; if dataset.is_instance(ds_type)? { diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index ace42115b..445e4fe74 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -104,7 +104,7 @@ impl DatasetExec { }) .transpose()?; - let kwargs = PyDict::new_bound(py); + let kwargs = PyDict::new(py); kwargs.set_item("columns", columns.clone())?; kwargs.set_item( @@ -121,7 +121,7 @@ impl DatasetExec { .0, ); - let builtins = Python::import_bound(py, "builtins")?; + let builtins = Python::import(py, "builtins")?; let pylist = builtins.getattr("list")?; // Get the fragments or partitions of the dataset @@ -198,7 +198,7 @@ impl ExecutionPlan for DatasetExec { let dataset_schema = dataset .getattr("schema") .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - let kwargs = PyDict::new_bound(py); + let kwargs = PyDict::new(py); kwargs .set_item("columns", self.columns.clone()) .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; @@ -223,7 +223,7 @@ impl ExecutionPlan for DatasetExec { let record_batches: Bound<'_, PyIterator> = scanner .call_method0("to_batches") .map_err(|err| InnerDataFusionError::External(Box::new(err)))? 
- .iter() + .try_iter() .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; let record_batches = PyArrowBatchesAdapter { diff --git a/src/errors.rs b/src/errors.rs index b02b754a2..f1d5aeb23 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -91,3 +91,7 @@ pub fn py_datafusion_err(e: impl Debug) -> PyErr { pub fn py_unsupported_variant_err(e: impl Debug) -> PyErr { PyErr::new::(format!("{e:?}")) } + +pub fn to_datafusion_err(e: impl Debug) -> InnerDataFusionError { + InnerDataFusionError::Execution(format!("{e:?}")) +} diff --git a/src/expr.rs b/src/expr.rs index 1e9983d42..e750be6a4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -19,6 +19,7 @@ use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, }; +use pyo3::IntoPyObjectExt; use pyo3::{basic::CompareOp, prelude::*}; use std::convert::{From, Into}; use std::sync::Arc; @@ -126,35 +127,35 @@ pub fn py_expr_list(expr: &[Expr]) -> PyResult> { #[pymethods] impl PyExpr { /// Return the specific expression - fn to_variant(&self, py: Python) -> PyResult { + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { Python::with_gil(|_| { match &self.expr { - Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_py(py)), - Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_py(py)), + Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_bound_py_any(py)?), + Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_bound_py_any(py)?), Expr::ScalarVariable(data_type, variables) => { - Ok(PyScalarVariable::new(data_type, variables).into_py(py)) + Ok(PyScalarVariable::new(data_type, variables).into_bound_py_any(py)?) 
} - Expr::Like(value) => Ok(PyLike::from(value.clone()).into_py(py)), - Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_py(py)), - Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_py(py)), - Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_py(py)), - Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_py(py)), - Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_py(py)), - Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_py(py)), - Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_py(py)), - Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_py(py)), - Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_py(py)), - Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_py(py)), - Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_py(py)), - Expr::Negative(expr) => Ok(PyNegative::new(*expr.clone()).into_py(py)), + Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?), + Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_bound_py_any(py)?), + Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_bound_py_any(py)?), + Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_bound_py_any(py)?), + Expr::Negative(expr) 
=> Ok(PyNegative::new(*expr.clone()).into_bound_py_any(py)?), Expr::AggregateFunction(expr) => { - Ok(PyAggregateFunction::from(expr.clone()).into_py(py)) + Ok(PyAggregateFunction::from(expr.clone()).into_bound_py_any(py)?) } - Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_py(py)), - Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_py(py)), - Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_py(py)), - Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), - Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), + Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_bound_py_any(py)?), + Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_bound_py_any(py)?), + Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_bound_py_any(py)?), + Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?), + Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?), Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", value @@ -163,29 +164,29 @@ impl PyExpr { "Converting Expr::WindowFunction to a Python object is not implemented: {:?}", value ))), - Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), - Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), + Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?), + Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?), Expr::InSubquery(value) => { - Ok(in_subquery::PyInSubquery::from(value.clone()).into_py(py)) + Ok(in_subquery::PyInSubquery::from(value.clone()).into_bound_py_any(py)?) 
} Expr::ScalarSubquery(value) => { - Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) + Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) } Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", qualifier, options ))), Expr::GroupingSet(value) => { - Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) + Ok(grouping_set::PyGroupingSet::from(value.clone()).into_bound_py_any(py)?) } Expr::Placeholder(value) => { - Ok(placeholder::PyPlaceholder::from(value.clone()).into_py(py)) + Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?) } Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!( "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {:?} - {:?}", data_type, column ))), - Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_py(py)), + Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?), } }) } diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 389bfb332..8fc9da5b0 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -19,7 +19,7 @@ use datafusion::common::DataFusionError; use datafusion::logical_expr::expr::{AggregateFunction, Alias}; use datafusion::logical_expr::logical_plan::Aggregate; use datafusion::logical_expr::Expr; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -151,7 +151,7 @@ impl LogicalNode for PyAggregate { vec![PyLogicalPlan::from((*self.aggregate.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 
084513971..62f93cd26 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Analyze; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -78,7 +78,7 @@ impl LogicalNode for PyAnalyze { vec![PyLogicalPlan::from((*self.analyze.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs index 01ebb66b0..8872b2d47 100644 --- a/src/expr/create_memory_table.rs +++ b/src/expr/create_memory_table.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::CreateMemoryTable; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -91,7 +91,7 @@ impl LogicalNode for PyCreateMemoryTable { vec![PyLogicalPlan::from((*self.create.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index d119f5c21..87bb76876 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{CreateView, DdlStatement, LogicalPlan}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -88,8 +88,8 @@ impl LogicalNode for PyCreateView { vec![PyLogicalPlan::from((*self.create.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + 
self.clone().into_bound_py_any(py) } } diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs index 061ab4824..b62b776f8 100644 --- a/src/expr/distinct.rs +++ b/src/expr/distinct.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::Distinct; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -89,7 +89,7 @@ impl LogicalNode for PyDistinct { } } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/drop_table.rs b/src/expr/drop_table.rs index 330156abe..96983c1cf 100644 --- a/src/expr/drop_table.rs +++ b/src/expr/drop_table.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::logical_plan::DropTable; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -83,7 +83,7 @@ impl LogicalNode for PyDropTable { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index ce7163466..a1534ac15 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -17,7 +17,7 @@ use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; use datafusion::logical_expr::EmptyRelation; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -79,7 +79,7 @@ impl LogicalNode for PyEmptyRelation { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/explain.rs b/src/expr/explain.rs index 
8e7fb8843..fc02fe2b5 100644 --- a/src/expr/explain.rs +++ b/src/expr/explain.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{logical_plan::Explain, LogicalPlan}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{common::df_schema::PyDFSchema, errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -104,7 +104,7 @@ impl LogicalNode for PyExplain { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/extension.rs b/src/expr/extension.rs index a29802b0b..1e3fbb199 100644 --- a/src/expr/extension.rs +++ b/src/expr/extension.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::Extension; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -46,7 +46,7 @@ impl LogicalNode for PyExtension { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/filter.rs b/src/expr/filter.rs index a6d8aa7ee..9bdb667cd 100644 --- a/src/expr/filter.rs +++ b/src/expr/filter.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Filter; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -81,7 +81,7 @@ impl LogicalNode for PyFilter { vec![PyLogicalPlan::from((*self.filter.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/join.rs b/src/expr/join.rs index 66e677f8a..76ec532e7 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::{Join, JoinConstraint, JoinType}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -193,7 +193,7 @@ impl LogicalNode for PyJoin { ] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 84ad7d68b..c2a33ff89 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Limit; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -90,7 +90,7 @@ impl LogicalNode for PyLimit { vec![PyLogicalPlan::from((*self.limit.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/literal.rs b/src/expr/literal.rs index 2cb2079f1..a660ac914 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -17,7 +17,7 @@ use crate::errors::PyDataFusionError; use datafusion::common::ScalarValue; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; #[pyclass(name = "Literal", module = "datafusion.expr", subclass)] #[derive(Clone)] @@ -144,8 +144,8 @@ impl PyLiteral { } #[allow(clippy::wrong_self_convention)] - fn into_type(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn into_type<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } fn __repr__(&self) -> PyResult { diff --git a/src/expr/logical_node.rs b/src/expr/logical_node.rs index 757e4f94b..5aff70059 100644 --- a/src/expr/logical_node.rs +++ b/src/expr/logical_node.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use pyo3::{PyObject, PyResult, Python}; +use pyo3::{Bound, PyAny, PyResult, Python}; use crate::sql::logical::PyLogicalPlan; @@ -25,5 +25,5 @@ pub trait LogicalNode { /// The input plan to the current logical node instance. 
fn inputs(&self) -> Vec; - fn to_variant(&self, py: Python) -> PyResult; + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult>; } diff --git a/src/expr/projection.rs b/src/expr/projection.rs index 36534fdb2..dc7e5e3c1 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -17,7 +17,7 @@ use datafusion::logical_expr::logical_plan::Projection; use datafusion::logical_expr::Expr; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -113,7 +113,7 @@ impl LogicalNode for PyProjection { vec![PyLogicalPlan::from((*self.projection.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/repartition.rs b/src/expr/repartition.rs index 4e680e181..3e782d6af 100644 --- a/src/expr/repartition.rs +++ b/src/expr/repartition.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{logical_plan::Repartition, Expr, Partitioning}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -121,7 +121,7 @@ impl LogicalNode for PyRepartition { vec![PyLogicalPlan::from((*self.repartition.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/sort.rs b/src/expr/sort.rs index a1803ccaf..ed4947591 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -17,7 +17,7 @@ use datafusion::common::DataFusionError; use datafusion::logical_expr::logical_plan::Sort; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -96,7 +96,7 @@ impl LogicalNode for 
PySort { vec![PyLogicalPlan::from((*self.sort.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index dac8d0a2b..5ebfe6927 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::Subquery; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -75,7 +75,7 @@ impl LogicalNode for PySubquery { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs index a83cff96d..267a4d485 100644 --- a/src/expr/subquery_alias.rs +++ b/src/expr/subquery_alias.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::SubqueryAlias; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; @@ -85,7 +85,7 @@ impl LogicalNode for PySubqueryAlias { vec![PyLogicalPlan::from((*self.subquery_alias.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index f61be7fe4..6a0d53f0f 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -17,7 +17,7 @@ use datafusion::common::TableReference; use datafusion::logical_expr::logical_plan::TableScan; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::expr::logical_node::LogicalNode; @@ -146,7 +146,7 @@ impl 
LogicalNode for PyTableScan { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/union.rs b/src/expr/union.rs index 62488d9a1..5a08ccc13 100644 --- a/src/expr/union.rs +++ b/src/expr/union.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Union; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -83,7 +83,7 @@ impl LogicalNode for PyUnion { .collect() } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/unnest.rs b/src/expr/unnest.rs index adc705035..8e70e0990 100644 --- a/src/expr/unnest.rs +++ b/src/expr/unnest.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Unnest; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -79,7 +79,7 @@ impl LogicalNode for PyUnnest { vec![PyLogicalPlan::from((*self.unnest_.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/window.rs b/src/expr/window.rs index 4dc6cb9c9..13deaec25 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -18,7 +18,7 @@ use datafusion::common::{DataFusionError, ScalarValue}; use datafusion::logical_expr::expr::WindowFunction; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::data_type::PyScalarValue; @@ -289,7 +289,7 @@ impl LogicalNode for PyWindowExpr { vec![self.window.input.as_ref().clone().into()] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/lib.rs b/src/lib.rs index 317c3a49a..ce93ff0c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,21 +94,21 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; // Register `common` as a submodule. Matching `datafusion-common` https://docs.rs/datafusion-common/latest/datafusion_common/ - let common = PyModule::new_bound(py, "common")?; + let common = PyModule::new(py, "common")?; common::init_module(&common)?; m.add_submodule(&common)?; // Register `expr` as a submodule. 
Matching `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ - let expr = PyModule::new_bound(py, "expr")?; + let expr = PyModule::new(py, "expr")?; expr::init_module(&expr)?; m.add_submodule(&expr)?; // Register the functions as a submodule - let funcs = PyModule::new_bound(py, "functions")?; + let funcs = PyModule::new(py, "functions")?; functions::init_module(&funcs)?; m.add_submodule(&funcs)?; - let store = PyModule::new_bound(py, "object_store")?; + let store = PyModule::new(py, "object_store")?; store::init_module(&store)?; m.add_submodule(&store)?; @@ -121,7 +121,7 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { #[cfg(feature = "substrait")] fn setup_substrait_module(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { - let substrait = PyModule::new_bound(py, "substrait")?; + let substrait = PyModule::new(py, "substrait")?; substrait::init_module(&substrait)?; m.add_submodule(&substrait)?; Ok(()) diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 295908dc7..f0be45c6a 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -66,7 +66,7 @@ impl PyExecutionPlan { )?; let bytes = proto.encode_to_vec(); - Ok(PyBytes::new_bound(py, &bytes)) + Ok(PyBytes::new(py, &bytes)) } #[staticmethod] diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 314eebf4f..4b4c86597 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -16,7 +16,7 @@ // under the License. 
/// Converts a Datafusion logical plan expression (Expr) into a PyArrow compute expression -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::convert::TryFrom; use std::result::Result; @@ -53,24 +53,28 @@ fn operator_to_py<'py>( Ok(py_op) } -fn extract_scalar_list(exprs: &[Expr], py: Python) -> PyDataFusionResult> { +fn extract_scalar_list<'py>( + exprs: &[Expr], + py: Python<'py>, +) -> PyDataFusionResult>> { let ret = exprs .iter() .map(|expr| match expr { // TODO: should we also leverage `ScalarValue::to_pyarrow` here? Expr::Literal(v) => match v { - ScalarValue::Boolean(Some(b)) => Ok(b.into_py(py)), - ScalarValue::Int8(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int16(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int32(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int64(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt8(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt16(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt32(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt64(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Float32(Some(f)) => Ok(f.into_py(py)), - ScalarValue::Float64(Some(f)) => Ok(f.into_py(py)), - ScalarValue::Utf8(Some(s)) => Ok(s.into_py(py)), + // The unwraps here are for infallible conversions + ScalarValue::Boolean(Some(b)) => Ok(b.into_bound_py_any(py)?), + ScalarValue::Int8(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int16(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int32(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int64(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt8(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt16(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt32(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt64(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Float32(Some(f)) => Ok(f.into_bound_py_any(py)?), + ScalarValue::Float64(Some(f)) => Ok(f.into_bound_py_any(py)?), + ScalarValue::Utf8(Some(s)) => 
Ok(s.into_bound_py_any(py)?), _ => Err(PyDataFusionError::Common(format!( "PyArrow can't handle ScalarValue: {v:?}" ))), @@ -98,8 +102,8 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { // https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Expression.html#pyarrow-dataset-expression fn try_from(expr: &Expr) -> Result { Python::with_gil(|py| { - let pc = Python::import_bound(py, "pyarrow.compute")?; - let op_module = Python::import_bound(py, "operator")?; + let pc = Python::import(py, "pyarrow.compute")?; + let op_module = Python::import(py, "operator")?; let pc_expr: PyDataFusionResult> = match expr { Expr::Column(Column { name, .. }) => Ok(pc.getattr("field")?.call1((name,))?), Expr::Literal(scalar) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs index 2b31467f8..cab708458 100644 --- a/src/pyarrow_util.rs +++ b/src/pyarrow_util.rs @@ -33,8 +33,8 @@ impl FromPyArrow for PyScalarValue { let val = value.call_method0("as_py")?; // construct pyarrow array from the python value and pyarrow type - let factory = py.import_bound("pyarrow")?.getattr("array")?; - let args = PyList::new_bound(py, [val]); + let factory = py.import("pyarrow")?.getattr("array")?; + let args = PyList::new(py, [val])?; let array = factory.call1((args, typ))?; // convert the pyarrow array to rust array using C data interface diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 1be33b75f..96561c434 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -64,7 +64,7 @@ impl PyLogicalPlan { #[pymethods] impl PyLogicalPlan { /// Return the specific logical operator - pub fn to_variant(&self, py: Python) -> PyResult { + pub fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), @@ -132,7 +132,7 @@ impl PyLogicalPlan { 
datafusion_proto::protobuf::LogicalPlanNode::try_from_logical_plan(&self.plan, &codec)?; let bytes = proto.encode_to_vec(); - Ok(PyBytes::new_bound(py, &bytes)) + Ok(PyBytes::new(py, &bytes)) } #[staticmethod] diff --git a/src/substrait.rs b/src/substrait.rs index 8dcf3e8a7..1fefc0bbd 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -40,7 +40,7 @@ impl PyPlan { self.plan .encode(&mut proto_bytes) .map_err(PyDataFusionError::EncodeError)?; - Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) + Ok(PyBytes::new(py, &proto_bytes).into()) } } @@ -95,7 +95,7 @@ impl PySubstraitSerializer { py: Python, ) -> PyDataFusionResult { let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?; - Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) + Ok(PyBytes::new(py, &proto_bytes).into()) } #[staticmethod] diff --git a/src/udaf.rs b/src/udaf.rs index 5f21533e0..34a9cd51d 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -29,6 +29,7 @@ use datafusion::logical_expr::{ }; use crate::common::data_type::PyScalarValue; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -73,7 +74,7 @@ impl Accumulator for RustAccumulator { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function self.accum @@ -119,7 +120,7 @@ impl Accumulator for RustAccumulator { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. 
call function self.accum diff --git a/src/udf.rs b/src/udf.rs index 4570e77a6..574c9d7b5 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -28,6 +28,7 @@ use datafusion::logical_expr::function::ScalarFunctionImplementation; use datafusion::logical_expr::ScalarUDF; use datafusion::logical_expr::{create_udf, ColumnarValue}; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -46,11 +47,11 @@ fn pyarrow_function_to_rust( .map_err(|e| DataFusionError::Execution(format!("{e:?}"))) }) .collect::, _>>()?; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function let value = func - .call_bound(py, py_args, None) + .call(py, py_args, None) .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; // 3. cast to arrow::array::Array diff --git a/src/udwf.rs b/src/udwf.rs index 04a4a1640..defd9c522 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -27,6 +27,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use crate::common::data_type::PyScalarValue; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; @@ -56,8 +57,8 @@ impl PartitionEvaluator for RustPartitionEvaluator { fn get_range(&self, idx: usize, n_rows: usize) -> Result> { Python::with_gil(|py| { - let py_args = vec![idx.to_object(py), n_rows.to_object(py)]; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = vec![idx.into_pyobject(py)?, n_rows.into_pyobject(py)?]; + let py_args = PyTuple::new(py, py_args)?; self.evaluator .bind(py) @@ -93,17 +94,14 @@ impl PartitionEvaluator for RustPartitionEvaluator { fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { println!("evaluate all called with number of values {}", values.len()); Python::with_gil(|py| { - let py_values = PyList::new_bound( + let py_values = PyList::new( py, values .iter() .map(|arg| 
arg.into_data().to_pyarrow(py).unwrap()), - ); - let py_num_rows = num_rows.to_object(py).into_bound(py); - let py_args = PyTuple::new_bound( - py, - PyTuple::new_bound(py, vec![py_values.as_any(), &py_num_rows]), - ); + )?; + let py_num_rows = num_rows.into_pyobject(py)?; + let py_args = PyTuple::new(py, vec![py_values.as_any(), &py_num_rows])?; self.evaluator .bind(py) @@ -112,32 +110,28 @@ impl PartitionEvaluator for RustPartitionEvaluator { let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); make_array(array_data) }) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) + .map_err(to_datafusion_err) } fn evaluate(&mut self, values: &[ArrayRef], range: &Range) -> Result { Python::with_gil(|py| { - let py_values = PyList::new_bound( + let py_values = PyList::new( py, values .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()), - ); - let range_tuple = - PyTuple::new_bound(py, vec![range.start.to_object(py), range.end.to_object(py)]); - let py_args = PyTuple::new_bound( - py, - PyTuple::new_bound(py, vec![py_values.as_any(), range_tuple.as_any()]), - ); + )?; + let range_tuple = PyTuple::new(py, vec![range.start, range.end])?; + let py_args = PyTuple::new(py, vec![py_values.as_any(), range_tuple.as_any()])?; self.evaluator .bind(py) .call_method1("evaluate", py_args) .and_then(|v| v.extract::()) .map(|v| v.0) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) + .map_err(to_datafusion_err) } fn evaluate_all_with_rank( @@ -148,23 +142,27 @@ impl PartitionEvaluator for RustPartitionEvaluator { Python::with_gil(|py| { let ranks = ranks_in_partition .iter() - .map(|r| PyTuple::new_bound(py, vec![r.start, r.end])); + .map(|r| PyTuple::new(py, vec![r.start, r.end])) + .collect::>>()?; // 1. 
cast args to Pyarrow array - let py_args = vec![num_rows.to_object(py), PyList::new_bound(py, ranks).into()]; + let py_args = vec![ + num_rows.into_pyobject(py)?.into_any(), + PyList::new(py, ranks)?.into_any(), + ]; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args)?; // 2. call function self.evaluator .bind(py) .call_method1("evaluate_all_with_rank", py_args) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) .map(|v| { let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); make_array(array_data) }) }) + .map_err(to_datafusion_err) } fn supports_bounded_execution(&self) -> bool { From e6f6e66c1d180246ad933f8bcc0d40faa8426dfa Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 21 Feb 2025 16:03:36 -0500 Subject: [PATCH 046/248] Add user documentation for the FFI approach (#1031) * Initial commit for FFI user documentation * Update readme to point to the online documentation. Fix a small typo. * Small text adjustments for clarity and formatting --- README.md | 11 +- docs/source/contributor-guide/ffi.rst | 212 ++++++++++++++++++++++++++ docs/source/index.rst | 1 + 3 files changed, 220 insertions(+), 4 deletions(-) create mode 100644 docs/source/contributor-guide/ffi.rst diff --git a/README.md b/README.md index 5aaf7f5f3..9c56b62dd 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,8 @@ DataFusion's Python bindings can be used as a foundation for building new data s planning, and logical plan optimizations, and then transpiles the logical plan to Dask operations for execution. - [DataFusion Ballista](https://github.com/apache/datafusion-ballista) is a distributed SQL query engine that extends DataFusion's Python bindings for distributed use cases. 
- -It is also possible to use these Python bindings directly for DataFrame and SQL operations, but you may find that -[Polars](http://pola.rs/) and [DuckDB](http://www.duckdb.org/) are more suitable for this use case, since they have -more of an end-user focus and are more actively maintained than these Python bindings. +- [DataFusion Ray](https://github.com/apache/datafusion-ray) is another distributed query engine that uses + DataFusion's Python bindings. ## Features @@ -114,6 +112,11 @@ Printing the context will show the current configuration settings. print(ctx) ``` +## Extensions + +For information about how to extend DataFusion Python, please see the extensions page of the +[online documentation](https://datafusion.apache.org/python/). + ## More Examples See [examples](examples/README.md) for more information. diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst new file mode 100644 index 000000000..c1f9806b3 --- /dev/null +++ b/docs/source/contributor-guide/ffi.rst @@ -0,0 +1,212 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Python Extensions +================= + +The DataFusion in Python project is designed to allow users to extend its functionality in a few core +areas. Ideally many users would like to package their extensions as a Python package and easily +integrate that package with this project. This page serves to describe some of the challenges we face +when doing these integrations and the approach our project uses. + +The Primary Issue +----------------- + +Suppose you wish to use DataFusion and you have a custom data source that can produce tables that +can then be queried against, similar to how you can register a :ref:`CSV ` or +:ref:`Parquet ` file. In DataFusion terminology, you likely want to implement a +:ref:`Custom Table Provider `. In an effort to make your data source +as performant as possible and to utilize the features of DataFusion, you may decide to write +your source in Rust and then expose it through `PyO3 `_ as a Python library. + +At first glance, it may appear the best way to do this is to add the ``datafusion-python`` +crate as a dependency, provide a ``PyTable``, and then to register it with the +``SessionContext``. Unfortunately, this will not work. + +When you produce your code as a Python library and it needs to interact with the DataFusion +library, at the lowest level they communicate through an Application Binary Interface (ABI). +The acronym sounds similar to API (Application Programming Interface), but it is distinctly +different. + +The ABI sets the standard for how these libraries can share data and functions between each +other. One of the key differences between Rust and other programming languages is that Rust +does not have a stable ABI. What this means in practice is that if you compile a Rust library +with one version of the ``rustc`` compiler and I compile another library to interface with it +but I use a different version of the compiler, there is no guarantee the interface will be +the same. 
+ +In practice, this means that a Python library built with ``datafusion-python`` as a Rust +dependency will generally **not** be compatible with the DataFusion Python package, even +if they reference the same version of ``datafusion-python``. If you attempt to do this, it may +work on your local computer if you have built both packages with the same optimizations. +This can sometimes lead to a false expectation that the code will work, but it frequently +breaks the moment you try to use your package against the released packages. + +You can find more information about the Rust ABI in their +`online documentation `_. + +The FFI Approach +---------------- + +Rust supports interacting with other programming languages through its Foreign Function +Interface (FFI). The advantage of using the FFI is that it enables you to write data structures +and functions that have a stable ABI. This allows you to use Rust code with C, Python, and +other languages. In fact, the `PyO3 `_ library uses the FFI to share data +and functions between Python and Rust. + +The approach we are taking in the DataFusion in Python project is to incrementally expose +more portions of the DataFusion project via FFI interfaces. This allows users to write Rust +code that does **not** require the ``datafusion-python`` crate as a dependency, expose their +code in Python via PyO3, and have it interact with the DataFusion Python package. + +Early adopters of this approach include `delta-rs `_ +who have adapted their Table Provider for use in ``datafusion-python`` with only a few lines +of code. Also, the DataFusion Python project uses the existing definitions from +`Apache Arrow C Stream Interface `_ +to support importing **and** exporting tables. Any Python package that supports reading +the Arrow C Stream interface can work with DataFusion Python out of the box! You can read +more about working with Arrow sources in the :ref:`Data Sources ` +page.
+ +To learn more about the Foreign Function Interface in Rust, the +`Rustonomicon `_ is a good resource. + +Inspiration from Arrow +---------------------- + +DataFusion is built upon `Apache Arrow `_. The canonical Python +Arrow implementation, `pyarrow `_ provides +an excellent way to share Arrow data between Python projects without performing any copy +operations on the data. They do this by using a well defined set of interfaces. You can +find the details about their stream interface +`here `_. The +`Rust Arrow Implementation `_ also supports these +``C`` style definitions via the Foreign Function Interface. + +In addition to using these interfaces to transfer Arrow data between libraries, ``pyarrow`` +goes one step further to make sharing the interfaces easier in Python. They do this +by exposing PyCapsules that contain the expected functionality. + +You can learn more about PyCapsules from the official +`Python online documentation `_. PyCapsules +have excellent support in PyO3 already. The +`PyO3 online documentation `_ is a good source +for more details on using PyCapsules in Rust. + +Two lessons we leverage from the Arrow project in DataFusion Python are: + +- We reuse the existing Arrow FFI functionality wherever possible. +- We expose PyCapsules that contain a FFI stable struct. + +Implementation Details +---------------------- + +The bulk of the code necessary to perform our FFI operations is in the upstream +`DataFusion `_ core repository. You can review the code and +documentation in the `datafusion-ffi`_ crate. + +Our FFI implementation is narrowly focused at sharing data and functions with Rust backed +libraries. This allows us to use the `abi_stable crate `_. +This is an excellent crate that allows for easy conversion between Rust native types +and FFI-safe alternatives. For example, if you needed to pass a ``Vec`` via FFI, +you can simply convert it to a ``RVec`` in an intuitive manner. 
It also supports +features like ``RResult`` and ``ROption`` that do not have an obvious translation to a +C equivalent. + +The `datafusion-ffi`_ crate has been designed to make it easy to convert from DataFusion +traits into their FFI counterparts. For example, if you have defined a custom +`TableProvider `_ +and you want to create a sharable FFI counterpart, you could write: + +.. code-block:: rust + + let my_provider = MyTableProvider::default(); + let ffi_provider = FFI_TableProvider::new(Arc::new(my_provider), false, None); + +If you were interfacing with a library that provided the above ``FFI_TableProvider`` and +you needed to turn it back into a ``TableProvider``, you can turn it into a +``ForeignTableProvider`` which implements the ``TableProvider`` trait. + +.. code-block:: rust + + let foreign_provider: ForeignTableProvider = ffi_provider.into(); + +If you review the code in `datafusion-ffi`_ you will find that each of the traits we share +across the boundary has two portions, one with a ``FFI_`` prefix and one with a ``Foreign`` +prefix. This is used to distinguish which side of the FFI boundary that struct is +designed to be used on. The structures with the ``FFI_`` prefix are to be used on the +**provider** of the structure. In the example we're showing, this means the code that has +written the underlying ``TableProvider`` implementation to access your custom data source. +The structures with the ``Foreign`` prefix are to be used by the receiver. In this case, +it is the ``datafusion-python`` library. + +In order to share these FFI structures, we need to wrap them in some kind of Python object +that can be used to interface from one package to another. As described in the above +section on our inspiration from Arrow, we use ``PyCapsule``. We can create a ``PyCapsule`` +for our provider thusly: + +..
code-block:: rust + + let name = CString::new("datafusion_table_provider")?; + let my_capsule = PyCapsule::new_bound(py, provider, Some(name))?; + +On the receiving side, turn this pycapsule object into the ``FFI_TableProvider``, which +can then be turned into a ``ForeignTableProvider``. The associated code is: + +.. code-block:: rust + + let capsule = capsule.downcast::()?; + let provider = unsafe { capsule.reference::() }; + +By convention the ``datafusion-python`` library expects a Python object that has a +``TableProvider`` PyCapsule to have this capsule accessible by calling a function named +``__datafusion_table_provider__``. You can see a complete working example of how to +share a ``TableProvider`` from one python library to DataFusion Python in the +`repository examples folder `_. + +This section has been written using ``TableProvider`` as an example. It is the first +extension that has been written using this approach and the most thoroughly implemented. +As we continue to expose more of the DataFusion features, we intend to follow this same +design pattern. + +Alternative Approach +-------------------- + +Suppose you needed to expose some other features of DataFusion and you could not wait +for the upstream repository to implement the FFI approach we describe. In this case +you decide to create your dependency on the ``datafusion-python`` crate instead. + +As we discussed, this is not guaranteed to work across different compiler versions and +optimization levels. If you wish to go down this route, there are two approaches we +have identified you can use. + +#. Re-export all of ``datafusion-python`` yourself with your extensions built in. +#. Carefully synchronize your software releases with the ``datafusion-python`` CI build + system so that your libraries use the exact same compiler, features, and + optimization level. + +We currently do not recommend either of these approaches as they are difficult to +maintain over a long period.
Additionally, they require a tight version coupling +between libraries. + +Status of Work +-------------- + +At the time of this writing, the FFI features are under active development. To see +the latest status, we recommend reviewing the code in the `datafusion-ffi`_ crate. + +.. _datafusion-ffi: https://crates.io/crates/datafusion-ffi diff --git a/docs/source/index.rst b/docs/source/index.rst index 34eb23b28..558b2d572 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -85,6 +85,7 @@ Example :caption: CONTRIBUTOR GUIDE contributor-guide/introduction + contributor-guide/ffi .. _toc.api: .. toctree:: From 3f3983cc86ffe267cff97480241e8a588ac38fa3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 23 Feb 2025 08:00:52 -0500 Subject: [PATCH 047/248] build(deps): bump arrow from 54.1.0 to 54.2.0 (#1035) Bumps [arrow](https://github.com/apache/arrow-rs) from 54.1.0 to 54.2.0. - [Release notes](https://github.com/apache/arrow-rs/releases) - [Changelog](https://github.com/apache/arrow-rs/blob/main/CHANGELOG-old.md) - [Commits](https://github.com/apache/arrow-rs/compare/54.1.0...54.2.0) --- updated-dependencies: - dependency-name: arrow dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1b1ed50a..d23ed6169 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" 
+version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" dependencies = [ "arrow-array", "arrow-buffer", @@ -326,9 +326,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +352,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" dependencies = [ "ahash", "arrow-array", @@ -375,9 +375,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" dependencies = [ "arrow-array", "arrow-buffer", From 69ebf70bd821d0ae516d2f61d96058e2252a7a1f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 24 Feb 2025 21:30:52 +0100 Subject: [PATCH 048/248] Chore: Release datafusion-python 45 (#1024) * Bump version number to prepare for release * Add changelog 45.0.0 * Add deprecated marker from either typing or typing_extensions based 
on the python version * Limit pyarrow version per issue # 1023 * Bumping the version number to support new release candidate * There was no guarantee that the record batches would be returned in a single partition, so update the unit test to check all partitions. * Revert "Limit pyarrow version per issue # 1023" This reverts commit b48d5872661017ec21ea71f7dbb9569f2f0bf797. * Correct import for python 3.13 and above * Bump minor version due to pypi requirement * Update cargo lock --- Cargo.lock | 113 +++++++++++++-------------------- Cargo.toml | 2 +- dev/changelog/45.0.0.md | 42 ++++++++++++ python/datafusion/context.py | 5 +- python/datafusion/dataframe.py | 5 +- python/datafusion/expr.py | 6 +- python/datafusion/substrait.py | 5 +- python/tests/test_dataframe.py | 21 ++++-- 8 files changed, 118 insertions(+), 81 deletions(-) create mode 100644 dev/changelog/45.0.0.md diff --git a/Cargo.lock b/Cargo.lock index d23ed6169..5c7f2bf3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -606,19 +606,18 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.12+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" dependencies = [ "cc", "libc", @@ -627,9 +626,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.12" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" +checksum = 
"0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" dependencies = [ "jobserver", "libc", @@ -684,21 +683,20 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.53" +version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e24a03c8b52922d68a1589ad61032f2c1aa5a8158d2aa0d93c6e9534944bbad6" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" dependencies = [ "cc", ] [[package]] name = "comfy-table" -version = "7.1.3" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum", - "strum_macros", + "unicode-segmentation", "unicode-width", ] @@ -837,9 +835,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -878,7 +876,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2 0.5.1", "chrono", "datafusion-catalog", "datafusion-common", @@ -1240,7 +1238,7 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph 0.7.1", + "petgraph", ] [[package]] @@ -1341,7 +1339,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "44.0.0" +version = "45.2.0" dependencies = [ "arrow", "async-trait", @@ -1436,9 +1434,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = 
"877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" @@ -1456,12 +1454,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fixedbitset" version = "0.5.7" @@ -2269,9 +2261,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" dependencies = [ "adler2", ] @@ -2548,23 +2540,13 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset 0.4.2", - "indexmap", -] - [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "indexmap", ] @@ -2660,9 +2642,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = 
[ "bytes", "prost-derive", @@ -2670,16 +2652,16 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", - "petgraph 0.6.5", + "petgraph", "prettyplease", "prost", "prost-types", @@ -2690,12 +2672,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.98", @@ -2703,9 +2685,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] @@ -2721,9 +2703,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" dependencies = [ "cc", ] @@ -2860,9 +2842,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" +checksum = 
"e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" dependencies = [ "cfg_aliases", "libc", @@ -3042,15 +3024,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" dependencies = [ "cc", "cfg-if", "getrandom 0.2.15", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -3097,9 +3078,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.22" +version = "0.23.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" dependencies = [ "once_cell", "ring", @@ -3377,9 +3358,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "snafu" @@ -3418,12 +3399,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "sqlparser" version = "0.53.0" @@ -3453,9 +3428,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +checksum = "1d08feb8f695b465baed819b03c128dc23f57a694510ab1f06c77f763975685e" dependencies = [ "cc", 
"cfg-if", diff --git a/Cargo.toml b/Cargo.toml index d18e0e8f0..5358b1836 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "44.0.0" +version = "45.2.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/45.0.0.md b/dev/changelog/45.0.0.md new file mode 100644 index 000000000..93659b171 --- /dev/null +++ b/dev/changelog/45.0.0.md @@ -0,0 +1,42 @@ + + +# Apache DataFusion Python 45.0.0 Changelog + +This release consists of 2 commits from 2 contributors. See credits at the end of this changelog for more information. + +**Fixed bugs:** + +- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) + +**Other:** + +- Chore/upgrade datafusion 45 [#1010](https://github.com/apache/datafusion-python/pull/1010) (kevinjqliu) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 1 Kevin Liu + 1 Tim Saucer +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 864ef1c8b..21955b6d1 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -21,7 +21,10 @@ from typing import TYPE_CHECKING, Any, Protocol -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 7413a5fa3..23b5d630b 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -33,7 +33,10 @@ overload, ) -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.plan import ExecutionPlan, LogicalPlan from datafusion.record_batch import RecordBatchStream diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 68ddd7c9a..e3d7158eb 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -25,7 +25,11 @@ from typing import TYPE_CHECKING, Any, Optional, Type import pyarrow as pa -from typing_extensions import deprecated + +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.common import DataTypeMap, NullTreatment, RexType diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index 402184d3f..06302fe38 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -26,7 +26,10 @@ import pathlib from typing import TYPE_CHECKING -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.plan import LogicalPlan diff --git 
a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 5bc3fb094..c636e896a 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -755,13 +755,20 @@ def test_execution_plan(aggregate_df): assert "CsvExec:" in indent ctx = SessionContext() - stream = ctx.execute(plan, 0) - # get the one and only batch - batch = stream.next() - assert batch is not None - # there should be no more batches - with pytest.raises(StopIteration): - stream.next() + rows_returned = 0 + for idx in range(0, plan.partition_count): + stream = ctx.execute(plan, idx) + try: + batch = stream.next() + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + except StopIteration: + # This is one of the partitions with no values + pass + with pytest.raises(StopIteration): + stream.next() + + assert rows_returned == 5 def test_repartition(df): From a80a788f69cf46ef002b3c537837548cc103748c Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 8 Mar 2025 21:22:36 +0800 Subject: [PATCH 049/248] Enable Dataframe to be converted into views which can be used in register_table (#1016) * add test_view * feat: add into_view method to register DataFrame as a view * add pytableprovider * feat: add as_table method to PyTableProvider and update into_view to return PyTable * refactor: simplify as_table method and update documentation for into_view * test: improve test_register_filtered_dataframe by removing redundant comments and assertions * test: enhance test_register_filtered_dataframe with additional assertions for DataFrame results * ruff formatted * cleanup: remove unused imports from test_view.py * docs: add example for registering a DataFrame as a view in README.md * docs: update docstring for into_view method to clarify usage as ViewTable * chore: add license header to test_view.py * ruff correction * refactor: rename into_view method to _into_view * ruff lint * refactor: simplify into_view method and update Rust binding convention * docs: add views 
section to user guide with example on registering views * feat: add register_view method to SessionContext for DataFrame registration * docs: update README and user guide to reflect register_view method for DataFrame registration * docs: remove some documentation from PyDataFrame --- README.md | 40 +++++++++++++ .../user-guide/common-operations/index.rst | 1 + .../user-guide/common-operations/views.rst | 58 +++++++++++++++++++ python/datafusion/context.py | 12 ++++ python/datafusion/dataframe.py | 4 ++ python/tests/test_view.py | 49 ++++++++++++++++ src/dataframe.rs | 39 +++++++++++++ 7 files changed, 203 insertions(+) create mode 100644 docs/source/user-guide/common-operations/views.rst create mode 100644 python/tests/test_view.py diff --git a/README.md b/README.md index 9c56b62dd..4f80dbe18 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,46 @@ This produces the following chart: ![Chart](examples/chart.png) +## Registering a DataFrame as a View + +You can use SessionContext's `register_view` method to convert a DataFrame into a view and register it with the context. 
+ +```python +from datafusion import SessionContext, col, literal + +# Create a DataFusion context +ctx = SessionContext() + +# Create sample data +data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + +# Create a DataFrame from the dictionary +df = ctx.from_pydict(data, "my_table") + +# Filter the DataFrame (for example, keep rows where a > 2) +df_filtered = df.filter(col("a") > literal(2)) + +# Register the dataframe as a view with the context +ctx.register_view("view1", df_filtered) + +# Now run a SQL query against the registered view +df_view = ctx.sql("SELECT * FROM view1") + +# Collect the results +results = df_view.collect() + +# Convert results to a list of dictionaries for display +result_dicts = [batch.to_pydict() for batch in results] + +print(result_dicts) +``` + +This will output: + +```python +[{'a': [3, 4, 5], 'b': [30, 40, 50]}] +``` + ## Configuration It is possible to configure runtime (memory and disk settings) and configuration settings when creating a context. diff --git a/docs/source/user-guide/common-operations/index.rst b/docs/source/user-guide/common-operations/index.rst index d7c708c21..7abd1f138 100644 --- a/docs/source/user-guide/common-operations/index.rst +++ b/docs/source/user-guide/common-operations/index.rst @@ -23,6 +23,7 @@ The contents of this section are designed to guide a new user through how to use .. toctree:: :maxdepth: 2 + views basic-info select-and-filter expressions diff --git a/docs/source/user-guide/common-operations/views.rst b/docs/source/user-guide/common-operations/views.rst new file mode 100644 index 000000000..df11e3abe --- /dev/null +++ b/docs/source/user-guide/common-operations/views.rst @@ -0,0 +1,58 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. 
"License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +====================== +Registering Views +====================== + +You can use the context's ``register_view`` method to register a DataFrame as a view + +.. code-block:: python + + from datafusion import SessionContext, col, literal + + # Create a DataFusion context + ctx = SessionContext() + + # Create sample data + data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + + # Create a DataFrame from the dictionary + df = ctx.from_pydict(data, "my_table") + + # Filter the DataFrame (for example, keep rows where a > 2) + df_filtered = df.filter(col("a") > literal(2)) + + # Register the dataframe as a view with the context + ctx.register_view("view1", df_filtered) + + # Now run a SQL query against the registered view + df_view = ctx.sql("SELECT * FROM view1") + + # Collect the results + results = df_view.collect() + + # Convert results to a list of dictionaries for display + result_dicts = [batch.to_pydict() for batch in results] + + print(result_dicts) + +This will output: + +.. 
code-block:: python + + [{'a': [3, 4, 5], 'b': [30, 40, 50]}] diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 21955b6d1..befc4dce6 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -707,6 +707,18 @@ def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFr """ return DataFrame(self.ctx.from_polars(data, name)) + # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 + # is the discussion on how we arrived at adding register_view + def register_view(self, name: str, df: DataFrame): + """Register a :py:class:`~datafusion.dataframe.DataFrame` as a view. + + Args: + name (str): The name to register the view under. + df (DataFrame): The DataFrame to be converted into a view and registered. + """ + view = df.into_view() + self.ctx.register_table(name, view) + def register_table(self, name: str, table: Table) -> None: """Register a :py:class: `~datafusion.catalog.Table` as a table. diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 23b5d630b..85a179ec9 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -124,6 +124,10 @@ def __init__(self, df: DataFrameInternal) -> None: """ self.df = df + def into_view(self) -> pa.Table: + """Convert DataFrame into a ViewTable which can be used in register_table.""" + return self.df.into_view() + def __getitem__(self, key: str | List[str]) -> DataFrame: """Return a new :py:class`DataFrame` with the specified column or columns. diff --git a/python/tests/test_view.py b/python/tests/test_view.py new file mode 100644 index 000000000..1d92cc0d4 --- /dev/null +++ b/python/tests/test_view.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from datafusion import SessionContext, col, literal + + +def test_register_filtered_dataframe(): + ctx = SessionContext() + + data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + + df = ctx.from_pydict(data, "my_table") + + df_filtered = df.filter(col("a") > literal(2)) + + ctx.register_view("view1", df_filtered) + + df_view = ctx.sql("SELECT * FROM view1") + + filtered_results = df_view.collect() + + result_dicts = [batch.to_pydict() for batch in filtered_results] + + expected_results = [{"a": [3, 4, 5], "b": [30, 40, 50]}] + + assert result_dicts == expected_results + + df_results = df.collect() + + df_result_dicts = [batch.to_pydict() for batch in df_results] + + expected_df_results = [{"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}] + + assert df_result_dicts == expected_df_results diff --git a/src/dataframe.rs b/src/dataframe.rs index ed9578a71..243e2e14f 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -30,6 +30,7 @@ use datafusion::arrow::util::pretty; use datafusion::common::UnnestOptions; use datafusion::config::{CsvOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; +use datafusion::datasource::TableProvider; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; @@ -39,6 +40,7 @@ use pyo3::pybacked::PyBackedStr; use 
pyo3::types::{PyCapsule, PyTuple, PyTupleMethods}; use tokio::task::JoinHandle; +use crate::catalog::PyTable; use crate::errors::{py_datafusion_err, PyDataFusionError}; use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; @@ -50,6 +52,25 @@ use crate::{ expr::{sort_expr::PySortExpr, PyExpr}, }; +// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 +// - we have not decided on the table_provider approach yet +// this is an interim implementation +#[pyclass(name = "TableProvider", module = "datafusion")] +pub struct PyTableProvider { + provider: Arc, +} + +impl PyTableProvider { + pub fn new(provider: Arc) -> Self { + Self { provider } + } + + pub fn as_table(&self) -> PyTable { + let table_provider: Arc = self.provider.clone(); + PyTable::new(table_provider) + } +} + /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. /// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. @@ -156,6 +177,24 @@ impl PyDataFrame { PyArrowType(self.df.schema().into()) } + /// Convert this DataFrame into a Table that can be used in register_table + /// By convention, into_... methods consume self and return the new object. + /// Disabling the clippy lint, so we can use &self + /// because we're working with Python bindings + /// where objects are shared + /// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 + /// - we have not decided on the table_provider approach yet + #[allow(clippy::wrong_self_convention)] + fn into_view(&self) -> PyDataFusionResult { + // Call the underlying Rust DataFrame::into_view method. + // Note that the Rust method consumes self; here we clone the inner Arc + // so that we don’t invalidate this PyDataFrame. 
+ let table_provider = self.df.as_ref().clone().into_view(); + let table_provider = PyTableProvider::new(table_provider); + + Ok(table_provider.as_table()) + } + #[pyo3(signature = (*args))] fn select_columns(&self, args: Vec) -> PyDataFusionResult { let args = args.iter().map(|s| s.as_ref()).collect::>(); From 9027b4d79fdd7a41dd9c1f25c2ecebc1fabf50f2 Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Sat, 8 Mar 2025 21:24:02 +0800 Subject: [PATCH 050/248] fix: type checking (#993) * fix: type checking * update license * format * format * update catalog * revert type annotation * format * format * update --- python/datafusion/catalog.py | 5 +++-- python/datafusion/context.py | 19 ++++++++++++------ python/datafusion/dataframe.py | 3 ++- python/datafusion/expr.py | 8 ++++---- python/datafusion/functions.py | 10 +++++++--- python/datafusion/input/location.py | 10 +++++----- python/datafusion/udf.py | 7 ++++--- python/tests/test_functions.py | 30 +++++++++++++++++++++++++++++ 8 files changed, 68 insertions(+), 24 deletions(-) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 703037665..0560f4704 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -66,11 +66,12 @@ def __init__(self, table: df_internal.Table) -> None: """This constructor is not typically called by the end user.""" self.table = table + @property def schema(self) -> pyarrow.Schema: """Returns the schema associated with this table.""" - return self.table.schema() + return self.table.schema @property def kind(self) -> str: """Returns the kind of table.""" - return self.table.kind() + return self.table.kind diff --git a/python/datafusion/context.py b/python/datafusion/context.py index befc4dce6..282b2a477 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -728,7 +728,7 @@ def register_table(self, name: str, table: Table) -> None: name: Name of the resultant table. table: DataFusion table to add to the session context. 
""" - self.ctx.register_table(name, table) + self.ctx.register_table(name, table.table) def deregister_table(self, name: str) -> None: """Remove a table from the session.""" @@ -767,7 +767,7 @@ def register_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -798,7 +798,9 @@ def register_parquet( file_extension, skip_metadata, schema, - file_sort_order, + [sort_list_to_raw_sort_list(exprs) for exprs in file_sort_order] + if file_sort_order is not None + else None, ) def register_csv( @@ -934,7 +936,7 @@ def register_udwf(self, udwf: WindowUDF) -> None: def catalog(self, name: str = "datafusion") -> Catalog: """Retrieve a catalog by name.""" - return self.ctx.catalog(name) + return Catalog(self.ctx.catalog(name)) @deprecated( "Use the catalog provider interface ``SessionContext.Catalog`` to " @@ -1054,7 +1056,7 @@ def read_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -1078,6 +1080,11 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] + file_sort_order = ( + [sort_list_to_raw_sort_list(f) for f in file_sort_order] + if file_sort_order is not None + else None + ) return DataFrame( self.ctx.read_parquet( str(path), @@ -1121,7 +1128,7 @@ def read_table(self, table: Table) -> DataFrame: :py:class:`~datafusion.catalog.ListingTable`, create a :py:class:`~datafusion.dataframe.DataFrame`. 
""" - return DataFrame(self.ctx.read_table(table)) + return DataFrame(self.ctx.read_table(table.table)) def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: """Execute the ``plan`` and return the results.""" diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 85a179ec9..de5d8376e 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -52,6 +52,7 @@ from enum import Enum from datafusion._internal import DataFrame as DataFrameInternal +from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -277,7 +278,7 @@ def with_columns( def _simplify_expression( *exprs: Expr | Iterable[Expr], **named_exprs: Expr - ) -> list[Expr]: + ) -> list[expr_internal.Expr]: expr_list = [] for expr in exprs: if isinstance(expr, Expr): diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index e3d7158eb..3639abec6 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -176,7 +176,7 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: """Helper function to return a default Sort if an Expr is provided.""" if isinstance(e, SortExpr): return e.raw_sort - return SortExpr(e.expr, True, True).raw_sort + return SortExpr(e, True, True).raw_sort def sort_list_to_raw_sort_list( @@ -231,7 +231,7 @@ def variant_name(self) -> str: def __richcmp__(self, other: Expr, op: int) -> Expr: """Comparison operator.""" - return Expr(self.expr.__richcmp__(other, op)) + return Expr(self.expr.__richcmp__(other.expr, op)) def __repr__(self) -> str: """Generate a string representation of this expression.""" @@ -417,7 +417,7 @@ def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: ascending: If true, sort in ascending order. nulls_first: Return null values first. 
""" - return SortExpr(self.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(self, ascending=ascending, nulls_first=nulls_first) def is_null(self) -> Expr: """Returns ``True`` if this expression is null.""" @@ -789,7 +789,7 @@ class SortExpr: def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: """This constructor should not be called by the end user.""" - self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) + self.raw_sort = expr_internal.SortExpr(expr.expr, ascending, nulls_first) def expr(self) -> Expr: """Return the raw expr backing the SortExpr.""" diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 5c260aade..b449c4868 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -366,7 +366,7 @@ def concat_ws(separator: str, *args: Expr) -> Expr: def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a new sort expression.""" - return SortExpr(expr.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) def alias(expr: Expr, name: str) -> Expr: @@ -942,6 +942,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_millis(arg.expr, *formatters)) @@ -950,6 +951,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_micros(arg.expr, *formatters)) @@ -958,6 +960,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. 
""" + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) @@ -966,6 +969,7 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) @@ -1078,9 +1082,9 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr: return Expr(f.range(start.expr, stop.expr, step.expr)) -def uuid(arg: Expr) -> Expr: +def uuid() -> Expr: """Returns uuid v4 as a string value.""" - return Expr(f.uuid(arg.expr)) + return Expr(f.uuid()) def struct(*args: Expr) -> Expr: diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index a8252b53c..517cd1578 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -37,12 +37,12 @@ def is_correct_input(self, input_item: Any, table_name: str, **kwargs): def build_table( self, - input_file: str, + input_item: str, table_name: str, **kwargs, ) -> SqlTable: """Create a table from the input source.""" - _, extension = os.path.splitext(input_file) + _, extension = os.path.splitext(input_item) format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] @@ -50,7 +50,7 @@ def build_table( import pyarrow.parquet as pq # Read the Parquet metadata - metadata = pq.read_metadata(input_file) + metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable for col in metadata.schema: @@ -69,7 +69,7 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. 
- with open(input_file, "r") as file: + with open(input_item, "r") as file: reader = csv.reader(file) header_row = next(reader) print(header_row) @@ -84,6 +84,6 @@ def build_table( ) # Input could possibly be multiple files. Create a list if so - input_files = glob.glob(input_file) + input_files = glob.glob(input_item) return SqlTable(table_name, columns, num_rows, input_files) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index c97f453d0..0bba3d723 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -85,7 +85,7 @@ class ScalarUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[..., _R], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: _R, @@ -182,7 +182,7 @@ class AggregateUDF: def __init__( self, - name: Optional[str], + name: str, accumulator: Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, @@ -277,6 +277,7 @@ def sum_bias_10() -> Summarize: ) if name is None: name = accum.__call__().__class__.__qualname__.lower() + assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] return AggregateUDF( @@ -462,7 +463,7 @@ class WindowUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index b1a739b49..fca05bb8f 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -871,7 +871,22 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_seconds( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + 
f.to_timestamp_millis( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_micros( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), + f.to_timestamp_nanos( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), ) result = df.collect() assert len(result) == 1 @@ -913,6 +928,21 @@ def test_temporal_functions(df): assert result.column(11) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") ) + assert result.column(12) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + ) + assert result.column(13) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + ) + assert result.column(14) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + ) + assert result.column(15) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) + assert result.column(16) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): From acd70409f73f299a144e7ff4115c6e6035c3ffb5 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sat, 8 Mar 2025 16:37:10 +0100 Subject: [PATCH 051/248] feat: reads using global ctx (#982) * feat: reads using global ctx * Add text to io methods to describe the context they are using --------- Co-authored-by: Tim Saucer --- python/datafusion/__init__.py | 5 + python/datafusion/io.py | 199 ++++++++++++++++++++++++++ python/tests/test_io.py | 95 ++++++++++++ python/tests/test_wrapper_coverage.py | 2 + src/context.rs | 12 +- src/utils.rs | 8 ++ 6 files changed, 319 insertions(+), 2 deletions(-) create mode 100644 python/datafusion/io.py create mode 100644 python/tests/test_io.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 
85aefcce7..f11ce54a6 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -45,6 +45,7 @@ Expr, WindowFrame, ) +from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF @@ -81,6 +82,10 @@ "functions", "object_store", "substrait", + "read_parquet", + "read_avro", + "read_csv", + "read_json", ] diff --git a/python/datafusion/io.py b/python/datafusion/io.py new file mode 100644 index 000000000..7f3b77efa --- /dev/null +++ b/python/datafusion/io.py @@ -0,0 +1,199 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""IO read functions using global context.""" + +import pathlib + +import pyarrow + +from datafusion.dataframe import DataFrame +from datafusion.expr import Expr + +from ._internal import SessionContext as SessionContextInternal + + +def read_parquet( + path: str | pathlib.Path, + table_partition_cols: list[tuple[str, str]] | None = None, + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: pyarrow.Schema | None = None, + file_sort_order: list[list[Expr]] | None = None, +) -> DataFrame: + """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the Parquet file. + table_partition_cols: Partition columns. + parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + file_extension: File extension; only files with this extension are + selected for data input. + skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + file_sort_order: Sort order for the file. 
+ + Returns: + DataFrame representation of the read Parquet files + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, + ) + ) + + +def read_json( + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, +) -> DataFrame: + """Read a line-delimited JSON data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the JSON file. + schema: The data source schema. + schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read JSON files. + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + +def read_csv( + path: str | pathlib.Path | list[str] | list[pathlib.Path], + schema: pyarrow.Schema | None = None, + has_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, +) -> DataFrame: + """Read a CSV data source. + + This function will use the global context. 
Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the CSV file + schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in the file. + has_header: Whether the CSV file has a header. If schema inference + is run on a file with no headers, default column names are + created. + delimiter: An optional column delimiter. + schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read CSV files + """ + if table_partition_cols is None: + table_partition_cols = [] + + path = [str(p) for p in path] if isinstance(path, list) else str(path) + + return DataFrame( + SessionContextInternal._global_ctx().read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + +def read_avro( + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + file_partition_cols: list[tuple[str, str]] | None = None, + file_extension: str = ".avro", +) -> DataFrame: + """Create a :py:class:`DataFrame` for reading an Avro data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the Avro file. + schema: The data source schema. + file_partition_cols: Partition columns. + file_extension: File extension to select.
+ + Returns: + DataFrame representation of the read Avro file + """ + if file_partition_cols is None: + file_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_avro( + str(path), schema, file_partition_cols, file_extension + ) + ) diff --git a/python/tests/test_io.py b/python/tests/test_io.py new file mode 100644 index 000000000..21ad188ee --- /dev/null +++ b/python/tests/test_io.py @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import os +import pathlib + +import pyarrow as pa +from datafusion import column +from datafusion.io import read_avro, read_csv, read_json, read_parquet + + +def test_read_json_global_ctx(ctx): + path = os.path.dirname(os.path.abspath(__file__)) + + # Default + test_data_path = os.path.join(path, "data_test_context", "data.json") + df = read_json(test_data_path) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + # Schema + schema = pa.schema( + [ + pa.field("A", pa.string(), nullable=True), + ] + ) + df = read_json(test_data_path, schema=schema) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].schema == schema + + # File extension + test_data_path = os.path.join(path, "data_test_context", "data.json") + df = read_json(test_data_path, file_extension=".json") + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + +def test_read_parquet_global(): + parquet_df = read_parquet(path="parquet/data/alltypes_plain.parquet") + parquet_df.show() + assert parquet_df is not None + + path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" + parquet_df = read_parquet(path=path) + assert parquet_df is not None + + +def test_read_csv(): + csv_df = read_csv(path="testing/data/csv/aggregate_test_100.csv") + csv_df.select(column("c1")).show() + + +def test_read_csv_list(): + csv_df = read_csv(path=["testing/data/csv/aggregate_test_100.csv"]) + expected = csv_df.count() * 2 + + double_csv_df = read_csv( + path=[ + "testing/data/csv/aggregate_test_100.csv", + "testing/data/csv/aggregate_test_100.csv", + ] + ) + actual = double_csv_df.count() + + double_csv_df.select(column("c1")).show() + assert actual == expected + + +def test_read_avro(): + avro_df = read_avro(path="testing/data/avro/alltypes_plain.avro") + avro_df.show() + assert avro_df is not None 
+ + path = pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro" + avro_df = read_avro(path=path) + assert avro_df is not None diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index 86f2d57f2..ac064ba95 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -34,6 +34,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: return for attr in dir(internal_obj): + if attr in ["_global_ctx"]: + continue assert attr in dir(wrapped_obj) internal_attr = getattr(internal_obj, attr) diff --git a/src/context.rs b/src/context.rs index 0f962638e..9ba87eb8a 100644 --- a/src/context.rs +++ b/src/context.rs @@ -44,7 +44,7 @@ use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; use crate::udf::PyScalarUDF; use crate::udwf::PyWindowUDF; -use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; +use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; @@ -69,7 +69,7 @@ use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; -use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -306,6 +306,14 @@ impl PySessionContext { }) } + #[classmethod] + #[pyo3(signature = ())] + fn _global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { + Ok(Self { + ctx: get_global_ctx().clone(), + }) + } + /// Register an object store with the given name #[pyo3(signature = (scheme, store, host=None))] pub fn register_object_store( diff --git a/src/utils.rs b/src/utils.rs index ed224b364..999aad755 100644 --- 
a/src/utils.rs +++ b/src/utils.rs @@ -17,6 +17,7 @@ use crate::errors::{PyDataFusionError, PyDataFusionResult}; use crate::TokioRuntime; +use datafusion::execution::context::SessionContext; use datafusion::logical_expr::Volatility; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -37,6 +38,13 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap())) } +/// Utility to get the Global Datafussion CTX +#[inline] +pub(crate) fn get_global_ctx() -> &'static SessionContext { + static CTX: OnceLock = OnceLock::new(); + CTX.get_or_init(|| SessionContext::new()) +} + /// Utility to collect rust futures with GIL released pub fn wait_for_future(py: Python, f: F) -> F::Output where From 973d7ec4a8196a78bc4fb32db4f24e523997ba4c Mon Sep 17 00:00:00 2001 From: Crystal Zhou <45134936+CrystalZhou0529@users.noreply.github.com> Date: Sat, 8 Mar 2025 16:23:54 -0500 Subject: [PATCH 052/248] feat: Implementation of udf and udaf decorator (#1040) * Implementation of udf and udaf decorator * Rename decorators back to udf and udaf, update documentations * Minor typo fixes * Fixing linting errors * ruff formatting --------- Co-authored-by: Tim Saucer --- python/datafusion/udf.py | 257 +++++++++++++++++++++++++++----------- python/tests/test_udaf.py | 42 +++++++ python/tests/test_udf.py | 42 ++++++- 3 files changed, 265 insertions(+), 76 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 0bba3d723..af7bcf2ed 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -19,6 +19,7 @@ from __future__ import annotations +import functools from abc import ABCMeta, abstractmethod from enum import Enum from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar @@ -110,43 +111,102 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) - @staticmethod - def udf( - func: Callable[..., 
_R], - input_types: list[pyarrow.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> ScalarUDF: - """Create a new User-Defined Function. + class udf: + """Create a new User-Defined Function (UDF). + + This class can be used both as a **function** and as a **decorator**. + + Usage: + - **As a function**: Call `udf(func, input_types, return_type, volatility, + name)`. + - **As a decorator**: Use `@udf(input_types, return_type, volatility, + name)`. In this case, do **not** pass `func` explicitly. Args: - func: A callable python function. - input_types: The data types of the arguments to ``func``. This list - must be of the same length as the number of arguments. - return_type: The data type of the return value from the python - function. - volatility: See ``Volatility`` for allowed values. - name: A descriptive name for the function. + func (Callable, optional): **Only needed when calling as a function.** + Skip this argument when using `udf` as a decorator. + input_types (list[pyarrow.DataType]): The data types of the arguments + to `func`. This list must be of the same length as the number of + arguments. + return_type (_R): The data type of the return value from the function. + volatility (Volatility | str): See `Volatility` for allowed values. + name (Optional[str]): A descriptive name for the function. Returns: - A user-defined aggregate function, which can be used in either data - aggregation or window function calls. + A user-defined function that can be used in SQL expressions, + data aggregation, or window function calls. 
+ + Example: + **Using `udf` as a function:** + ``` + def double_func(x): + return x * 2 + double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), + "volatile", "double_it") + ``` + + **Using `udf` as a decorator:** + ``` + @udf([pyarrow.int32()], pyarrow.int32(), "volatile", "double_it") + def double_udf(x): + return x * 2 + ``` """ - if not callable(func): - raise TypeError("`func` argument must be callable") - if name is None: - if hasattr(func, "__qualname__"): - name = func.__qualname__.lower() + + def __new__(cls, *args, **kwargs): + """Create a new UDF. + + Trigger UDF function or decorator depending on if the first args is callable + """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return cls._function(*args, **kwargs) else: - name = func.__class__.__name__.lower() - return ScalarUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, - ) + # Case 2: Used as a decorator with parameters + return cls._decorator(*args, **kwargs) + + @staticmethod + def _function( + func: Callable[..., _R], + input_types: list[pyarrow.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> ScalarUDF: + if not callable(func): + raise TypeError("`func` argument must be callable") + if name is None: + if hasattr(func, "__qualname__"): + name = func.__qualname__.lower() + else: + name = func.__class__.__name__.lower() + return ScalarUDF( + name=name, + func=func, + input_types=input_types, + return_type=return_type, + volatility=volatility, + ) + + @staticmethod + def _decorator( + input_types: list[pyarrow.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ): + def decorator(func): + udf_caller = ScalarUDF.udf( + func, input_types, return_type, volatility, name + ) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + return udf_caller(*args, **kwargs) + + return 
wrapper + + return decorator class Accumulator(metaclass=ABCMeta): @@ -212,25 +272,27 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udaf.__call__(*args_raw)) - @staticmethod - def udaf( - accum: Callable[[], Accumulator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> AggregateUDF: - """Create a new User-Defined Aggregate Function. + class udaf: + """Create a new User-Defined Aggregate Function (UDAF). - If your :py:class:`Accumulator` can be instantiated with no arguments, you - can simply pass it's type as ``accum``. If you need to pass additional arguments - to it's constructor, you can define a lambda or a factory method. During runtime - the :py:class:`Accumulator` will be constructed for every instance in - which this UDAF is used. The following examples are all valid. + This class allows you to define an **aggregate function** that can be used in + data aggregation or window function calls. - .. code-block:: python + Usage: + - **As a function**: Call `udaf(accum, input_types, return_type, state_type, + volatility, name)`. + - **As a decorator**: Use `@udaf(input_types, return_type, state_type, + volatility, name)`. + When using `udaf` as a decorator, **do not pass `accum` explicitly**. + **Function example:** + + If your `:py:class:Accumulator` can be instantiated with no arguments, you + can simply pass it's type as `accum`. If you need to pass additional + arguments to it's constructor, you can define a lambda or a factory method. + During runtime the `:py:class:Accumulator` will be constructed for every + instance in which this UDAF is used. The following examples are all valid. 
+ ``` import pyarrow as pa import pyarrow.compute as pc @@ -253,12 +315,24 @@ def evaluate(self) -> pa.Scalar: def sum_bias_10() -> Summarize: return Summarize(10.0) - udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], "immutable") - udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], "immutable") - udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), [pa.float64()], "immutable") + udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), + [pa.float64()], "immutable") + ``` + + **Decorator example:** + ``` + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def udf4() -> Summarize: + return Summarize(10.0) + ``` Args: - accum: The accumulator python function. + accum: The accumulator python function. **Only needed when calling as a + function. Skip this argument when using `udaf` as a decorator.** input_types: The data types of the arguments to ``accum``. return_type: The data type of the return value. state_type: The data types of the intermediate accumulation. @@ -268,26 +342,69 @@ def sum_bias_10() -> Summarize: Returns: A user-defined aggregate function, which can be used in either data aggregation or window function calls. - """ # noqa W505 - if not callable(accum): - raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): - raise TypeError( - "Accumulator must implement the abstract base class Accumulator" + """ + + def __new__(cls, *args, **kwargs): + """Create a new UDAF. 
+ + Trigger UDAF function or decorator depending on if the first args is + callable + """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return cls._function(*args, **kwargs) + else: + # Case 2: Used as a decorator with parameters + return cls._decorator(*args, **kwargs) + + @staticmethod + def _function( + accum: Callable[[], Accumulator], + input_types: pyarrow.DataType | list[pyarrow.DataType], + return_type: pyarrow.DataType, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: + if not callable(accum): + raise TypeError("`func` must be callable.") + if not isinstance(accum.__call__(), Accumulator): + raise TypeError( + "Accumulator must implement the abstract base class Accumulator" + ) + if name is None: + name = accum.__call__().__class__.__qualname__.lower() + if isinstance(input_types, pyarrow.DataType): + input_types = [input_types] + return AggregateUDF( + name=name, + accumulator=accum, + input_types=input_types, + return_type=return_type, + state_type=state_type, + volatility=volatility, ) - if name is None: - name = accum.__call__().__class__.__qualname__.lower() - assert name is not None - if isinstance(input_types, pyarrow.DataType): - input_types = [input_types] - return AggregateUDF( - name=name, - accumulator=accum, - input_types=input_types, - return_type=return_type, - state_type=state_type, - volatility=volatility, - ) + + @staticmethod + def _decorator( + input_types: pyarrow.DataType | list[pyarrow.DataType], + return_type: pyarrow.DataType, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ): + def decorator(accum: Callable[[], Accumulator]): + udaf_caller = AggregateUDF.udaf( + accum, input_types, return_type, state_type, volatility, name + ) + + @functools.wraps(accum) + def wrapper(*args, **kwargs): + return udaf_caller(*args, **kwargs) + + return wrapper + 
+ return decorator class WindowEvaluator(metaclass=ABCMeta): diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 0005a3da8..e69c77d3c 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -117,6 +117,26 @@ def test_udaf_aggregate(df): assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) +def test_udaf_decorator_aggregate(df): + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def summarize(): + return Summarize() + + df1 = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df1.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + df2 = df.aggregate([], [summarize(column("a"))]) + + # Run a second time to ensure the state is properly reset + result = df2.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + def test_udaf_aggregate_with_arguments(df): bias = 10.0 @@ -143,6 +163,28 @@ def test_udaf_aggregate_with_arguments(df): assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) +def test_udaf_decorator_aggregate_with_arguments(df): + bias = 10.0 + + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def summarize(): + return Summarize(bias) + + df1 = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df1.collect()[0] + + assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) + + df2 = df.aggregate([], [summarize(column("a"))]) + + # Run a second time to ensure the state is properly reset + result = df2.collect()[0] + + assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) + + def test_group_by(df): summarize = udaf( Summarize, diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index 3a5dce6d6..a6c047552 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -24,7 +24,7 @@ def df(ctx): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( - [pa.array([1, 
2, 3]), pa.array([4, 4, 6])], + [pa.array([1, 2, 3]), pa.array([4, 4, None])], names=["a", "b"], ) return ctx.create_dataframe([[batch]], name="test_table") @@ -39,10 +39,20 @@ def test_udf(df): volatility="immutable", ) - df = df.select(is_null(column("a"))) + df = df.select(is_null(column("b"))) result = df.collect()[0].column(0) - assert result == pa.array([False, False, False]) + assert result == pa.array([False, False, True]) + + +def test_udf_decorator(df): + @udf([pa.int64()], pa.bool_(), "immutable") + def is_null(x: pa.Array) -> pa.Array: + return x.is_null() + + df = df.select(is_null(column("b"))) + result = df.collect()[0].column(0) + assert result == pa.array([False, False, True]) def test_register_udf(ctx, df) -> None: @@ -56,10 +66,10 @@ def test_register_udf(ctx, df) -> None: ctx.register_udf(is_null) - df_result = ctx.sql("select is_null(a) from test_table") + df_result = ctx.sql("select is_null(b) from test_table") result = df_result.collect()[0].column(0) - assert result == pa.array([False, False, False]) + assert result == pa.array([False, False, True]) class OverThresholdUDF: @@ -70,7 +80,7 @@ def __call__(self, values: pa.Array) -> pa.Array: return pa.array(v.as_py() >= self.threshold for v in values) -def test_udf_with_parameters(df) -> None: +def test_udf_with_parameters_function(df) -> None: udf_no_param = udf( OverThresholdUDF(), pa.int64(), @@ -94,3 +104,23 @@ def test_udf_with_parameters(df) -> None: result = df2.collect()[0].column(0) assert result == pa.array([False, True, True]) + + +def test_udf_with_parameters_decorator(df) -> None: + @udf([pa.int64()], pa.bool_(), "immutable") + def udf_no_param(values: pa.Array) -> pa.Array: + return OverThresholdUDF()(values) + + df1 = df.select(udf_no_param(column("a"))) + result = df1.collect()[0].column(0) + + assert result == pa.array([True, True, True]) + + @udf([pa.int64()], pa.bool_(), "immutable") + def udf_with_param(values: pa.Array) -> pa.Array: + return OverThresholdUDF(2)(values) + + 
df2 = df.select(udf_with_param(column("a"))) + result = df2.collect()[0].column(0) + + assert result == pa.array([False, True, True]) From d72f5605b3d523585d04857505793920f96242ba Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 10 Mar 2025 06:56:12 -0400 Subject: [PATCH 053/248] Enable FA ruff lint (#1052) --- examples/python-udwf.py | 2 ++ pyproject.toml | 2 +- python/datafusion/io.py | 2 ++ python/tests/test_udaf.py | 2 ++ python/tests/test_udwf.py | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 32f8fadaa..7d39dc1b8 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import datafusion import pyarrow as pa from datafusion import col, lit, udwf diff --git a/pyproject.toml b/pyproject.toml index f416e02a5..d16a18aa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W", "I"] +select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 7f3b77efa..3b6264948 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -17,6 +17,8 @@ """IO read functions using global context.""" +from __future__ import annotations + import pathlib import pyarrow diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index e69c77d3c..97cf81f3c 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations + from typing import List import pyarrow as pa diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 0ffa04179..2fea34aa3 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import pyarrow as pa import pytest from datafusion import SessionContext, column, lit, udwf From 0002372ccdb780e011631c797ec9613174cf0a94 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 10 Mar 2025 14:22:42 -0400 Subject: [PATCH 054/248] Enable take comments to assign issues to users (#1058) --- .github/workflows/take.yml | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/take.yml diff --git a/.github/workflows/take.yml b/.github/workflows/take.yml new file mode 100644 index 000000000..86dc190ad --- /dev/null +++ b/.github/workflows/take.yml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Assign the issue via a `take` comment +on: + issue_comment: + types: created + +permissions: + issues: write + +jobs: + issue_assign: + runs-on: ubuntu-latest + if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' + concurrency: + group: ${{ github.actor }}-issue-assign + steps: + - run: | + CODE=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -LI https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees/${{ github.event.comment.user.login }} -o /dev/null -w '%{http_code}\n' -s) + if [ "$CODE" -eq "204" ] + then + echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees + else + echo "Cannot assign issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + fi \ No newline at end of file From 9d634de6df2f8b76bd303ab1f5972f01deb2210d Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 10 Mar 2025 14:24:40 -0400 Subject: [PATCH 055/248] Update python min version to 3.9 (#1043) * 3.8 -> 3.9 * upgrade pyo3 abi3-py38 -> abi3-py39 --- Cargo.toml | 2 +- .../source/contributor-guide/introduction.rst | 2 +- examples/ffi-table-provider/Cargo.lock | 75 +- examples/ffi-table-provider/Cargo.toml | 2 +- examples/ffi-table-provider/pyproject.toml | 2 +- pyproject.toml | 3 +- uv.lock | 707 ++---------------- 7 files changed, 121 insertions(+), 672 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5358b1836..50967a219 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } 
+pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} arrow = { version = "54", features = ["pyarrow"] } datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index 25f2c21a4..2fba64111 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -118,7 +118,7 @@ be ignored by ``git``. .. code-block:: implementation=CPython - version=3.8 + version=3.9 shared=true abi3=true lib_name=python3.12 diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock index 32af85180..8d0edd515 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/ffi-table-provider/Cargo.lock @@ -766,7 +766,8 @@ dependencies = [ [[package]] name = "datafusion" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ "arrow", "arrow-array", @@ -816,7 +817,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" dependencies = [ "arrow", "async-trait", @@ -836,7 +838,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ "ahash", "arrow", @@ -862,7 +865,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" dependencies = [ "log", "tokio", @@ -871,12 +875,14 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" [[package]] name = "datafusion-execution" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" dependencies = [ "arrow", "dashmap", @@ -894,7 +900,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" dependencies = [ "arrow", "chrono", @@ -914,7 +921,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" dependencies = [ "arrow", "datafusion-common", @@ -925,7 +933,8 
@@ dependencies = [ [[package]] name = "datafusion-ffi" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" dependencies = [ "abi_stable", "arrow", @@ -945,7 +954,8 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" dependencies = [ "arrow", "arrow-buffer", @@ -974,7 +984,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" dependencies = [ "ahash", "arrow", @@ -996,7 +1007,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" dependencies = [ "ahash", "arrow", @@ -1008,7 +1020,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" dependencies = [ "arrow", "arrow-array", @@ -1031,7 +1044,8 @@ 
dependencies = [ [[package]] name = "datafusion-functions-table" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" dependencies = [ "arrow", "async-trait", @@ -1046,7 +1060,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1062,7 +1077,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1071,7 +1087,8 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" dependencies = [ "datafusion-expr", "quote", @@ -1081,7 +1098,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" 
dependencies = [ "arrow", "chrono", @@ -1099,7 +1117,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" dependencies = [ "ahash", "arrow", @@ -1123,7 +1142,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" dependencies = [ "ahash", "arrow", @@ -1137,7 +1157,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" dependencies = [ "arrow", "arrow-schema", @@ -1158,7 +1179,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" dependencies = [ "ahash", "arrow", @@ -1189,7 +1211,8 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" dependencies = [ 
"arrow", "chrono", @@ -1204,7 +1227,8 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" dependencies = [ "arrow", "datafusion-common", @@ -1214,7 +1238,8 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" dependencies = [ "arrow", "arrow-array", diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 0e558fdd0..f4e4fda79 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -23,7 +23,7 @@ edition = "2021" [dependencies] datafusion = { version = "45.0.0" } datafusion-ffi = { version = "45.0.0" } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } arrow = { version = "54" } arrow-array = { version = "54" } arrow-schema = { version = "54" } diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-table-provider/pyproject.toml index 116efae9c..9cd25b423 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/ffi-table-provider/pyproject.toml @@ -21,7 +21,7 @@ build-backend = "maturin" [project] name = "ffi_table_provider" -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/pyproject.toml b/pyproject.toml index d16a18aa6..1c2733677 100644 --- a/pyproject.toml 
+++ b/pyproject.toml @@ -24,7 +24,7 @@ name = "datafusion" description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifiers = [ "Development Status :: 2 - Pre-Alpha", @@ -35,7 +35,6 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/uv.lock b/uv.lock index 587ddc8b7..619b92856 100644 --- a/uv.lock +++ b/uv.lock @@ -1,23 +1,10 @@ version = 1 -requires-python = ">=3.8" +requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.12'", "python_full_version == '3.11.*'", "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", - "python_full_version < '3.9'", -] - -[[package]] -name = "alabaster" -version = "0.7.13" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/94/71/a8ee96d1fd95ca04a0d2e2d9c4081dac4c2d2b12f7ddb899c8cb9bfd1532/alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2", size = 11454 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3", size = 13857 }, + "python_full_version < '3.10'", ] [[package]] @@ -25,7 +12,7 @@ name = "alabaster" version = "0.7.16" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] sdist = { url = 
"https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776 } wheels = [ @@ -46,42 +33,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, ] -[[package]] -name = "appnope" -version = "0.1.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, -] - -[[package]] -name = "astroid" -version = "3.2.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/53/1067e1113ecaf58312357f2cd93063674924119d80d173adc3f6f2387aa2/astroid-3.2.4.tar.gz", hash = "sha256:0e14202810b30da1b735827f78f5157be2bbd4a7a59b7707ca0bfc2fb4c0063a", size = 397576 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/96/b32bbbb46170a1c8b8b1f28c794202e25cfe743565e9d3469b8eb1e0cc05/astroid-3.2.4-py3-none-any.whl", hash = "sha256:413658a61eeca6202a59231abb473f932038fbcbf1666587f66d482083413a25", size = 276348 }, -] - [[package]] name = "astroid" version = "3.3.8" source = { registry = 
"https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] dependencies = [ - { name = "typing-extensions", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196 } wheels = [ @@ -101,23 +58,11 @@ wheels = [ name = "babel" version = "2.16.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pytz", marker = "python_full_version < '3.9'" }, -] sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } wheels = [ { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, ] -[[package]] -name = "backcall" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/40/764a663805d84deee23043e1426a9175567db89c8b3287b5c2ad9f71aa93/backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", size = 18041 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/1c/ff6546b6c12603d8dd1070aa3c3d273ad4c07f5771689a7b69a550e8c951/backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255", size = 11157 }, -] - [[package]] name = 
"beautifulsoup4" version = "4.12.3" @@ -194,14 +139,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469 }, { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, - { url = "https://files.pythonhosted.org/packages/48/08/15bf6b43ae9bd06f6b00ad8a91f5a8fe1069d4c9fab550a866755402724e/cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", size = 182457 }, - { url = "https://files.pythonhosted.org/packages/c2/5b/f1523dd545f92f7df468e5f653ffa4df30ac222f3c884e51e139878f1cb5/cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", size = 425932 }, - { url = "https://files.pythonhosted.org/packages/53/93/7e547ab4105969cc8c93b38a667b82a835dd2cc78f3a7dad6130cfd41e1d/cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", size = 448585 }, - { url = "https://files.pythonhosted.org/packages/56/c4/a308f2c332006206bb511de219efeff090e9d63529ba0a77aae72e82248b/cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", size = 
456268 }, - { url = "https://files.pythonhosted.org/packages/ca/5b/b63681518265f2f4060d2b60755c1c77ec89e5e045fc3773b72735ddaad5/cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", size = 436592 }, - { url = "https://files.pythonhosted.org/packages/bb/19/b51af9f4a4faa4a8ac5a0e5d5c2522dcd9703d07fac69da34a36c4d960d3/cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", size = 446512 }, - { url = "https://files.pythonhosted.org/packages/e2/63/2bed8323890cb613bbecda807688a31ed11a7fe7afe31f8faaae0206a9a3/cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", size = 171576 }, - { url = "https://files.pythonhosted.org/packages/2f/70/80c33b044ebc79527447fd4fbc5455d514c3bb840dede4455de97da39b4d/cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", size = 181229 }, { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220 }, { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605 }, { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910 }, @@ -274,19 +211,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, - { url = "https://files.pythonhosted.org/packages/10/bd/6517ea94f2672e801011d50b5d06be2a0deaf566aea27bcdcd47e5195357/charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c", size = 195653 }, - { url = "https://files.pythonhosted.org/packages/e5/0d/815a2ba3f283b4eeaa5ece57acade365c5b4135f65a807a083c818716582/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9", size = 140701 }, - { url = "https://files.pythonhosted.org/packages/aa/17/c94be7ee0d142687e047fe1de72060f6d6837f40eedc26e87e6e124a3fc6/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8", size = 150495 }, - { url = "https://files.pythonhosted.org/packages/f7/33/557ac796c47165fc141e4fb71d7b0310f67e05cb420756f3a82e0a0068e0/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6", size = 142946 }, - { url = 
"https://files.pythonhosted.org/packages/1e/0d/38ef4ae41e9248d63fc4998d933cae22473b1b2ac4122cf908d0f5eb32aa/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c", size = 144737 }, - { url = "https://files.pythonhosted.org/packages/43/01/754cdb29dd0560f58290aaaa284d43eea343ad0512e6ad3b8b5c11f08592/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a", size = 147471 }, - { url = "https://files.pythonhosted.org/packages/ba/cd/861883ba5160c7a9bd242c30b2c71074cda2aefcc0addc91118e0d4e0765/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd", size = 140801 }, - { url = "https://files.pythonhosted.org/packages/6f/7f/0c0dad447819e90b93f8ed238cc8f11b91353c23c19e70fa80483a155bed/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd", size = 149312 }, - { url = "https://files.pythonhosted.org/packages/8e/09/9f8abcc6fff60fb727268b63c376c8c79cc37b833c2dfe1f535dfb59523b/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824", size = 152347 }, - { url = "https://files.pythonhosted.org/packages/be/e5/3f363dad2e24378f88ccf63ecc39e817c29f32e308ef21a7a6d9c1201165/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca", size = 149888 }, - { url = "https://files.pythonhosted.org/packages/e4/10/a78c0e91f487b4ad0ef7480ac765e15b774f83de2597f1b6ef0eaf7a2f99/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b", 
size = 145169 }, - { url = "https://files.pythonhosted.org/packages/d3/81/396e7d7f5d7420da8273c91175d2e9a3f569288e3611d521685e4b9ac9cc/charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e", size = 95094 }, - { url = "https://files.pythonhosted.org/packages/40/bb/20affbbd9ea29c71ea123769dc568a6d42052ff5089c5fe23e21e21084a6/charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4", size = 102139 }, { url = "https://files.pythonhosted.org/packages/7f/c0/b913f8f02836ed9ab32ea643c6fe4d3325c3d8627cf6e78098671cafff86/charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", size = 197867 }, { url = "https://files.pythonhosted.org/packages/0f/6c/2bee440303d705b6fb1e2ec789543edec83d32d258299b16eed28aad48e0/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", size = 141385 }, { url = "https://files.pythonhosted.org/packages/3d/04/cb42585f07f6f9fd3219ffb6f37d5a39b4fd2db2355b23683060029c35f7/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", size = 151367 }, @@ -351,11 +275,9 @@ wheels = [ [[package]] name = "datafusion" -version = "44.0.0" source = { editable = "." 
} dependencies = [ - { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pyarrow" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -369,20 +291,16 @@ dev = [ { name = "toml" }, ] docs = [ - { name = "ipython", version = "8.12.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "ipython", version = "8.31.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "jinja2" }, { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "myst-parser", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "pandas", version = "2.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "pandas", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pandas" }, { name = "pickleshare" }, { name = "pydata-sphinx-theme" }, - { name = "setuptools", version = "75.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "setuptools", version = "75.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, - { name = "sphinx", version = "7.1.2", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "setuptools" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "sphinx-autoapi" }, ] @@ -435,28 +353,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, ] -[[package]] -name = "docutils" -version = "0.20.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666 }, -] - [[package]] name = "docutils" version = "0.21.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = 
"sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 } wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, @@ -503,8 +403,7 @@ name = "importlib-metadata" version = "8.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp", version = "3.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "zipp", version = "3.21.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "zipp", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 } wheels = [ @@ -520,52 +419,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] -[[package]] -name = "ipython" -version = "8.12.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "appnope", marker = "python_full_version < '3.9' and sys_platform == 'darwin'" }, - { name = "backcall", marker = "python_full_version < '3.9'" }, - { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version < '3.9'" }, - { name = "jedi", marker = "python_full_version < '3.9'" }, - { name = "matplotlib-inline", marker = "python_full_version < '3.9'" }, - 
{ name = "pexpect", marker = "python_full_version < '3.9' and sys_platform != 'win32'" }, - { name = "pickleshare", marker = "python_full_version < '3.9'" }, - { name = "prompt-toolkit", marker = "python_full_version < '3.9'" }, - { name = "pygments", marker = "python_full_version < '3.9'" }, - { name = "stack-data", marker = "python_full_version < '3.9'" }, - { name = "traitlets", marker = "python_full_version < '3.9'" }, - { name = "typing-extensions", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/6a/44ef299b1762f5a73841e87fae8a73a8cc8aee538d6dc8c77a5afe1fd2ce/ipython-8.12.3.tar.gz", hash = "sha256:3910c4b54543c2ad73d06579aa771041b7d5707b033bd488669b4cf544e3b363", size = 5470171 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/97/8fe103906cd81bc42d3b0175b5534a9f67dccae47d6451131cf8d0d70bb2/ipython-8.12.3-py3-none-any.whl", hash = "sha256:b0340d46a933d27c657b211a329d0be23793c36595acf9e6ef4164bc01a1804c", size = 798307 }, -] - [[package]] name = "ipython" version = "8.18.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version == '3.9.*'" }, - { name = "exceptiongroup", marker = "python_full_version == '3.9.*'" }, - { name = "jedi", marker = "python_full_version == '3.9.*'" }, - { name = "matplotlib-inline", marker = "python_full_version == '3.9.*'" }, - { name = "pexpect", marker = "python_full_version == '3.9.*' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version == '3.9.*'" }, - { name = "pygments", marker = "python_full_version == '3.9.*'" }, - { name = "stack-data", marker = "python_full_version == '3.9.*'" }, - { name = "traitlets", marker = "python_full_version == '3.9.*'" }, - { name = 
"typing-extensions", marker = "python_full_version == '3.9.*'" }, + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version < '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, + { name = "jedi", marker = "python_full_version < '3.10'" }, + { name = "matplotlib-inline", marker = "python_full_version < '3.10'" }, + { name = "pexpect", marker = "python_full_version < '3.10' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "stack-data", marker = "python_full_version < '3.10'" }, + { name = "traitlets", marker = "python_full_version < '3.10'" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b1/b9/3ba6c45a6df813c09a48bac313c22ff83efa26cbb55011218d925a46e2ad/ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", size = 5486330 } wheels = [ @@ -616,8 +488,7 @@ name = "jinja2" version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markupsafe", version = "2.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "markupsafe", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "markupsafe" }, ] sdist = { url = "https://files.pythonhosted.org/packages/af/92/b3130cbbf5591acf9ade8708c365f3238046ac7cb8ccba6e81abccb0ccff/jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", size = 244674 } wheels = [ @@ -636,77 +507,10 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, ] -[[package]] -name = "markupsafe" -version = "2.1.5" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/87/5b/aae44c6655f3801e81aa3eef09dbbf012431987ba564d7231722f68df02d/MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", size = 19384 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/54/ad5eb37bf9d51800010a74e4665425831a9db4e7c4e0fde4352e391e808e/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", size = 18206 }, - { url = "https://files.pythonhosted.org/packages/6a/4a/a4d49415e600bacae038c67f9fecc1d5433b9d3c71a4de6f33537b89654c/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", size = 14079 }, - { url = "https://files.pythonhosted.org/packages/0a/7b/85681ae3c33c385b10ac0f8dd025c30af83c78cec1c37a6aa3b55e67f5ec/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", size = 26620 }, - { url = "https://files.pythonhosted.org/packages/7c/52/2b1b570f6b8b803cef5ac28fdf78c0da318916c7d2fe9402a84d591b394c/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", size = 25818 }, - { url = 
"https://files.pythonhosted.org/packages/29/fe/a36ba8c7ca55621620b2d7c585313efd10729e63ef81e4e61f52330da781/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", size = 25493 }, - { url = "https://files.pythonhosted.org/packages/60/ae/9c60231cdfda003434e8bd27282b1f4e197ad5a710c14bee8bea8a9ca4f0/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", size = 30630 }, - { url = "https://files.pythonhosted.org/packages/65/dc/1510be4d179869f5dafe071aecb3f1f41b45d37c02329dfba01ff59e5ac5/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", size = 29745 }, - { url = "https://files.pythonhosted.org/packages/30/39/8d845dd7d0b0613d86e0ef89549bfb5f61ed781f59af45fc96496e897f3a/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", size = 30021 }, - { url = "https://files.pythonhosted.org/packages/c7/5c/356a6f62e4f3c5fbf2602b4771376af22a3b16efa74eb8716fb4e328e01e/MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", size = 16659 }, - { url = "https://files.pythonhosted.org/packages/69/48/acbf292615c65f0604a0c6fc402ce6d8c991276e16c80c46a8f758fbd30c/MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", size = 17213 }, - { url = "https://files.pythonhosted.org/packages/11/e7/291e55127bb2ae67c64d66cef01432b5933859dfb7d6949daa721b89d0b3/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", size = 18219 }, - { url = 
"https://files.pythonhosted.org/packages/6b/cb/aed7a284c00dfa7c0682d14df85ad4955a350a21d2e3b06d8240497359bf/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", size = 14098 }, - { url = "https://files.pythonhosted.org/packages/1c/cf/35fe557e53709e93feb65575c93927942087e9b97213eabc3fe9d5b25a55/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", size = 29014 }, - { url = "https://files.pythonhosted.org/packages/97/18/c30da5e7a0e7f4603abfc6780574131221d9148f323752c2755d48abad30/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", size = 28220 }, - { url = "https://files.pythonhosted.org/packages/0c/40/2e73e7d532d030b1e41180807a80d564eda53babaf04d65e15c1cf897e40/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", size = 27756 }, - { url = "https://files.pythonhosted.org/packages/18/46/5dca760547e8c59c5311b332f70605d24c99d1303dd9a6e1fc3ed0d73561/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", size = 33988 }, - { url = "https://files.pythonhosted.org/packages/6d/c5/27febe918ac36397919cd4a67d5579cbbfa8da027fa1238af6285bb368ea/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", size = 32718 }, - { url = "https://files.pythonhosted.org/packages/f8/81/56e567126a2c2bc2684d6391332e357589a96a76cb9f8e5052d85cb0ead8/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", size = 33317 }, - { url = 
"https://files.pythonhosted.org/packages/00/0b/23f4b2470accb53285c613a3ab9ec19dc944eaf53592cb6d9e2af8aa24cc/MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", size = 16670 }, - { url = "https://files.pythonhosted.org/packages/b7/a2/c78a06a9ec6d04b3445a949615c4c7ed86a0b2eb68e44e7541b9d57067cc/MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", size = 17224 }, - { url = "https://files.pythonhosted.org/packages/53/bd/583bf3e4c8d6a321938c13f49d44024dbe5ed63e0a7ba127e454a66da974/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", size = 18215 }, - { url = "https://files.pythonhosted.org/packages/48/d6/e7cd795fc710292c3af3a06d80868ce4b02bfbbf370b7cee11d282815a2a/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", size = 14069 }, - { url = "https://files.pythonhosted.org/packages/51/b5/5d8ec796e2a08fc814a2c7d2584b55f889a55cf17dd1a90f2beb70744e5c/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", size = 29452 }, - { url = "https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", size = 28462 }, - { url = "https://files.pythonhosted.org/packages/2d/75/fd6cb2e68780f72d47e6671840ca517bda5ef663d30ada7616b0462ad1e3/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", size = 27869 }, - { url = 
"https://files.pythonhosted.org/packages/b0/81/147c477391c2750e8fc7705829f7351cf1cd3be64406edcf900dc633feb2/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", size = 33906 }, - { url = "https://files.pythonhosted.org/packages/8b/ff/9a52b71839d7a256b563e85d11050e307121000dcebc97df120176b3ad93/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", size = 32296 }, - { url = "https://files.pythonhosted.org/packages/88/07/2dc76aa51b481eb96a4c3198894f38b480490e834479611a4053fbf08623/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", size = 33038 }, - { url = "https://files.pythonhosted.org/packages/96/0c/620c1fb3661858c0e37eb3cbffd8c6f732a67cd97296f725789679801b31/MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", size = 16572 }, - { url = "https://files.pythonhosted.org/packages/3f/14/c3554d512d5f9100a95e737502f4a2323a1959f6d0d01e0d0997b35f7b10/MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", size = 17127 }, - { url = "https://files.pythonhosted.org/packages/f8/ff/2c942a82c35a49df5de3a630ce0a8456ac2969691b230e530ac12314364c/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", size = 18192 }, - { url = "https://files.pythonhosted.org/packages/4f/14/6f294b9c4f969d0c801a4615e221c1e084722ea6114ab2114189c5b8cbe0/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", size = 14072 }, - { url = 
"https://files.pythonhosted.org/packages/81/d4/fd74714ed30a1dedd0b82427c02fa4deec64f173831ec716da11c51a50aa/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", size = 26928 }, - { url = "https://files.pythonhosted.org/packages/c7/bd/50319665ce81bb10e90d1cf76f9e1aa269ea6f7fa30ab4521f14d122a3df/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", size = 26106 }, - { url = "https://files.pythonhosted.org/packages/4c/6f/f2b0f675635b05f6afd5ea03c094557bdb8622fa8e673387444fe8d8e787/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68", size = 25781 }, - { url = "https://files.pythonhosted.org/packages/51/e0/393467cf899b34a9d3678e78961c2c8cdf49fb902a959ba54ece01273fb1/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", size = 30518 }, - { url = "https://files.pythonhosted.org/packages/f6/02/5437e2ad33047290dafced9df741d9efc3e716b75583bbd73a9984f1b6f7/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", size = 29669 }, - { url = "https://files.pythonhosted.org/packages/0e/7d/968284145ffd9d726183ed6237c77938c021abacde4e073020f920e060b2/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", size = 29933 }, - { url = "https://files.pythonhosted.org/packages/bf/f3/ecb00fc8ab02b7beae8699f34db9357ae49d9f21d4d3de6f305f34fa949e/MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", size = 16656 }, - { url = 
"https://files.pythonhosted.org/packages/92/21/357205f03514a49b293e214ac39de01fadd0970a6e05e4bf1ddd0ffd0881/MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", size = 17206 }, - { url = "https://files.pythonhosted.org/packages/0f/31/780bb297db036ba7b7bbede5e1d7f1e14d704ad4beb3ce53fb495d22bc62/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", size = 18193 }, - { url = "https://files.pythonhosted.org/packages/6c/77/d77701bbef72892affe060cdacb7a2ed7fd68dae3b477a8642f15ad3b132/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", size = 14073 }, - { url = "https://files.pythonhosted.org/packages/d9/a7/1e558b4f78454c8a3a0199292d96159eb4d091f983bc35ef258314fe7269/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", size = 26486 }, - { url = "https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", size = 25685 }, - { url = "https://files.pythonhosted.org/packages/6a/18/ae5a258e3401f9b8312f92b028c54d7026a97ec3ab20bfaddbdfa7d8cce8/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", size = 25338 }, - { url = "https://files.pythonhosted.org/packages/0b/cc/48206bd61c5b9d0129f4d75243b156929b04c94c09041321456fd06a876d/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", size = 30439 }, - { url = 
"https://files.pythonhosted.org/packages/d1/06/a41c112ab9ffdeeb5f77bc3e331fdadf97fa65e52e44ba31880f4e7f983c/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", size = 29531 }, - { url = "https://files.pythonhosted.org/packages/02/8c/ab9a463301a50dab04d5472e998acbd4080597abc048166ded5c7aa768c8/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", size = 29823 }, - { url = "https://files.pythonhosted.org/packages/bc/29/9bc18da763496b055d8e98ce476c8e718dcfd78157e17f555ce6dd7d0895/MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", size = 16658 }, - { url = "https://files.pythonhosted.org/packages/f6/f8/4da07de16f10551ca1f640c92b5f316f9394088b183c6a57183df6de5ae4/MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", size = 17211 }, -] - [[package]] name = "markupsafe" version = "3.0.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } wheels = [ { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, @@ -832,18 +636,15 @@ name = "myst-parser" version = "3.0.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - 
"python_full_version == '3.9.*'", - "python_full_version < '3.9'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "docutils", marker = "python_full_version < '3.10'" }, { name = "jinja2", marker = "python_full_version < '3.10'" }, { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, { name = "mdit-py-plugins", marker = "python_full_version < '3.10'" }, { name = "pyyaml", marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/49/64/e2f13dac02f599980798c01156393b781aec983b52a6e4057ee58f07c43a/myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87", size = 92392 } wheels = [ @@ -860,7 +661,7 @@ resolution-markers = [ "python_full_version == '3.10.*'", ] dependencies = [ - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "docutils", marker = "python_full_version >= '3.10'" }, { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, { name = "mdit-py-plugins", marker = "python_full_version >= '3.10'" }, @@ -872,50 +673,12 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ca/b4/b036f8fdb667587bb37df29dc6644681dd78b7a2a6321a34684b79412b28/myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d", size = 84563 }, ] -[[package]] -name = "numpy" -version = "1.24.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140 }, - { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297 }, - { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611 }, - { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357 }, - { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = 
"sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222 }, - { url = "https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514 }, - { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508 }, - { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033 }, - { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951 }, - { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923 }, - { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446 }, - { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466 }, - { 
url = "https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722 }, - { url = "https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102 }, - { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616 }, - { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263 }, - { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660 }, - { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112 }, - { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549 }, - { url = 
"https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950 }, - { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228 }, - { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170 }, - { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918 }, - { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441 }, - { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590 }, - { url = "https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744 }, - { url = 
"https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290 }, -] - [[package]] name = "numpy" version = "2.0.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 } wheels = [ @@ -1041,63 +804,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] -[[package]] -name = "pandas" -version = "2.0.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "python-dateutil", marker = "python_full_version < '3.9'" }, - { name = "pytz", marker = "python_full_version < '3.9'" }, - { name = "tzdata", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908 }, - { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486 }, - { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897 }, - { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421 }, - { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792 }, - { url = "https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333 }, - { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672 }, - { url = "https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229 
}, - { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591 }, - { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370 }, - { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935 }, - { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692 }, - { url = "https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303 }, - { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932 }, - { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018 }, - { url = 
"https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723 }, - { url = "https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403 }, - { url = "https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638 }, - { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160 }, - { url = "https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752 }, - { url = "https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852 }, - { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496 }, - { url = 
"https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766 }, - { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902 }, -] - [[package]] name = "pandas" version = "2.2.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] dependencies = [ - { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.9'" }, - { name = "pytz", marker = "python_full_version >= '3.9'" }, - { name = "tzdata", marker = "python_full_version >= '3.9'" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } wheels = [ @@ -1213,65 +929,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 
11842 }, ] -[[package]] -name = "pyarrow" -version = "17.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/27/4e/ea6d43f324169f8aec0e57569443a38bab4b398d09769ca64f7b4d467de3/pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", size = 1112479 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/5d/78d4b040bc5ff2fc6c3d03e80fca396b742f6c125b8af06bcf7427f931bc/pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", size = 28994846 }, - { url = "https://files.pythonhosted.org/packages/3b/73/8ed168db7642e91180330e4ea9f3ff8bab404678f00d32d7df0871a4933b/pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", size = 27165908 }, - { url = "https://files.pythonhosted.org/packages/81/36/e78c24be99242063f6d0590ef68c857ea07bdea470242c361e9a15bd57a4/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", size = 39264209 }, - { url = "https://files.pythonhosted.org/packages/18/4c/3db637d7578f683b0a8fb8999b436bdbedd6e3517bd4f90c70853cf3ad20/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", size = 39862883 }, - { url = "https://files.pythonhosted.org/packages/81/3c/0580626896c842614a523e66b351181ed5bb14e5dfc263cd68cea2c46d90/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8", size = 38723009 }, - { 
url = "https://files.pythonhosted.org/packages/ee/fb/c1b47f0ada36d856a352da261a44d7344d8f22e2f7db3945f8c3b81be5dd/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", size = 39855626 }, - { url = "https://files.pythonhosted.org/packages/19/09/b0a02908180a25d57312ab5919069c39fddf30602568980419f4b02393f6/pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", size = 25147242 }, - { url = "https://files.pythonhosted.org/packages/f9/46/ce89f87c2936f5bb9d879473b9663ce7a4b1f4359acc2f0eb39865eaa1af/pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", size = 29028748 }, - { url = "https://files.pythonhosted.org/packages/8d/8e/ce2e9b2146de422f6638333c01903140e9ada244a2a477918a368306c64c/pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", size = 27190965 }, - { url = "https://files.pythonhosted.org/packages/3b/c8/5675719570eb1acd809481c6d64e2136ffb340bc387f4ca62dce79516cea/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", size = 39269081 }, - { url = "https://files.pythonhosted.org/packages/5e/78/3931194f16ab681ebb87ad252e7b8d2c8b23dad49706cadc865dff4a1dd3/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", size = 39864921 }, - { url = "https://files.pythonhosted.org/packages/d8/81/69b6606093363f55a2a574c018901c40952d4e902e670656d18213c71ad7/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", size = 38740798 }, - { url = 
"https://files.pythonhosted.org/packages/4c/21/9ca93b84b92ef927814cb7ba37f0774a484c849d58f0b692b16af8eebcfb/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", size = 39871877 }, - { url = "https://files.pythonhosted.org/packages/30/d1/63a7c248432c71c7d3ee803e706590a0b81ce1a8d2b2ae49677774b813bb/pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", size = 25151089 }, - { url = "https://files.pythonhosted.org/packages/d4/62/ce6ac1275a432b4a27c55fe96c58147f111d8ba1ad800a112d31859fae2f/pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", size = 29019418 }, - { url = "https://files.pythonhosted.org/packages/8e/0a/dbd0c134e7a0c30bea439675cc120012337202e5fac7163ba839aa3691d2/pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", size = 27152197 }, - { url = "https://files.pythonhosted.org/packages/cb/05/3f4a16498349db79090767620d6dc23c1ec0c658a668d61d76b87706c65d/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", size = 39263026 }, - { url = "https://files.pythonhosted.org/packages/c2/0c/ea2107236740be8fa0e0d4a293a095c9f43546a2465bb7df34eee9126b09/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", size = 39880798 }, - { url = "https://files.pythonhosted.org/packages/f6/b0/b9164a8bc495083c10c281cc65064553ec87b7537d6f742a89d5953a2a3e/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", size = 38715172 }, - { url = 
"https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", size = 39874508 }, - { url = "https://files.pythonhosted.org/packages/ae/49/baafe2a964f663413be3bd1cf5c45ed98c5e42e804e2328e18f4570027c1/pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", size = 25099235 }, - { url = "https://files.pythonhosted.org/packages/8d/bd/8f52c1d7b430260f80a349cffa2df351750a737b5336313d56dcadeb9ae1/pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", size = 28999345 }, - { url = "https://files.pythonhosted.org/packages/64/d9/51e35550f2f18b8815a2ab25948f735434db32000c0e91eba3a32634782a/pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", size = 27168441 }, - { url = "https://files.pythonhosted.org/packages/18/d8/7161d87d07ea51be70c49f615004c1446d5723622a18b2681f7e4b71bf6e/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", size = 39363163 }, - { url = "https://files.pythonhosted.org/packages/3f/08/bc497130789833de09e345e3ce4647e3ce86517c4f70f2144f0367ca378b/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", size = 39965253 }, - { url = "https://files.pythonhosted.org/packages/d3/2e/493dd7db889402b4c7871ca7dfdd20f2c5deedbff802d3eb8576359930f9/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", size = 38805378 }, - { url = 
"https://files.pythonhosted.org/packages/e6/c1/4c6bcdf7a820034aa91a8b4d25fef38809be79b42ca7aaa16d4680b0bbac/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", size = 39958364 }, - { url = "https://files.pythonhosted.org/packages/d1/db/42ac644453cfdfc60fe002b46d647fe7a6dfad753ef7b28e99b4c936ad5d/pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", size = 25229211 }, - { url = "https://files.pythonhosted.org/packages/43/e0/a898096d35be240aa61fb2d54db58b86d664b10e1e51256f9300f47565e8/pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", size = 29007881 }, - { url = "https://files.pythonhosted.org/packages/59/22/f7d14907ed0697b5dd488d393129f2738629fa5bcba863e00931b7975946/pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", size = 27178117 }, - { url = "https://files.pythonhosted.org/packages/bf/ee/661211feac0ed48467b1d5c57298c91403809ec3ab78b1d175e1d6ad03cf/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", size = 39273896 }, - { url = "https://files.pythonhosted.org/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", size = 39875438 }, - { url = "https://files.pythonhosted.org/packages/75/63/29d1bfcc57af73cde3fc3baccab2f37548de512dbe0ab294b033cd203516/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", size = 38735092 }, - { url = 
"https://files.pythonhosted.org/packages/39/f4/90258b4de753df7cc61cefb0312f8abcf226672e96cc64996e66afce817a/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", size = 39867610 }, - { url = "https://files.pythonhosted.org/packages/e7/f6/b75d4816c32f1618ed31a005ee635dd1d91d8164495d94f2ea092f594661/pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", size = 25148611 }, -] - [[package]] name = "pyarrow" version = "18.1.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } wheels = [ { url = "https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size = 29531620 }, @@ -1332,10 +993,8 @@ version = "0.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "beautifulsoup4" }, - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" 
}, + { name = "docutils" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fc/d6/3921de802cf1ee771f0e76c9068b52498aeb8eeec6b830ff931c81c7ecf3/pydata_sphinx_theme-0.8.0.tar.gz", hash = "sha256:9f72015d9c572ea92e3007ab221a8325767c426783b6b9941813e65fa988dc90", size = 1123746 } @@ -1349,13 +1008,11 @@ version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "deprecated" }, - { name = "pyjwt", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version < '3.9'" }, - { name = "pyjwt", version = "2.10.1", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version >= '3.9'" }, + { name = "pyjwt", extra = ["crypto"] }, { name = "pynacl" }, { name = "requests" }, { name = "typing-extensions" }, - { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } wheels = [ @@ -1371,33 +1028,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] -[[package]] -name = "pyjwt" -version = 
"2.9.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/fb/68/ce067f09fca4abeca8771fe667d89cc347d1e99da3e093112ac329c6020e/pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c", size = 78825 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/84/0fdf9b18ba31d69877bd39c9cd6052b47f3761e9910c15de788e519f079f/PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850", size = 22344 }, -] - -[package.optional-dependencies] -crypto = [ - { name = "cryptography", marker = "python_full_version < '3.9'" }, -] - [[package]] name = "pyjwt" version = "2.10.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } wheels = [ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, @@ -1405,7 +1039,7 @@ wheels = [ [package.optional-dependencies] crypto = [ - { name = "cryptography", marker = "python_full_version >= '3.9'" }, + { name = "cryptography" }, ] [[package]] @@ -1508,13 +1142,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 
}, { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, - { url = "https://files.pythonhosted.org/packages/74/d9/323a59d506f12f498c2097488d80d16f4cf965cee1791eab58b56b19f47a/PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", size = 183218 }, - { url = "https://files.pythonhosted.org/packages/74/cc/20c34d00f04d785f2028737e2e2a8254e1425102e730fee1d6396f832577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", size = 728067 }, - { url = "https://files.pythonhosted.org/packages/20/52/551c69ca1501d21c0de51ddafa8c23a0191ef296ff098e98358f69080577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", size = 757812 }, - { url = "https://files.pythonhosted.org/packages/fd/7f/2c3697bba5d4aa5cc2afe81826d73dfae5f049458e44732c7a0938baa673/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", size = 746531 }, - { url = "https://files.pythonhosted.org/packages/8c/ab/6226d3df99900e580091bb44258fde77a8433511a86883bd4681ea19a858/PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", size = 800820 }, - { url = 
"https://files.pythonhosted.org/packages/a0/99/a9eb0f3e710c06c5d922026f6736e920d431812ace24aae38228d0d64b04/PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", size = 145514 }, - { url = "https://files.pythonhosted.org/packages/75/8a/ee831ad5fafa4431099aa4e078d4c8efd43cd5e48fbc774641d233b683a9/PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", size = 162702 }, { url = "https://files.pythonhosted.org/packages/65/d8/b7a1db13636d7fb7d4ff431593c510c8b8fca920ade06ca8ef20015493c5/PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", size = 184777 }, { url = "https://files.pythonhosted.org/packages/0a/02/6ec546cd45143fdf9840b2c6be8d875116a64076218b61d68e12548e5839/PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", size = 172318 }, { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891 }, @@ -1534,8 +1161,7 @@ dependencies = [ { name = "certifi" }, { name = "charset-normalizer" }, { name = "idna" }, - { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } wheels = [ @@ -1567,28 +1193,10 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/94/0498cdb7316ed67a1928300dd87d659c933479f44dec51b4f62bfd1f8028/ruff-0.9.1-py3-none-win_arm64.whl", hash = "sha256:1cd76c7f9c679e6e8f2af8f778367dca82b95009bc7b1a85a47f1521ae524fa7", size = 9145708 }, ] -[[package]] -name = "setuptools" -version = "75.3.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 }, -] - [[package]] name = "setuptools" version = "75.8.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222 } wheels = [ { url = "https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782 }, @@ -1621,63 +1229,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = 
"sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, ] -[[package]] -name = "sphinx" -version = "7.1.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "alabaster", version = "0.7.13", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "babel", marker = "python_full_version < '3.9'" }, - { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "imagesize", marker = "python_full_version < '3.9'" }, - { name = "importlib-metadata", marker = "python_full_version < '3.9'" }, - { name = "jinja2", marker = "python_full_version < '3.9'" }, - { name = "packaging", marker = "python_full_version < '3.9'" }, - { name = "pygments", marker = "python_full_version < '3.9'" }, - { name = "requests", marker = "python_full_version < '3.9'" }, - { name = "snowballstemmer", marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-applehelp", version = "1.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-devhelp", version = "1.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-htmlhelp", version = "2.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-qthelp", version = "1.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-serializinghtml", version = "1.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, 
-] -sdist = { url = "https://files.pythonhosted.org/packages/dc/01/688bdf9282241dca09fe6e3a1110eda399fa9b10d0672db609e37c2e7a39/sphinx-7.1.2.tar.gz", hash = "sha256:780f4d32f1d7d1126576e0e5ecc19dc32ab76cd24e950228dcf7b1f6d3d9e22f", size = 6828258 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/17/325cf6a257d84751a48ae90752b3d8fe0be8f9535b6253add61c49d0d9bc/sphinx-7.1.2-py3-none-any.whl", hash = "sha256:d170a81825b2fcacb6dfd5a0d7f578a053e45d3f2b153fecc948c37344eb4cbe", size = 3169543 }, -] - [[package]] name = "sphinx" version = "7.4.7" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "babel", marker = "python_full_version == '3.9.*'" }, - { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "imagesize", marker = "python_full_version == '3.9.*'" }, - { name = "importlib-metadata", marker = "python_full_version == '3.9.*'" }, - { name = "jinja2", marker = "python_full_version == '3.9.*'" }, - { name = "packaging", marker = "python_full_version == '3.9.*'" }, - { name = "pygments", marker = "python_full_version == '3.9.*'" }, - { name = "requests", marker = "python_full_version == '3.9.*'" }, - { name = "snowballstemmer", marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-htmlhelp", 
version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "tomli", marker = "python_full_version == '3.9.*'" }, + { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "babel", marker = "python_full_version < '3.10'" }, + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "docutils", marker = "python_full_version < '3.10'" }, + { name = "imagesize", marker = "python_full_version < '3.10'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "jinja2", marker = "python_full_version < '3.10'" }, + { name = "packaging", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "requests", marker = "python_full_version < '3.10'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.10'" }, + { name = "tomli", marker = "python_full_version < '3.10'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911 } wheels = [ @@ -1697,19 +1274,19 @@ dependencies = [ { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "babel", marker = "python_full_version >= '3.10'" }, { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "docutils", marker = "python_full_version >= '3.10'" }, { name = "imagesize", marker = "python_full_version >= '3.10'" }, { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "packaging", marker = "python_full_version >= '3.10'" }, { name = "pygments", marker = "python_full_version >= '3.10'" }, { name = "requests", marker = "python_full_version >= '3.10'" }, { name = "snowballstemmer", marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.10'" }, { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-qthelp", version = 
"2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.10'" }, { name = "tomli", marker = "python_full_version == '3.10.*'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 } @@ -1722,97 +1299,40 @@ name = "sphinx-autoapi" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "astroid", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "astroid", version = "3.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "astroid" }, { name = "jinja2" }, { name = "pyyaml" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "stdlib-list", version = "0.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "stdlib-list", version = "0.11.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version == '3.9.*'" }, + { name = "stdlib-list", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4a/eb/cc243583bb1d518ca3b10998c203d919a8ed90affd4831f2b61ad09043d2/sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c", size = 29292 } wheels = [ { url = "https://files.pythonhosted.org/packages/de/d6/f2acdc2567337fd5f5dc091a4e58d8a0fb14927b9779fc1e5ecee96d9824/sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92", size = 34095 }, ] -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/32/df/45e827f4d7e7fcc84e853bcef1d836effd762d63ccb86f43ede4e98b478c/sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e", size = 24766 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228", size = 120601 }, -] - [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } wheels = [ { url = 
"https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, ] -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/98/33/dc28393f16385f722c893cb55539c641c9aaec8d1bc1c15b69ce0ac2dbb3/sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4", size = 17398 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", size = 84690 }, -] - [[package]] name = "sphinxcontrib-devhelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } wheels = [ { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, ] -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = 
"https://files.pythonhosted.org/packages/b3/47/64cff68ea3aa450c373301e5bebfbb9fce0a3e70aca245fcadd4af06cd75/sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff", size = 27967 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903", size = 99833 }, -] - [[package]] name = "sphinxcontrib-htmlhelp" version = "2.1.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } wheels = [ { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, @@ -1827,55 +1347,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, ] -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/8e/c4846e59f38a5f2b4a0e3b27af38f2fcf904d4bfd82095bf92de0b114ebd/sphinxcontrib-qthelp-1.0.3.tar.gz", hash = 
"sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", size = 21658 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6", size = 90609 }, -] - [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } wheels = [ { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, ] -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "1.1.5" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/72/835d6fadb9e5d02304cf39b18f93d227cd93abd3c41ebf58e6853eeb1455/sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952", size = 21019 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", size = 94021 }, -] - [[package]] name = "sphinxcontrib-serializinghtml" version = "2.0.0" 
source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } wheels = [ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, @@ -1895,25 +1379,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, ] -[[package]] -name = "stdlib-list" -version = "0.10.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/39/bb/1cdbc326a5ab0026602e0489cbf02357e78140253c4b57cd866d380eb355/stdlib_list-0.10.0.tar.gz", hash = "sha256:6519c50d645513ed287657bfe856d527f277331540691ddeaf77b25459964a14", size = 59447 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/d9/9085375f0d23a4896b307bf14dcc61b49ec8cc67cb33e06cf95bf3af3966/stdlib_list-0.10.0-py3-none-any.whl", hash = "sha256:b3a911bc441d03e0332dd1a9e7d0870ba3bb0a542a74d7524f54fb431256e214", size = 79814 }, -] - [[package]] name = "stdlib-list" version = "0.11.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.9.*'", -] sdist = { url = 
"https://files.pythonhosted.org/packages/5d/04/6b37a71e92ddca16b190b7df62494ac4779d58ced4787f73584eb32c8f03/stdlib_list-0.11.0.tar.gz", hash = "sha256:b74a7b643a77a12637e907f3f62f0ab9f67300bce4014f6b2d3c8b4c8fd63c66", size = 60335 } wheels = [ { url = "https://files.pythonhosted.org/packages/16/fe/e07300c027a868d32d8ed7a425503401e91a03ff90e7ca525c115c634ffb/stdlib_list-0.11.0-py3-none-any.whl", hash = "sha256:8bf8decfffaaf273d4cfeb5bd852b910a00dec1037dcf163576803622bccf597", size = 83617 }, @@ -1994,28 +1463,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, ] -[[package]] -name = "urllib3" -version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, -] - [[package]] name = "urllib3" version = "2.3.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } wheels = [ { url = 
"https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, @@ -2091,17 +1542,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, - { url = "https://files.pythonhosted.org/packages/0c/66/95b9e90e6e1274999b183c9c3f984996d870e933ca9560115bd1cd1d6f77/wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9", size = 53234 }, - { url = "https://files.pythonhosted.org/packages/a4/b6/6eced5e2db5924bf6d9223d2bb96b62e00395aae77058e6a9e11bf16b3bd/wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119", size = 38462 }, - { url = "https://files.pythonhosted.org/packages/5d/a4/c8472fe2568978b5532df84273c53ddf713f689d408a4335717ab89547e0/wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6", size = 38730 }, - { url = 
"https://files.pythonhosted.org/packages/3c/70/1d259c6b1ad164eb23ff70e3e452dd1950f96e6473f72b7207891d0fd1f0/wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9", size = 86225 }, - { url = "https://files.pythonhosted.org/packages/a9/68/6b83367e1afb8de91cbea4ef8e85b58acdf62f034f05d78c7b82afaa23d8/wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a", size = 78055 }, - { url = "https://files.pythonhosted.org/packages/0d/21/09573d2443916705c57fdab85d508f592c0a58d57becc53e15755d67fba2/wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2", size = 85592 }, - { url = "https://files.pythonhosted.org/packages/45/ce/700e17a852dd5dec894e241c72973ea82363486bcc1fb05d47b4fbd1d683/wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a", size = 83906 }, - { url = "https://files.pythonhosted.org/packages/37/14/bd210faf0a66faeb8529d42b6b45a25d6aa6ce25ddfc19168e4161aed227/wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04", size = 76763 }, - { url = "https://files.pythonhosted.org/packages/34/0c/85af70d291f44659c422416f0272046109e785bf6db8c081cfeeae5715c5/wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f", size = 83573 }, - { url = "https://files.pythonhosted.org/packages/f8/1e/b215068e824878f69ea945804fa26c176f7c2735a3ad5367d78930bd076a/wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7", size = 36408 }, - { url = 
"https://files.pythonhosted.org/packages/52/27/3dd9ad5f1097b33c95d05929e409cc86d7c765cb5437b86694dc8f8e9af0/wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3", size = 38737 }, { url = "https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, @@ -2116,25 +1556,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, ] -[[package]] -name = "zipp" -version = "3.20.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/54/bf/5c0000c44ebc80123ecbdddba1f5dcd94a5ada602a9c225d84b5aaa55e86/zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29", size = 24199 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, -] - [[package]] name 
= "zipp" version = "3.21.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } wheels = [ { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, From b194a8772e58ccefc697e11671113127a8038716 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 12 Mar 2025 14:25:32 -0400 Subject: [PATCH 056/248] feat/improve ruff test coverage (#1055) * Run python tests on all currently supported python versions * Update ruff checks to select all * Ruff auto fix * Applying ruff suggestions * noqa rules updates per ruff checks * Working through more ruff suggestions * Working through more ruff suggestions * update timestamps on tests * More ruff updates * More ruff updates * Instead of importing udf static functions as variables, import * More ruff formatting suggestions * more ruff formatting suggestions * More ruff formatting * More ruff formatting * Cut off lint errors for this PR * Working through more ruff checks and disabling a bunch for now * Address CI difference from local ruff * UDWF isn't a proper abstract base class right now since users can opt in to all methods * Update pre-commit to match the version of ruff used in CI * To enable testing in python 3.9 we need numpy. Also going to the current minimal supported version * Update min requried version of python to 3.9 in pyproject.toml. The other changes will come in #1043 that is soon to be merged. 
* Suppress UP035 * ruff format --- .github/workflows/test.yaml | 2 + .pre-commit-config.yaml | 2 +- benchmarks/tpch/tpch.py | 14 +- dev/release/check-rat-report.py | 2 +- dev/release/generate-changelog.py | 10 +- docs/source/conf.py | 2 +- examples/python-udwf.py | 2 +- examples/tpch/_tests.py | 15 +- pyproject.toml | 76 +++++- python/datafusion/__init__.py | 50 ++-- python/datafusion/common.py | 14 +- python/datafusion/context.py | 4 +- python/datafusion/dataframe.py | 15 +- python/datafusion/expr.py | 94 +++---- python/datafusion/functions.py | 46 ++-- python/datafusion/input/__init__.py | 2 +- python/datafusion/input/base.py | 6 +- python/datafusion/input/location.py | 40 +-- python/datafusion/io.py | 20 +- python/datafusion/object_store.py | 2 +- python/datafusion/plan.py | 8 +- python/datafusion/record_batch.py | 8 +- python/datafusion/substrait.py | 21 +- python/datafusion/udf.py | 236 +++++++++-------- python/tests/generic.py | 19 +- python/tests/test_aggregation.py | 16 +- python/tests/test_catalog.py | 9 +- python/tests/test_context.py | 53 ++-- python/tests/test_dataframe.py | 38 ++- python/tests/test_expr.py | 11 +- python/tests/test_functions.py | 358 ++++++++++++++------------ python/tests/test_imports.py | 7 +- python/tests/test_input.py | 12 +- python/tests/test_io.py | 13 +- python/tests/test_sql.py | 35 +-- python/tests/test_store.py | 13 +- python/tests/test_substrait.py | 2 +- python/tests/test_udaf.py | 10 +- python/tests/test_udwf.py | 2 +- python/tests/test_wrapper_coverage.py | 7 +- 40 files changed, 697 insertions(+), 599 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c1d9ac838..da3582766 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -33,9 +33,11 @@ jobs: fail-fast: false matrix: python-version: + - "3.9" - "3.10" - "3.11" - "3.12" + - "3.13" toolchain: - "stable" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b548ff18f..abcfcf321 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.3.0 + rev: v0.9.10 hooks: # Run the linter. - id: ruff diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index fb86b12b6..bfb9ac398 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -59,13 +59,13 @@ def bench(data_path, query_path): end = time.time() time_millis = (end - start) * 1000 total_time_millis += time_millis - print("setup,{}".format(round(time_millis, 1))) - results.write("setup,{}\n".format(round(time_millis, 1))) + print(f"setup,{round(time_millis, 1)}") + results.write(f"setup,{round(time_millis, 1)}\n") results.flush() # run queries for query in range(1, 23): - with open("{}/q{}.sql".format(query_path, query)) as f: + with open(f"{query_path}/q{query}.sql") as f: text = f.read() tmp = text.split(";") queries = [] @@ -83,14 +83,14 @@ def bench(data_path, query_path): end = time.time() time_millis = (end - start) * 1000 total_time_millis += time_millis - print("q{},{}".format(query, round(time_millis, 1))) - results.write("q{},{}\n".format(query, round(time_millis, 1))) + print(f"q{query},{round(time_millis, 1)}") + results.write(f"q{query},{round(time_millis, 1)}\n") results.flush() except Exception as e: print("query", query, "failed", e) - print("total,{}".format(round(total_time_millis, 1))) - results.write("total,{}\n".format(round(total_time_millis, 1))) + print(f"total,{round(total_time_millis, 1)}") + results.write(f"total,{round(total_time_millis, 1)}\n") if __name__ == "__main__": diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py index d3dd7c5dd..0c9f4c326 100644 --- a/dev/release/check-rat-report.py +++ b/dev/release/check-rat-report.py @@ -29,7 +29,7 @@ exclude_globs_filename = sys.argv[1] xml_filename = sys.argv[2] -globs = [line.strip() for line in open(exclude_globs_filename, "r")] +globs = 
[line.strip() for line in open(exclude_globs_filename)] tree = ET.parse(xml_filename) root = tree.getroot() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 2564eea86..e30e2def2 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -26,15 +26,11 @@ def print_pulls(repo_name, title, pulls): if len(pulls) > 0: - print("**{}:**".format(title)) + print(f"**{title}:**") print() for pull, commit in pulls: - url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) - print( - "- {} [#{}]({}) ({})".format( - pull.title, pull.number, url, commit.author.login - ) - ) + url = f"https://github.com/{repo_name}/pull/{pull.number}" + print(f"- {pull.title} [#{pull.number}]({url}) ({commit.author.login})") print() diff --git a/docs/source/conf.py b/docs/source/conf.py index 2e5a41339..c82a189e0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ autoapi_python_class_content = "both" -def autoapi_skip_member_fn(app, what, name, obj, skip, options): +def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 skip_contents = [ # Re-exports ("class", "datafusion.DataFrame"), diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 7d39dc1b8..98d118bf2 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -59,7 +59,7 @@ def __init__(self, alpha: float) -> None: def supports_bounded_execution(self) -> bool: return True - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: + def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 # Override the default range of current row since uses_window_frame is False # So for the purpose of this test we just smooth from the previous row to # current. 
diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index c4d872085..2be4dfabd 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -27,28 +27,25 @@ def df_selection(col_name, col_type): if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): return F.round(col(col_name), lit(2)).alias(col_name) - elif col_type == pa.string() or col_type == pa.string_view(): + if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) - else: - return col(col_name) + return col(col_name) def load_schema(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return col_name, pa.string() - elif isinstance(col_type, pa.Decimal128Type): + if isinstance(col_type, pa.Decimal128Type): return col_name, pa.float64() - else: - return col_name, col_type + return col_name, col_type def expected_selection(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return F.trim(col(col_name)).cast(col_type).alias(col_name) - elif col_type == pa.string() or col_type == pa.string_view(): + if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) - else: - return col(col_name) + return col(col_name) def selections_and_schema(original_schema): diff --git a/pyproject.toml b/pyproject.toml index 1c2733677..060e3b80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,57 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"] +select = ["ALL" ] +ignore = [ + "A001", # Allow using words like min as variable names + "A002", # Allow using words like filter as variable names + "ANN401", # Allow Any for wrapper classes + "COM812", # Recommended to ignore these rules when using with ruff-format + "FIX002", # Allow TODO lines - consider removing at some point + "FBT001", # Allow boolean positional args + "FBT002", # Allow boolean 
positional args + "ISC001", # Recommended to ignore these rules when using with ruff-format + "SLF001", # Allow accessing private members + "TD002", + "TD003", # Allow TODO lines + "UP007", # Disallowing Union is pedantic + # TODO: Enable all of the following, but this PR is getting too large already + "PT001", + "ANN204", + "B008", + "EM101", + "PLR0913", + "PLR1714", + "ANN201", + "C400", + "TRY003", + "B904", + "UP006", + "RUF012", + "FBT003", + "C416", + "SIM102", + "PGH003", + "PLR2004", + "PERF401", + "PD901", + "EM102", + "ERA001", + "SIM108", + "ICN001", + "ANN001", + "ANN202", + "PTH", + "N812", + "INP001", + "DTZ007", + "PLW2901", + "RET503", + "RUF015", + "A005", + "TC001", + "UP035", +] [tool.ruff.lint.pydocstyle] convention = "google" @@ -75,16 +125,30 @@ max-doc-length = 88 # Disable docstring checking for these directories [tool.ruff.lint.per-file-ignores] -"python/tests/*" = ["D"] -"examples/*" = ["D", "W505"] -"dev/*" = ["D"] -"benchmarks/*" = ["D", "F"] +"python/tests/*" = [ + "ANN", + "ARG", + "BLE001", + "D", + "S101", + "SLF", + "PD", + "PLR2004", + "PT011", + "RUF015", + "S608", + "PLR0913", + "PT004", +] +"examples/*" = ["D", "W505", "E501", "T201", "S101"] +"dev/*" = ["D", "E", "T", "S", "PLR", "C", "SIM", "UP", "EXE", "N817"] +"benchmarks/*" = ["D", "F", "T", "BLE", "FURB", "PLR", "E", "TD", "TRY", "S", "SIM", "EXE", "UP"] "docs/*" = ["D"] [dependency-groups] dev = [ "maturin>=1.8.1", - "numpy>1.24.4 ; python_full_version >= '3.10'", + "numpy>1.25.0", "pytest>=7.4.4", "ruff>=0.9.1", "toml>=0.10.2", diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index f11ce54a6..286e5dc31 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -48,44 +48,47 @@ from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream -from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF +from .udf import 
Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf __version__ = importlib_metadata.version(__name__) __all__ = [ "Accumulator", + "AggregateUDF", + "Catalog", "Config", - "DataFrame", - "SessionContext", - "SessionConfig", - "SQLOptions", - "RuntimeEnvBuilder", - "Expr", - "ScalarUDF", - "WindowFrame", - "column", - "col", - "literal", - "lit", "DFSchema", - "Catalog", + "DataFrame", "Database", - "Table", - "AggregateUDF", - "WindowUDF", - "LogicalPlan", "ExecutionPlan", + "Expr", + "LogicalPlan", "RecordBatch", "RecordBatchStream", + "RuntimeEnvBuilder", + "SQLOptions", + "ScalarUDF", + "SessionConfig", + "SessionContext", + "Table", + "WindowFrame", + "WindowUDF", + "col", + "column", "common", "expr", "functions", + "lit", + "literal", "object_store", - "substrait", - "read_parquet", "read_avro", "read_csv", "read_json", + "read_parquet", + "substrait", + "udaf", + "udf", + "udwf", ] @@ -120,10 +123,3 @@ def str_lit(value): def lit(value): """Create a literal expression.""" return Expr.literal(value) - - -udf = ScalarUDF.udf - -udaf = AggregateUDF.udaf - -udwf = WindowUDF.udwf diff --git a/python/datafusion/common.py b/python/datafusion/common.py index a2298c634..e762a993b 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -20,7 +20,7 @@ from ._internal import common as common_internal -# TODO these should all have proper wrapper classes +# TODO: these should all have proper wrapper classes DFSchema = common_internal.DFSchema DataType = common_internal.DataType @@ -38,15 +38,15 @@ "DFSchema", "DataType", "DataTypeMap", - "RexType", - "PythonType", - "SqlType", "NullTreatment", - "SqlTable", + "PythonType", + "RexType", + "SqlFunction", "SqlSchema", - "SqlView", "SqlStatistics", - "SqlFunction", + "SqlTable", + "SqlType", + "SqlView", ] diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 282b2a477..0ab1a908a 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -393,8 
+393,6 @@ def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder: class RuntimeConfig(RuntimeEnvBuilder): """See `RuntimeEnvBuilder`.""" - pass - class SQLOptions: """Options to be used when performing SQL queries.""" @@ -498,7 +496,7 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) - def enable_url_table(self) -> "SessionContext": + def enable_url_table(self) -> SessionContext: """Control if local files can be queried as tables. Returns: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index de5d8376e..d1c71c2bb 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -29,6 +29,7 @@ List, Literal, Optional, + Type, Union, overload, ) @@ -49,10 +50,11 @@ import polars as pl import pyarrow as pa + from datafusion._internal import DataFrame as DataFrameInternal + from datafusion._internal import expr as expr_internal + from enum import Enum -from datafusion._internal import DataFrame as DataFrameInternal -from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -73,7 +75,7 @@ class Compression(Enum): LZ4_RAW = "lz4_raw" @classmethod - def from_str(cls, value: str) -> "Compression": + def from_str(cls: Type[Compression], value: str) -> Compression: """Convert a string to a Compression enum value. Args: @@ -88,8 +90,9 @@ def from_str(cls, value: str) -> "Compression": try: return cls(value.lower()) except ValueError: + valid_values = str([item.value for item in Compression]) raise ValueError( - f"{value} is not a valid Compression. Valid values are: {[item.value for item in Compression]}" + f"{value} is not a valid Compression. 
Valid values are: {valid_values}" ) def get_default_level(self) -> Optional[int]: @@ -104,9 +107,9 @@ def get_default_level(self) -> Optional[int]: # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 if self == Compression.GZIP: return 6 - elif self == Compression.BROTLI: + if self == Compression.BROTLI: return 1 - elif self == Compression.ZSTD: + if self == Compression.ZSTD: return 4 return None diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 3639abec6..702f75aed 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -101,63 +101,63 @@ WindowExpr = expr_internal.WindowExpr __all__ = [ - "Expr", - "Column", - "Literal", - "BinaryExpr", - "Literal", + "Aggregate", "AggregateFunction", - "Not", - "IsNotNull", - "IsNull", - "IsTrue", - "IsFalse", - "IsUnknown", - "IsNotTrue", - "IsNotFalse", - "IsNotUnknown", - "Negative", - "Like", - "ILike", - "SimilarTo", - "ScalarVariable", "Alias", - "InList", - "Exists", - "Subquery", - "InSubquery", - "ScalarSubquery", - "Placeholder", - "GroupingSet", + "Analyze", + "Between", + "BinaryExpr", "Case", "CaseBuilder", "Cast", - "TryCast", - "Between", + "Column", + "CreateMemoryTable", + "CreateView", + "Distinct", + "DropTable", + "EmptyRelation", + "Exists", "Explain", + "Expr", + "Extension", + "Filter", + "GroupingSet", + "ILike", + "InList", + "InSubquery", + "IsFalse", + "IsNotFalse", + "IsNotNull", + "IsNotTrue", + "IsNotUnknown", + "IsNull", + "IsTrue", + "IsUnknown", + "Join", + "JoinConstraint", + "JoinType", + "Like", "Limit", - "Aggregate", + "Literal", + "Literal", + "Negative", + "Not", + "Partitioning", + "Placeholder", + "Projection", + "Repartition", + "ScalarSubquery", + "ScalarVariable", + "SimilarTo", "Sort", "SortExpr", - "Analyze", - "EmptyRelation", - "Join", - "JoinType", - "JoinConstraint", + "Subquery", + "SubqueryAlias", + "TableScan", + "TryCast", "Union", "Unnest", "UnnestExpr", - "Extension", - "Filter", - "Projection", - 
"TableScan", - "CreateMemoryTable", - "CreateView", - "Distinct", - "SubqueryAlias", - "DropTable", - "Partitioning", - "Repartition", "Window", "WindowExpr", "WindowFrame", @@ -311,7 +311,7 @@ def __getitem__(self, key: str | int) -> Expr: ) return Expr(self.expr.__getitem__(key)) - def __eq__(self, rhs: Any) -> Expr: + def __eq__(self, rhs: object) -> Expr: """Equal to. Accepts either an expression or any valid PyArrow scalar literal value. @@ -320,7 +320,7 @@ def __eq__(self, rhs: Any) -> Expr: rhs = Expr.literal(rhs) return Expr(self.expr.__eq__(rhs.expr)) - def __ne__(self, rhs: Any) -> Expr: + def __ne__(self, rhs: object) -> Expr: """Not equal to. Accepts either an expression or any valid PyArrow scalar literal value. diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index b449c4868..0cc7434cf 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,13 +18,12 @@ from __future__ import annotations -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional import pyarrow as pa from datafusion._internal import functions as f from datafusion.common import NullTreatment -from datafusion.context import SessionContext from datafusion.expr import ( CaseBuilder, Expr, @@ -34,6 +33,9 @@ sort_list_to_raw_sort_list, ) +if TYPE_CHECKING: + from datafusion.context import SessionContext + __all__ = [ "abs", "acos", @@ -81,8 +83,8 @@ "array_sort", "array_to_string", "array_union", - "arrow_typeof", "arrow_cast", + "arrow_typeof", "ascii", "asin", "asinh", @@ -97,6 +99,7 @@ "bool_and", "bool_or", "btrim", + "cardinality", "case", "cbrt", "ceil", @@ -116,6 +119,7 @@ "covar", "covar_pop", "covar_samp", + "cume_dist", "current_date", "current_time", "date_bin", @@ -125,17 +129,17 @@ "datetrunc", "decode", "degrees", + "dense_rank", "digest", "empty", "encode", "ends_with", - "extract", "exp", + "extract", "factorial", "find_in_set", "first_value", "flatten", - "cardinality", "floor", 
"from_unixtime", "gcd", @@ -143,8 +147,10 @@ "initcap", "isnan", "iszero", + "lag", "last_value", "lcm", + "lead", "left", "length", "levenshtein", @@ -166,10 +172,10 @@ "list_prepend", "list_push_back", "list_push_front", - "list_repeat", "list_remove", "list_remove_all", "list_remove_n", + "list_repeat", "list_replace", "list_replace_all", "list_replace_n", @@ -180,14 +186,14 @@ "list_union", "ln", "log", - "log10", "log2", + "log10", "lower", "lpad", "ltrim", "make_array", - "make_list", "make_date", + "make_list", "max", "md5", "mean", @@ -195,19 +201,22 @@ "min", "named_struct", "nanvl", - "nvl", "now", "nth_value", + "ntile", "nullif", + "nvl", "octet_length", "order_by", "overlay", + "percent_rank", "pi", "pow", "power", "radians", "random", "range", + "rank", "regexp_like", "regexp_match", "regexp_replace", @@ -225,6 +234,7 @@ "reverse", "right", "round", + "row_number", "rpad", "rtrim", "sha224", @@ -252,8 +262,8 @@ "to_hex", "to_timestamp", "to_timestamp_micros", - "to_timestamp_nanos", "to_timestamp_millis", + "to_timestamp_nanos", "to_timestamp_seconds", "to_unixtime", "translate", @@ -268,14 +278,6 @@ "when", # Window Functions "window", - "lead", - "lag", - "row_number", - "rank", - "dense_rank", - "percent_rank", - "cume_dist", - "ntile", ] @@ -292,14 +294,14 @@ def nullif(expr1: Expr, expr2: Expr) -> Expr: return Expr(f.nullif(expr1.expr, expr2.expr)) -def encode(input: Expr, encoding: Expr) -> Expr: +def encode(expr: Expr, encoding: Expr) -> Expr: """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" - return Expr(f.encode(input.expr, encoding.expr)) + return Expr(f.encode(expr.expr, encoding.expr)) -def decode(input: Expr, encoding: Expr) -> Expr: +def decode(expr: Expr, encoding: Expr) -> Expr: """Decode the ``input``, using the ``encoding``. 
encoding can be base64 or hex.""" - return Expr(f.decode(input.expr, encoding.expr)) + return Expr(f.decode(expr.expr, encoding.expr)) def array_to_string(expr: Expr, delimiter: Expr) -> Expr: diff --git a/python/datafusion/input/__init__.py b/python/datafusion/input/__init__.py index f85ce21f0..f0c1f42b4 100644 --- a/python/datafusion/input/__init__.py +++ b/python/datafusion/input/__init__.py @@ -23,5 +23,5 @@ from .location import LocationInputPlugin __all__ = [ - LocationInputPlugin, + "LocationInputPlugin", ] diff --git a/python/datafusion/input/base.py b/python/datafusion/input/base.py index 4eba19784..f67dde2a1 100644 --- a/python/datafusion/input/base.py +++ b/python/datafusion/input/base.py @@ -38,11 +38,9 @@ class BaseInputSource(ABC): """ @abstractmethod - def is_correct_input(self, input_item: Any, table_name: str, **kwargs) -> bool: + def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: """Returns `True` if the input is valid.""" - pass @abstractmethod - def build_table(self, input_item: Any, table_name: str, **kwarg) -> SqlTable: + def build_table(self, input_item: Any, table_name: str, **kwarg: Any) -> SqlTable: # type: ignore[invalid-type-form] """Create a table from the input source.""" - pass diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index 517cd1578..08d98d115 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -18,7 +18,7 @@ """The default input source for DataFusion.""" import glob -import os +from pathlib import Path from typing import Any from datafusion.common import DataTypeMap, SqlTable @@ -31,7 +31,7 @@ class LocationInputPlugin(BaseInputSource): This can be read in from a file (on disk, remote etc.). 
""" - def is_correct_input(self, input_item: Any, table_name: str, **kwargs): + def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: # noqa: ARG002 """Returns `True` if the input is valid.""" return isinstance(input_item, str) @@ -39,27 +39,28 @@ def build_table( self, input_item: str, table_name: str, - **kwargs, - ) -> SqlTable: + **kwargs: Any, # noqa: ARG002 + ) -> SqlTable: # type: ignore[invalid-type-form] """Create a table from the input source.""" - _, extension = os.path.splitext(input_item) - format = extension.lstrip(".").lower() + extension = Path(input_item).suffix + file_format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] - if format == "parquet": + if file_format == "parquet": import pyarrow.parquet as pq # Read the Parquet metadata metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable - for col in metadata.schema: - columns.append( - ( - col.name, - DataTypeMap.from_parquet_type_str(col.physical_type), - ) + columns = [ + ( + col.name, + DataTypeMap.from_parquet_type_str(col.physical_type), ) + for col in metadata.schema + ] + elif format == "csv": import csv @@ -69,19 +70,18 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. - with open(input_item, "r") as file: + with Path(input_item).open() as file: reader = csv.reader(file) - header_row = next(reader) - print(header_row) + _header_row = next(reader) for _ in reader: num_rows += 1 # TODO: Need to actually consume this row into reasonable columns - raise RuntimeError("TODO: Currently unable to support CSV input files.") + msg = "TODO: Currently unable to support CSV input files." 
+ raise RuntimeError(msg) else: - raise RuntimeError( - f"Input of format: `{format}` is currently not supported.\ + msg = f"Input of format: `{format}` is currently not supported.\ Only Parquet and CSV." - ) + raise RuntimeError(msg) # Input could possibly be multiple files. Create a list if so input_files = glob.glob(input_item) diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 3b6264948..3e39703e3 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -19,15 +19,19 @@ from __future__ import annotations -import pathlib - -import pyarrow +from typing import TYPE_CHECKING from datafusion.dataframe import DataFrame -from datafusion.expr import Expr from ._internal import SessionContext as SessionContextInternal +if TYPE_CHECKING: + import pathlib + + import pyarrow as pa + + from datafusion.expr import Expr + def read_parquet( path: str | pathlib.Path, @@ -35,7 +39,7 @@ def read_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. 
@@ -79,7 +83,7 @@ def read_parquet( def read_json( path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -120,7 +124,7 @@ def read_json( def read_csv( path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -173,7 +177,7 @@ def read_csv( def read_avro( path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".avro", ) -> DataFrame: diff --git a/python/datafusion/object_store.py b/python/datafusion/object_store.py index 7cc17506f..6298526f5 100644 --- a/python/datafusion/object_store.py +++ b/python/datafusion/object_store.py @@ -24,4 +24,4 @@ MicrosoftAzure = object_store.MicrosoftAzure Http = object_store.Http -__all__ = ["AmazonS3", "GoogleCloud", "LocalFileSystem", "MicrosoftAzure", "Http"] +__all__ = ["AmazonS3", "GoogleCloud", "Http", "LocalFileSystem", "MicrosoftAzure"] diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index 133fc446d..0b7bebcb3 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -19,7 +19,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, List +from typing import TYPE_CHECKING, Any import datafusion._internal as df_internal @@ -27,8 +27,8 @@ from datafusion.context import SessionContext __all__ = [ - "LogicalPlan", "ExecutionPlan", + "LogicalPlan", ] @@ -54,7 +54,7 @@ def to_variant(self) -> Any: """Convert the logical plan into its specific variant.""" return self._raw_plan.to_variant() - def inputs(self) -> List[LogicalPlan]: + def inputs(self) -> list[LogicalPlan]: """Returns the list of inputs to the logical 
plan.""" return [LogicalPlan(p) for p in self._raw_plan.inputs()] @@ -106,7 +106,7 @@ def __init__(self, plan: df_internal.ExecutionPlan) -> None: """This constructor should not be called by the end user.""" self._raw_plan = plan - def children(self) -> List[ExecutionPlan]: + def children(self) -> list[ExecutionPlan]: """Get a list of children `ExecutionPlan` that act as inputs to this plan. The returned list will be empty for leaf nodes such as scans, will contain a diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 772cd9089..556eaa786 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -26,14 +26,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - import pyarrow + import pyarrow as pa import typing_extensions import datafusion._internal as df_internal class RecordBatch: - """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" + """This class is essentially a wrapper for :py:class:`pa.RecordBatch`.""" def __init__(self, record_batch: df_internal.RecordBatch) -> None: """This constructor is generally not called by the end user. 
@@ -42,8 +42,8 @@ def __init__(self, record_batch: df_internal.RecordBatch) -> None: """ self.record_batch = record_batch - def to_pyarrow(self) -> pyarrow.RecordBatch: - """Convert to :py:class:`pyarrow.RecordBatch`.""" + def to_pyarrow(self) -> pa.RecordBatch: + """Convert to :py:class:`pa.RecordBatch`.""" return self.record_batch.to_pyarrow() diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index 06302fe38..f10adfb0c 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -23,7 +23,6 @@ from __future__ import annotations -import pathlib from typing import TYPE_CHECKING try: @@ -36,11 +35,13 @@ from ._internal import substrait as substrait_internal if TYPE_CHECKING: + import pathlib + from datafusion.context import SessionContext __all__ = [ - "Plan", "Consumer", + "Plan", "Producer", "Serde", ] @@ -68,11 +69,9 @@ def encode(self) -> bytes: @deprecated("Use `Plan` instead.") -class plan(Plan): +class plan(Plan): # noqa: N801 """See `Plan`.""" - pass - class Serde: """Provides the ``Substrait`` serialization and deserialization.""" @@ -140,11 +139,9 @@ def deserialize_bytes(proto_bytes: bytes) -> Plan: @deprecated("Use `Serde` instead.") -class serde(Serde): +class serde(Serde): # noqa: N801 """See `Serde` instead.""" - pass - class Producer: """Generates substrait plans from a logical plan.""" @@ -168,11 +165,9 @@ def to_substrait_plan(logical_plan: LogicalPlan, ctx: SessionContext) -> Plan: @deprecated("Use `Producer` instead.") -class producer(Producer): +class producer(Producer): # noqa: N801 """Use `Producer` instead.""" - pass - class Consumer: """Generates a logical plan from a substrait plan.""" @@ -194,7 +189,5 @@ def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: @deprecated("Use `Consumer` instead.") -class consumer(Consumer): +class consumer(Consumer): # noqa: N801 """Use `Consumer` instead.""" - - pass diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 
af7bcf2ed..603b7063d 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -22,15 +22,15 @@ import functools from abc import ABCMeta, abstractmethod from enum import Enum -from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, overload -import pyarrow +import pyarrow as pa import datafusion._internal as df_internal from datafusion.expr import Expr if TYPE_CHECKING: - _R = TypeVar("_R", bound=pyarrow.DataType) + _R = TypeVar("_R", bound=pa.DataType) class Volatility(Enum): @@ -72,7 +72,7 @@ class Volatility(Enum): for each output row, resulting in a unique random value for each row. """ - def __str__(self): + def __str__(self) -> str: """Returns the string equivalent.""" return self.name.lower() @@ -88,7 +88,7 @@ def __init__( self, name: str, func: Callable[..., _R], - input_types: pyarrow.DataType | list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], return_type: _R, volatility: Volatility | str, ) -> None: @@ -96,7 +96,7 @@ def __init__( See helper method :py:func:`udf` for argument details. """ - if isinstance(input_types, pyarrow.DataType): + if isinstance(input_types, pa.DataType): input_types = [input_types] self._udf = df_internal.ScalarUDF( name, func, input_types, return_type, str(volatility) @@ -111,7 +111,27 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) - class udf: + @overload + @staticmethod + def udf( + input_types: list[pa.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., ScalarUDF]: ... + + @overload + @staticmethod + def udf( + func: Callable[..., _R], + input_types: list[pa.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> ScalarUDF: ... 
+ + @staticmethod + def udf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Function (UDF). This class can be used both as a **function** and as a **decorator**. @@ -125,7 +145,7 @@ class udf: Args: func (Callable, optional): **Only needed when calling as a function.** Skip this argument when using `udf` as a decorator. - input_types (list[pyarrow.DataType]): The data types of the arguments + input_types (list[pa.DataType]): The data types of the arguments to `func`. This list must be of the same length as the number of arguments. return_type (_R): The data type of the return value from the function. @@ -141,40 +161,28 @@ class udf: ``` def double_func(x): return x * 2 - double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), + double_udf = udf(double_func, [pa.int32()], pa.int32(), "volatile", "double_it") ``` **Using `udf` as a decorator:** ``` - @udf([pyarrow.int32()], pyarrow.int32(), "volatile", "double_it") + @udf([pa.int32()], pa.int32(), "volatile", "double_it") def double_udf(x): return x * 2 ``` """ - def __new__(cls, *args, **kwargs): - """Create a new UDF. 
- - Trigger UDF function or decorator depending on if the first args is callable - """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return cls._function(*args, **kwargs) - else: - # Case 2: Used as a decorator with parameters - return cls._decorator(*args, **kwargs) - - @staticmethod def _function( func: Callable[..., _R], - input_types: list[pyarrow.DataType], + input_types: list[pa.DataType], return_type: _R, volatility: Volatility | str, name: Optional[str] = None, ) -> ScalarUDF: if not callable(func): - raise TypeError("`func` argument must be callable") + msg = "`func` argument must be callable" + raise TypeError(msg) if name is None: if hasattr(func, "__qualname__"): name = func.__qualname__.lower() @@ -188,49 +196,50 @@ def _function( volatility=volatility, ) - @staticmethod def _decorator( - input_types: list[pyarrow.DataType], + input_types: list[pa.DataType], return_type: _R, volatility: Volatility | str, name: Optional[str] = None, - ): - def decorator(func): + ) -> Callable: + def decorator(func: Callable): udf_caller = ScalarUDF.udf( func, input_types, return_type, volatility, name ) @functools.wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any): return udf_caller(*args, **kwargs) return wrapper return decorator + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + class Accumulator(metaclass=ABCMeta): """Defines how an :py:class:`AggregateUDF` accumulates values.""" @abstractmethod - def state(self) -> List[pyarrow.Scalar]: + def state(self) -> list[pa.Scalar]: """Return the current state.""" - pass @abstractmethod - def update(self, *values: pyarrow.Array) -> None: + def update(self, *values: pa.Array) -> None: """Evaluate an array of values and update state.""" - pass 
@abstractmethod - def merge(self, states: List[pyarrow.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: """Merge a set of states.""" - pass @abstractmethod - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: """Return the resultant value.""" - pass class AggregateUDF: @@ -244,9 +253,9 @@ def __init__( self, name: str, accumulator: Callable[[], Accumulator], - input_types: list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, ) -> None: """Instantiate a user-defined aggregate function (UDAF). @@ -272,7 +281,29 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udaf.__call__(*args_raw)) - class udaf: + @overload + @staticmethod + def udaf( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., AggregateUDF]: ... + + @overload + @staticmethod + def udaf( + accum: Callable[[], Accumulator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: ... + + @staticmethod + def udaf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Aggregate Function (UDAF). 
This class allows you to define an **aggregate function** that can be used in @@ -300,13 +331,13 @@ class Summarize(Accumulator): def __init__(self, bias: float = 0.0): self._sum = pa.scalar(bias) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - def merge(self, states: List[pa.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) def evaluate(self) -> pa.Scalar: @@ -344,37 +375,23 @@ def udf4() -> Summarize: aggregation or window function calls. """ - def __new__(cls, *args, **kwargs): - """Create a new UDAF. - - Trigger UDAF function or decorator depending on if the first args is - callable - """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return cls._function(*args, **kwargs) - else: - # Case 2: Used as a decorator with parameters - return cls._decorator(*args, **kwargs) - - @staticmethod def _function( accum: Callable[[], Accumulator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, name: Optional[str] = None, ) -> AggregateUDF: if not callable(accum): - raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): - raise TypeError( - "Accumulator must implement the abstract base class Accumulator" - ) + msg = "`func` must be callable." 
+ raise TypeError(msg) + if not isinstance(accum(), Accumulator): + msg = "Accumulator must implement the abstract base class Accumulator" + raise TypeError(msg) if name is None: - name = accum.__call__().__class__.__qualname__.lower() - if isinstance(input_types, pyarrow.DataType): + name = accum().__class__.__qualname__.lower() + if isinstance(input_types, pa.DataType): input_types = [input_types] return AggregateUDF( name=name, @@ -385,29 +402,34 @@ def _function( volatility=volatility, ) - @staticmethod def _decorator( - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, name: Optional[str] = None, - ): - def decorator(accum: Callable[[], Accumulator]): + ) -> Callable[..., Callable[..., Expr]]: + def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]: udaf_caller = AggregateUDF.udaf( accum, input_types, return_type, state_type, volatility, name ) @functools.wraps(accum) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any) -> Expr: return udaf_caller(*args, **kwargs) return wrapper return decorator + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + -class WindowEvaluator(metaclass=ABCMeta): +class WindowEvaluator: """Evaluator class for user-defined window functions (UDWF). It is up to the user to decide which evaluate function is appropriate. 
@@ -423,7 +445,7 @@ class WindowEvaluator(metaclass=ABCMeta): +------------------------+--------------------------------+------------------+---------------------------+ | True | True/False | True/False | ``evaluate`` | +------------------------+--------------------------------+------------------+---------------------------+ - """ # noqa: W505 + """ # noqa: W505, E501 def memoize(self) -> None: """Perform a memoize operation to improve performance. @@ -436,9 +458,8 @@ def memoize(self) -> None: `memoize` is called after each input batch is processed, and such functions can save whatever they need """ - pass - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: + def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 """Return the range for the window fuction. If `uses_window_frame` flag is `false`. This method is used to @@ -460,14 +481,17 @@ def is_causal(self) -> bool: """Get whether evaluator needs future data for its result.""" return False - def evaluate_all(self, values: list[pyarrow.Array], num_rows: int) -> pyarrow.Array: + def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: """Evaluate a window function on an entire input partition. This function is called once per input *partition* for window functions that *do not use* values from the window frame, such as - :py:func:`~datafusion.functions.row_number`, :py:func:`~datafusion.functions.rank`, - :py:func:`~datafusion.functions.dense_rank`, :py:func:`~datafusion.functions.percent_rank`, - :py:func:`~datafusion.functions.cume_dist`, :py:func:`~datafusion.functions.lead`, + :py:func:`~datafusion.functions.row_number`, + :py:func:`~datafusion.functions.rank`, + :py:func:`~datafusion.functions.dense_rank`, + :py:func:`~datafusion.functions.percent_rank`, + :py:func:`~datafusion.functions.cume_dist`, + :py:func:`~datafusion.functions.lead`, and :py:func:`~datafusion.functions.lag`. It produces the result of all rows in a single pass. 
It @@ -499,12 +523,11 @@ def evaluate_all(self, values: list[pyarrow.Array], num_rows: int) -> pyarrow.Ar .. code-block:: text avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) - """ # noqa: W505 - pass + """ # noqa: W505, E501 def evaluate( - self, values: list[pyarrow.Array], eval_range: tuple[int, int] - ) -> pyarrow.Scalar: + self, values: list[pa.Array], eval_range: tuple[int, int] + ) -> pa.Scalar: """Evaluate window function on a range of rows in an input partition. This is the simplest and most general function to implement @@ -519,11 +542,10 @@ def evaluate( and evaluation results of ORDER BY expressions. If function has a single argument, `values[1..]` will contain ORDER BY expression results. """ - pass def evaluate_all_with_rank( self, num_rows: int, ranks_in_partition: list[tuple[int, int]] - ) -> pyarrow.Array: + ) -> pa.Array: """Called for window functions that only need the rank of a row. Evaluate the partition evaluator against the partition using @@ -552,7 +574,6 @@ def evaluate_all_with_rank( The user must implement this method if ``include_rank`` returns True. """ - pass def supports_bounded_execution(self) -> bool: """Can the window function be incrementally computed using bounded memory?""" @@ -567,10 +588,6 @@ def include_rank(self) -> bool: return False -if TYPE_CHECKING: - _W = TypeVar("_W", bound=WindowEvaluator) - - class WindowUDF: """Class for performing window user-defined functions (UDF). @@ -582,8 +599,8 @@ def __init__( self, name: str, func: Callable[[], WindowEvaluator], - input_types: list[pyarrow.DataType], - return_type: pyarrow.DataType, + input_types: list[pa.DataType], + return_type: pa.DataType, volatility: Volatility | str, ) -> None: """Instantiate a user-defined window function (UDWF). 
@@ -607,8 +624,8 @@ def __call__(self, *args: Expr) -> Expr: @staticmethod def udwf( func: Callable[[], WindowEvaluator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, volatility: Volatility | str, name: Optional[str] = None, ) -> WindowUDF: @@ -648,16 +665,16 @@ def bias_10() -> BiasedNumbers: Returns: A user-defined window function. - """ # noqa W505 + """ # noqa: W505, E501 if not callable(func): - raise TypeError("`func` must be callable.") - if not isinstance(func.__call__(), WindowEvaluator): - raise TypeError( - "`func` must implement the abstract base class WindowEvaluator" - ) + msg = "`func` must be callable." + raise TypeError(msg) + if not isinstance(func(), WindowEvaluator): + msg = "`func` must implement the abstract base class WindowEvaluator" + raise TypeError(msg) if name is None: - name = func.__call__().__class__.__qualname__.lower() - if isinstance(input_types, pyarrow.DataType): + name = func().__class__.__qualname__.lower() + if isinstance(input_types, pa.DataType): input_types = [input_types] return WindowUDF( name=name, @@ -666,3 +683,10 @@ def bias_10() -> BiasedNumbers: return_type=return_type, volatility=volatility, ) + + +# Convenience exports so we can import instead of treating as +# variables at the package root +udf = ScalarUDF.udf +udaf = AggregateUDF.udaf +udwf = WindowUDF.udwf diff --git a/python/tests/generic.py b/python/tests/generic.py index 0177e2df0..1b98fdf9e 100644 --- a/python/tests/generic.py +++ b/python/tests/generic.py @@ -16,6 +16,7 @@ # under the License. 
import datetime +from datetime import timezone import numpy as np import pyarrow as pa @@ -26,29 +27,29 @@ def data(): - np.random.seed(1) + rng = np.random.default_rng(1) data = np.concatenate( [ - np.random.normal(0, 0.01, size=50), - np.random.normal(50, 0.01, size=50), + rng.normal(0, 0.01, size=50), + rng.normal(50, 0.01, size=50), ] ) return pa.array(data) def data_with_nans(): - np.random.seed(0) - data = np.random.normal(0, 0.01, size=50) - mask = np.random.randint(0, 2, size=50) + rng = np.random.default_rng(0) + data = rng.normal(0, 0.01, size=50) + mask = rng.normal(0, 2, size=50) data[mask == 0] = np.nan return data def data_datetime(f): data = [ - datetime.datetime.now(), - datetime.datetime.now() - datetime.timedelta(days=1), - datetime.datetime.now() + datetime.timedelta(days=1), + datetime.datetime.now(tz=timezone.utc), + datetime.datetime.now(tz=timezone.utc) - datetime.timedelta(days=1), + datetime.datetime.now(tz=timezone.utc) + datetime.timedelta(days=1), ] return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False])) diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 5ef46131b..61b1c7d80 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -66,7 +66,7 @@ def df_aggregate_100(): @pytest.mark.parametrize( - "agg_expr, calc_expected", + ("agg_expr", "calc_expected"), [ (f.avg(column("a")), lambda a, b, c, d: np.array(np.average(a))), ( @@ -114,7 +114,7 @@ def test_aggregation_stats(df, agg_expr, calc_expected): @pytest.mark.parametrize( - "agg_expr, expected, array_sort", + ("agg_expr", "expected", "array_sort"), [ (f.approx_distinct(column("b")), pa.array([2], type=pa.uint64()), False), ( @@ -182,12 +182,11 @@ def test_aggregation(df, agg_expr, expected, array_sort): agg_df.show() result = agg_df.collect()[0] - print(result) assert result.column(0) == expected @pytest.mark.parametrize( - "name,expr,expected", + ("name", "expr", "expected"), [ ( 
"approx_percentile_cont", @@ -299,7 +298,9 @@ def test_aggregate_100(df_aggregate_100, name, expr, expected): ] -@pytest.mark.parametrize("name,expr,result", data_test_bitwise_and_boolean_functions) +@pytest.mark.parametrize( + ("name", "expr", "result"), data_test_bitwise_and_boolean_functions +) def test_bit_and_bool_fns(df, name, expr, result): df = df.aggregate([], [expr.alias(name)]) @@ -311,7 +312,7 @@ def test_bit_and_bool_fns(df, name, expr, result): @pytest.mark.parametrize( - "name,expr,result", + ("name", "expr", "result"), [ ("first_value", f.first_value(column("a")), [0, 4]), ( @@ -361,7 +362,6 @@ def test_bit_and_bool_fns(df, name, expr, result): ), [8, 9], ), - ("first_value", f.first_value(column("a")), [0, 4]), ( "nth_value_ordered", f.nth_value(column("a"), 2, order_by=[column("a").sort(ascending=False)]), @@ -401,7 +401,7 @@ def test_first_last_value(df_partitioned, name, expr, result) -> None: @pytest.mark.parametrize( - "name,expr,result", + ("name", "expr", "result"), [ ("string_agg", f.string_agg(column("a"), ","), "one,two,three,two"), ("string_agg", f.string_agg(column("b"), ""), "03124"), diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py index 214f6b165..23b328458 100644 --- a/python/tests/test_catalog.py +++ b/python/tests/test_catalog.py @@ -19,6 +19,9 @@ import pytest +# Note we take in `database` as a variable even though we don't use +# it because that will cause the fixture to set up the context with +# the tables we need. 
def test_basic(ctx, database): with pytest.raises(KeyError): ctx.catalog("non-existent") @@ -26,10 +29,10 @@ def test_basic(ctx, database): default = ctx.catalog() assert default.names() == ["public"] - for database in [default.database("public"), default.database()]: - assert database.names() == {"csv1", "csv", "csv2"} + for db in [default.database("public"), default.database()]: + assert db.names() == {"csv1", "csv", "csv2"} - table = database.table("csv") + table = db.table("csv") assert table.kind == "physical" assert table.schema == pa.schema( [ diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 91046e6b8..7a0a7aa08 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -16,7 +16,6 @@ # under the License. import datetime as dt import gzip -import os import pathlib import pyarrow as pa @@ -45,7 +44,7 @@ def test_create_context_runtime_config_only(): SessionContext(runtime=RuntimeEnvBuilder()) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_runtime_configs(tmp_path, path_to_str): path1 = tmp_path / "dir1" path2 = tmp_path / "dir2" @@ -62,7 +61,7 @@ def test_runtime_configs(tmp_path, path_to_str): assert db is not None -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_temporary_files(tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -79,14 +78,14 @@ def test_create_context_with_all_valid_args(): runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() - .with_create_default_catalog_and_schema(True) + .with_create_default_catalog_and_schema(enabled=True) .with_default_catalog_and_schema("foo", "bar") .with_target_partitions(1) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - 
.with_parquet_pruning(False) + .with_information_schema(enabled=True) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) + .with_repartition_windows(enabled=False) + .with_parquet_pruning(enabled=False) ) ctx = SessionContext(config, runtime) @@ -167,7 +166,7 @@ def test_from_arrow_table(ctx): def record_batch_generator(num_batches: int): schema = pa.schema([("a", pa.int64()), ("b", pa.int64())]) - for i in range(num_batches): + for _i in range(num_batches): yield pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], schema=schema ) @@ -492,10 +491,10 @@ def test_table_not_found(ctx): def test_read_json(ctx): - path = os.path.dirname(os.path.abspath(__file__)) + path = pathlib.Path(__file__).parent.resolve() # Default - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = path / "data_test_context" / "data.json" df = ctx.read_json(test_data_path) result = df.collect() @@ -515,7 +514,7 @@ def test_read_json(ctx): assert result[0].schema == schema # File extension - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = path / "data_test_context" / "data.json" df = ctx.read_json(test_data_path, file_extension=".json") result = df.collect() @@ -524,15 +523,17 @@ def test_read_json(ctx): def test_read_json_compressed(ctx, tmp_path): - path = os.path.dirname(os.path.abspath(__file__)) - test_data_path = os.path.join(path, "data_test_context", "data.json") + path = pathlib.Path(__file__).parent.resolve() + test_data_path = path / "data_test_context" / "data.json" # File compression type gzip_path = tmp_path / "data.json.gz" - with open(test_data_path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with ( + pathlib.Path.open(test_data_path, "rb") as csv_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(csv_file) df = ctx.read_json(gzip_path, 
file_extension=".gz", file_compression_type="gz") result = df.collect() @@ -563,14 +564,16 @@ def test_read_csv_list(ctx): def test_read_csv_compressed(ctx, tmp_path): - test_data_path = "testing/data/csv/aggregate_test_100.csv" + test_data_path = pathlib.Path("testing/data/csv/aggregate_test_100.csv") # File compression type gzip_path = tmp_path / "aggregate_test_100.csv.gz" - with open(test_data_path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with ( + pathlib.Path.open(test_data_path, "rb") as csv_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(csv_file) csv_df = ctx.read_csv(gzip_path, file_extension=".gz", file_compression_type="gz") csv_df.select(column("c1")).show() @@ -603,7 +606,7 @@ def test_create_sql_options(): def test_sql_with_options_no_ddl(ctx): sql = "CREATE TABLE IF NOT EXISTS valuetable AS VALUES(1,'HELLO'),(12,'DATAFUSION')" ctx.sql(sql) - options = SQLOptions().with_allow_ddl(False) + options = SQLOptions().with_allow_ddl(allow=False) with pytest.raises(Exception, match="DDL"): ctx.sql_with_options(sql, options=options) @@ -618,7 +621,7 @@ def test_sql_with_options_no_dml(ctx): ctx.register_dataset(table_name, dataset) sql = f'INSERT INTO "{table_name}" VALUES (1, 2), (2, 3);' ctx.sql(sql) - options = SQLOptions().with_allow_dml(False) + options = SQLOptions().with_allow_dml(allow=False) with pytest.raises(Exception, match="DML"): ctx.sql_with_options(sql, options=options) @@ -626,6 +629,6 @@ def test_sql_with_options_no_dml(ctx): def test_sql_with_options_no_statements(ctx): sql = "SET time zone = 1;" ctx.sql(sql) - options = SQLOptions().with_allow_statements(False) + options = SQLOptions().with_allow_statements(allow=False) with pytest.raises(Exception, match="SetVariable"): ctx.sql_with_options(sql, options=options) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index c636e896a..d084f12dd 100644 --- 
a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -339,7 +339,7 @@ def test_join(): # Verify we don't make a breaking change to pre-43.0.0 # where users would pass join_keys as a positional argument - df2 = df.join(df1, (["a"], ["a"]), how="inner") # type: ignore + df2 = df.join(df1, (["a"], ["a"]), how="inner") df2.show() df2 = df2.sort(column("l.a")) table = pa.Table.from_batches(df2.collect()) @@ -375,17 +375,17 @@ def test_join_invalid_params(): with pytest.raises( ValueError, match=r"`left_on` or `right_on` should not provided with `on`" ): - df2 = df.join(df1, on="a", how="inner", right_on="test") # type: ignore + df2 = df.join(df1, on="a", how="inner", right_on="test") with pytest.raises( ValueError, match=r"`left_on` and `right_on` should both be provided." ): - df2 = df.join(df1, left_on="a", how="inner") # type: ignore + df2 = df.join(df1, left_on="a", how="inner") with pytest.raises( ValueError, match=r"either `on` or `left_on` and `right_on` should be provided." 
): - df2 = df.join(df1, how="inner") # type: ignore + df2 = df.join(df1, how="inner") def test_join_on(): @@ -567,7 +567,7 @@ def test_distinct(): ] -@pytest.mark.parametrize("name,expr,result", data_test_window_functions) +@pytest.mark.parametrize(("name", "expr", "result"), data_test_window_functions) def test_window_functions(partitioned_df, name, expr, result): df = partitioned_df.select( column("a"), column("b"), column("c"), f.alias(expr, name) @@ -731,7 +731,7 @@ def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() expected = ( - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" # noqa: E501 + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" ) assert expected == plan.display() @@ -756,7 +756,7 @@ def test_execution_plan(aggregate_df): ctx = SessionContext() rows_returned = 0 - for idx in range(0, plan.partition_count): + for idx in range(plan.partition_count): stream = ctx.execute(plan, idx) try: batch = stream.next() @@ -885,7 +885,7 @@ def test_union_distinct(ctx): ) df_c = ctx.create_dataframe([[batch]]).sort(column("a")) - df_a_u_b = df_a.union(df_b, True).sort(column("a")) + df_a_u_b = df_a.union(df_b, distinct=True).sort(column("a")) assert df_c.collect() == df_a_u_b.collect() assert df_c.collect() == df_a_u_b.collect() @@ -954,8 +954,6 @@ def test_to_arrow_table(df): def test_execute_stream(df): stream = df.execute_stream() - for s in stream: - print(type(s)) assert all(batch is not None for batch in stream) assert not list(stream) # after one iteration the generator must be exhausted @@ -969,7 +967,7 @@ def test_execute_stream_to_arrow_table(df, schema): (batch.to_pyarrow() for batch in stream), schema=df.schema() ) else: - pyarrow_table = pa.Table.from_batches((batch.to_pyarrow() for batch in stream)) + pyarrow_table = pa.Table.from_batches(batch.to_pyarrow() for batch in stream) assert isinstance(pyarrow_table, pa.Table) assert pyarrow_table.shape == (3, 3) @@ 
-1033,7 +1031,7 @@ def test_describe(df): } -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_csv(ctx, df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1046,7 +1044,7 @@ def test_write_csv(ctx, df, tmp_path, path_to_str): assert result == expected -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_json(ctx, df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1059,7 +1057,7 @@ def test_write_json(ctx, df, tmp_path, path_to_str): assert result == expected -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_parquet(df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1071,7 +1069,7 @@ def test_write_parquet(df, tmp_path, path_to_str): @pytest.mark.parametrize( - "compression, compression_level", + ("compression", "compression_level"), [("gzip", 6), ("brotli", 7), ("zstd", 15)], ) def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @@ -1082,7 +1080,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): ) # test that the actual compression scheme is the one written - for root, dirs, files in os.walk(path): + for _root, _dirs, files in os.walk(path): for file in files: if file.endswith(".parquet"): metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() @@ -1097,7 +1095,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @pytest.mark.parametrize( - "compression, compression_level", + ("compression", "compression_level"), [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], ) def test_write_compressed_parquet_wrong_compression_level( @@ -1152,7 +1150,7 @@ def test_dataframe_export(df) -> None: table = pa.table(df, schema=desired_schema) assert 
table.num_columns == 1 assert table.num_rows == 3 - for i in range(0, 3): + for i in range(3): assert table[0][i].as_py() is None # Expect an error when we cannot convert schema @@ -1186,8 +1184,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: result = df.to_pydict() assert result["a"] == [1, 2, 3] - assert result["string_col"] == ["string data" for _i in range(0, 3)] - assert result["new_col"] == [3 for _i in range(0, 3)] + assert result["string_col"] == ["string data" for _i in range(3)] + assert result["new_col"] == [3 for _i in range(3)] def test_dataframe_repr_html(df) -> None: diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 354c7e180..926e69845 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -85,18 +85,14 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - # TODO: Upstream now has expressions for skip and fetch - # REF: https://github.com/apache/datafusion/pull/12836 - # assert plan.skip() == 0 + assert "Skip: None" in str(plan) df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") plan = df.logical_plan() plan = plan.to_variant() assert isinstance(plan, Limit) - # TODO: Upstream now has expressions for skip and fetch - # REF: https://github.com/apache/datafusion/pull/12836 - # assert plan.skip() == 5 + assert "Skip: Some(Literal(Int64(5)))" in str(plan) def test_aggregate_query(test_ctx): @@ -165,6 +161,7 @@ def traverse_logical_plan(plan): res = traverse_logical_plan(input_plan) if res is not None: return res + return None ctx = SessionContext() data = {"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]} @@ -176,7 +173,7 @@ def traverse_logical_plan(plan): assert variant.expr().to_variant().qualified_name() == "table1.name" assert ( str(variant.list()) - == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' + == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' # noqa: E501 ) assert not 
variant.negated() diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index fca05bb8f..ed88a16e3 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. import math -from datetime import datetime +from datetime import datetime, timezone import numpy as np import pyarrow as pa @@ -25,6 +25,8 @@ np.seterr(invalid="ignore") +DEFAULT_TZ = timezone.utc + @pytest.fixture def df(): @@ -37,9 +39,9 @@ def df(): pa.array(["hello ", " world ", " !"], type=pa.string_view()), pa.array( [ - datetime(2022, 12, 31), - datetime(2027, 6, 26), - datetime(2020, 7, 2), + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), ] ), pa.array([False, True, True]), @@ -221,12 +223,12 @@ def py_indexof(arr, v): def py_arr_remove(arr, v, n=None): new_arr = arr[:] found = 0 - while found != n: - try: + try: + while found != n: new_arr.remove(v) found += 1 - except ValueError: - break + except ValueError: + pass return new_arr @@ -234,13 +236,13 @@ def py_arr_remove(arr, v, n=None): def py_arr_replace(arr, from_, to, n=None): new_arr = arr[:] found = 0 - while found != n: - try: + try: + while found != n: idx = new_arr.index(from_) new_arr[idx] = to found += 1 - except ValueError: - break + except ValueError: + pass return new_arr @@ -268,266 +270,266 @@ def py_flatten(arr): @pytest.mark.parametrize( ("stmt", "py_expr"), [ - [ + ( lambda col: f.array_append(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_push_back(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_append(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_push_back(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in 
data], - ], - [ + ), + ( lambda col: f.array_concat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.array_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.list_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.list_concat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.array_dims(col), lambda data: [[len(r)] for r in data], - ], - [ + ), + ( lambda col: f.array_distinct(col), lambda data: [list(set(r)) for r in data], - ], - [ + ), + ( lambda col: f.list_distinct(col), lambda data: [list(set(r)) for r in data], - ], - [ + ), + ( lambda col: f.list_dims(col), lambda data: [[len(r)] for r in data], - ], - [ + ), + ( lambda col: f.array_element(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.array_empty(col), lambda data: [len(r) == 0 for r in data], - ], - [ + ), + ( lambda col: f.empty(col), lambda data: [len(r) == 0 for r in data], - ], - [ + ), + ( lambda col: f.array_extract(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.list_element(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.list_extract(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.array_length(col), lambda data: [len(r) for r in data], - ], - [ + ), + ( lambda col: f.list_length(col), lambda data: [len(r) for r in data], - ], - [ + ), + ( lambda col: f.array_has(col, literal(1.0)), lambda data: [1.0 in r for r in data], - ], - [ + ), + ( lambda col: f.array_has_all( col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) ), lambda data: [np.all([v in r for v in [1.0, 3.0, 5.0]]) for r in data], - ], - [ + ), + ( lambda col: f.array_has_any( col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) ), lambda data: [np.any([v in r for v in 
[1.0, 3.0, 5.0]]) for r in data], - ], - [ + ), + ( lambda col: f.array_position(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.array_indexof(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.list_position(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.list_indexof(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.array_positions(col, literal(1.0)), lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], - ], - [ + ), + ( lambda col: f.list_positions(col, literal(1.0)), lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], - ], - [ + ), + ( lambda col: f.array_ndims(col), lambda data: [np.array(r).ndim for r in data], - ], - [ + ), + ( lambda col: f.list_ndims(col), lambda data: [np.array(r).ndim for r in data], - ], - [ + ), + ( lambda col: f.array_prepend(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_push_front(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_prepend(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_push_front(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_pop_back(col), lambda data: [arr[:-1] for arr in data], - ], - [ + ), + ( lambda col: f.array_pop_front(col), lambda data: [arr[1:] for arr in data], - ], - [ + ), + ( lambda col: f.array_remove(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.array_remove_n(col, literal(3.0), literal(2)), lambda data: 
[py_arr_remove(arr, 3.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove_n(col, literal(3.0), literal(2)), lambda data: [py_arr_remove(arr, 3.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.array_remove_all(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove_all(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], - ], - [ + ), + ( lambda col: f.list_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], - ], - [ + ), + ( lambda col: f.array_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.array_replace_n(col, literal(3.0), literal(4.0), literal(1)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace_n(col, literal(3.0), literal(4.0), literal(2)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.array_replace_all(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace_all(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_sort(col, descending=True, null_first=True), lambda data: [np.sort(arr)[::-1] for arr in data], - ], - [ + ), + ( lambda col: f.list_sort(col, descending=False, null_first=False), lambda data: [np.sort(arr) for arr in data], - ], - [ + ), + ( lambda col: f.array_slice(col, literal(2), literal(4)), lambda data: [arr[1:4] for arr in data], - ], + ), pytest.param( lambda col: f.list_slice(col, literal(-1), literal(2)), 
lambda data: [arr[-1:2] for arr in data], ), - [ + ( lambda col: f.array_intersect(col, literal([3.0, 4.0])), lambda data: [np.intersect1d(arr, [3.0, 4.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_intersect(col, literal([3.0, 4.0])), lambda data: [np.intersect1d(arr, [3.0, 4.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_union(col, literal([12.0, 999.0])), lambda data: [np.union1d(arr, [12.0, 999.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_union(col, literal([12.0, 999.0])), lambda data: [np.union1d(arr, [12.0, 999.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_except(col, literal([3.0])), lambda data: [np.setdiff1d(arr, [3.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_except(col, literal([3.0])), lambda data: [np.setdiff1d(arr, [3.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_resize(col, literal(10), literal(0.0)), lambda data: [py_arr_resize(arr, 10, 0.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_resize(col, literal(10), literal(0.0)), lambda data: [py_arr_resize(arr, 10, 0.0) for arr in data], - ], - [ + ), + ( lambda col: f.range(literal(1), literal(5), literal(2)), lambda data: [np.arange(1, 5, 2)], - ], + ), ], ) def test_array_functions(stmt, py_expr): @@ -611,22 +613,22 @@ def test_make_array_functions(make_func): @pytest.mark.parametrize( ("stmt", "py_expr"), [ - [ + ( f.array_to_string(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.array_join(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.list_to_string(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.list_join(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], + ), ], ) def test_array_function_obj_tests(stmt, py_expr): @@ -640,7 +642,7 @@ def 
test_array_function_obj_tests(stmt, py_expr): @pytest.mark.parametrize( - "function, expected_result", + ("function", "expected_result"), [ ( f.ascii(column("a")), @@ -894,54 +896,72 @@ def test_temporal_functions(df): assert result.column(0) == pa.array([12, 6, 7], type=pa.int32()) assert result.column(1) == pa.array([2022, 2027, 2020], type=pa.int32()) assert result.column(2) == pa.array( - [datetime(2022, 12, 1), datetime(2027, 6, 1), datetime(2020, 7, 1)], - type=pa.timestamp("us"), + [ + datetime(2022, 12, 1, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 1, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 1, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(3) == pa.array( - [datetime(2022, 12, 31), datetime(2027, 6, 26), datetime(2020, 7, 2)], - type=pa.timestamp("us"), + [ + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(4) == pa.array( [ - datetime(2022, 12, 30, 23, 47, 30), - datetime(2027, 6, 25, 23, 47, 30), - datetime(2020, 7, 1, 23, 47, 30), + datetime(2022, 12, 30, 23, 47, 30, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 25, 23, 47, 30, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 1, 23, 47, 30, tzinfo=DEFAULT_TZ), ], - type=pa.timestamp("ns"), + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(5) == pa.array( - [datetime(2023, 1, 10, 20, 52, 54)] * 3, type=pa.timestamp("s") + [datetime(2023, 1, 10, 20, 52, 54, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("s"), ) assert result.column(6) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(7) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + [datetime(2023, 9, 7, 5, 6, 14, tzinfo=DEFAULT_TZ)] * 3, type=pa.timestamp("s") ) assert result.column(8) == 
pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + [datetime(2023, 9, 7, 5, 6, 14, 523000, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ms"), ) assert result.column(9) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("us"), ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) assert result.column(11) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(12) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + [datetime(2023, 9, 7, 5, 6, 14, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("s"), ) assert result.column(13) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + [datetime(2023, 9, 7, 5, 6, 14, 523000, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ms"), ) assert result.column(14) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("us"), ) assert result.column(15) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(16) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) @@ -1057,7 +1077,7 @@ def test_regr_funcs_sql_2(): @pytest.mark.parametrize( - "func, expected", + ("func", "expected"), [ pytest.param(f.regr_slope(column("c2"), column("c1")), [4.6], id="regr_slope"), pytest.param( @@ -1160,7 +1180,7 @@ def test_binary_string_functions(df): @pytest.mark.parametrize( - "python_datatype, name, 
expected", + ("python_datatype", "name", "expected"), [ pytest.param(bool, "e", pa.bool_(), id="bool"), pytest.param(int, "b", pa.int64(), id="int"), @@ -1179,7 +1199,7 @@ def test_cast(df, python_datatype, name: str, expected): @pytest.mark.parametrize( - "negated, low, high, expected", + ("negated", "low", "high", "expected"), [ pytest.param(False, 3, 5, {"filtered": [4, 5]}), pytest.param(False, 4, 5, {"filtered": [4, 5]}), diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 0c155cbde..9ef7ed89a 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -169,14 +169,15 @@ def test_class_module_is_datafusion(): def test_import_from_functions_submodule(): - from datafusion.functions import abs, sin # noqa + from datafusion.functions import abs as df_abs + from datafusion.functions import sin - assert functions.abs is abs + assert functions.abs is df_abs assert functions.sin is sin msg = "cannot import name 'foobar' from 'datafusion.functions'" with pytest.raises(ImportError, match=msg): - from datafusion.functions import foobar # noqa + from datafusion.functions import foobar # noqa: F401 def test_classes_are_inheritable(): diff --git a/python/tests/test_input.py b/python/tests/test_input.py index 806471357..4663f6148 100644 --- a/python/tests/test_input.py +++ b/python/tests/test_input.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-import os +import pathlib from datafusion.input.location import LocationInputPlugin @@ -23,10 +23,10 @@ def test_location_input(): location_input = LocationInputPlugin() - cwd = os.getcwd() - input_file = cwd + "/testing/data/parquet/generated_simple_numerics/blogs.parquet" + cwd = pathlib.Path.cwd() + input_file = cwd / "testing/data/parquet/generated_simple_numerics/blogs.parquet" table_name = "blog" - tbl = location_input.build_table(input_file, table_name) - assert "blog" == tbl.name - assert 3 == len(tbl.columns) + tbl = location_input.build_table(str(input_file), table_name) + assert tbl.name == "blog" + assert len(tbl.columns) == 3 assert "blogs.parquet" in tbl.filepaths[0] diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 21ad188ee..7ca509689 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -14,8 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-import os -import pathlib +from pathlib import Path import pyarrow as pa from datafusion import column @@ -23,10 +22,10 @@ def test_read_json_global_ctx(ctx): - path = os.path.dirname(os.path.abspath(__file__)) + path = Path(__file__).parent.resolve() # Default - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = Path(path) / "data_test_context" / "data.json" df = read_json(test_data_path) result = df.collect() @@ -46,7 +45,7 @@ def test_read_json_global_ctx(ctx): assert result[0].schema == schema # File extension - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = Path(path) / "data_test_context" / "data.json" df = read_json(test_data_path, file_extension=".json") result = df.collect() @@ -59,7 +58,7 @@ def test_read_parquet_global(): parquet_df.show() assert parquet_df is not None - path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" + path = Path.cwd() / "parquet/data/alltypes_plain.parquet" parquet_df = read_parquet(path=path) assert parquet_df is not None @@ -90,6 +89,6 @@ def test_read_avro(): avro_df.show() assert avro_df is not None - path = pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro" + path = Path.cwd() / "testing/data/avro/alltypes_plain.avro" avro_df = read_avro(path=path) assert avro_df is not None diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 862f745bf..b6348e3a0 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import gzip -import os +from pathlib import Path import numpy as np import pyarrow as pa @@ -47,9 +47,8 @@ def test_register_csv(ctx, tmp_path): ) write_csv(table, path) - with open(path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with Path.open(path, "rb") as csv_file, gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(csv_file) ctx.register_csv("csv", path) ctx.register_csv("csv1", str(path)) @@ -158,7 +157,7 @@ def test_register_parquet(ctx, tmp_path): assert result.to_pydict() == {"cnt": [100]} -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_parquet_partitioned(ctx, tmp_path, path_to_str): dir_root = tmp_path / "dataset_parquet_partitioned" dir_root.mkdir(exist_ok=False) @@ -194,7 +193,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str): assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1} -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_dataset(ctx, tmp_path, path_to_str): path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) path = str(path) if path_to_str else path @@ -209,13 +208,15 @@ def test_register_dataset(ctx, tmp_path, path_to_str): def test_register_json(ctx, tmp_path): - path = os.path.dirname(os.path.abspath(__file__)) - test_data_path = os.path.join(path, "data_test_context", "data.json") + path = Path(__file__).parent.resolve() + test_data_path = Path(path) / "data_test_context" / "data.json" gzip_path = tmp_path / "data.json.gz" - with open(test_data_path, "rb") as json_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(json_file) + with ( + Path.open(test_data_path, "rb") as json_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(json_file) ctx.register_json("json", test_data_path) 
ctx.register_json("json1", str(test_data_path)) @@ -470,16 +471,18 @@ def test_simple_select(ctx, tmp_path, arr): # In DF 43.0.0 we now default to having BinaryView and StringView # so the array that is saved to the parquet is slightly different # than the array read. Convert to values for comparison. - if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray): + if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)): arr = arr.tolist() result = result.tolist() np.testing.assert_equal(result, arr) -@pytest.mark.parametrize("file_sort_order", (None, [[col("int").sort(True, True)]])) -@pytest.mark.parametrize("pass_schema", (True, False)) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize( + "file_sort_order", [None, [[col("int").sort(ascending=True, nulls_first=True)]]] +) +@pytest.mark.parametrize("pass_schema", [True, False]) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_listing_table( ctx, tmp_path, pass_schema, file_sort_order, path_to_str ): @@ -528,7 +531,7 @@ def test_register_listing_table( assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2} result = ctx.sql( - "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" + "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" # noqa: E501 ).collect() result = pa.Table.from_batches(result) diff --git a/python/tests/test_store.py b/python/tests/test_store.py index 53ffc3acf..ac9af98f3 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-import os +from pathlib import Path import pytest from datafusion import SessionContext @@ -23,17 +23,16 @@ @pytest.fixture def ctx(): - ctx = SessionContext() - return ctx + return SessionContext() def test_read_parquet(ctx): ctx.register_parquet( "test", - f"file://{os.getcwd()}/parquet/data/alltypes_plain.parquet", - [], - True, - ".parquet", + f"file://{Path.cwd()}/parquet/data/alltypes_plain.parquet", + table_partition_cols=[], + parquet_pruning=True, + file_extension=".parquet", ) df = ctx.sql("SELECT * FROM test") assert isinstance(df.collect(), list) diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index feada7cde..f367a447d 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -50,7 +50,7 @@ def test_substrait_serialization(ctx): substrait_plan = ss.Producer.to_substrait_plan(df.logical_plan(), ctx) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_substrait_file_serialization(ctx, tmp_path, path_to_str): batch = pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 97cf81f3c..453ff6f4f 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -17,8 +17,6 @@ from __future__ import annotations -from typing import List - import pyarrow as pa import pyarrow.compute as pc import pytest @@ -31,7 +29,7 @@ class Summarize(Accumulator): def __init__(self, initial_value: float = 0.0): self._sum = pa.scalar(initial_value) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: @@ -39,7 +37,7 @@ def update(self, values: pa.Array) -> None: # This breaks on `None` self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - def merge(self, states: List[pa.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: # Not nice since pyarrow 
scalars can't be summed yet. # This breaks on `None` self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) @@ -56,7 +54,7 @@ class MissingMethods(Accumulator): def __init__(self): self._sum = pa.scalar(0) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] @@ -86,7 +84,7 @@ def test_errors(df): "evaluate, merge, update)" ) with pytest.raises(Exception, match=msg): - accum = udaf( # noqa F841 + accum = udaf( # noqa: F841 MissingMethods, pa.int64(), pa.int64(), diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 2fea34aa3..3d6dcf9d8 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -298,7 +298,7 @@ def test_udwf_errors(df): ] -@pytest.mark.parametrize("name,expr,expected", data_test_udwf_functions) +@pytest.mark.parametrize(("name", "expr", "expected"), data_test_udwf_functions) def test_udwf_functions(df, name, expr, expected): df = df.select("a", "b", f.round(expr, lit(3)).alias(name)) diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index ac064ba95..d7f6f6e35 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -19,6 +19,7 @@ import datafusion.functions import datafusion.object_store import datafusion.substrait +import pytest # EnumType introduced in 3.11. 3.10 and prior it was called EnumMeta. 
try: @@ -41,10 +42,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: internal_attr = getattr(internal_obj, attr) wrapped_attr = getattr(wrapped_obj, attr) - if internal_attr is not None: - if wrapped_attr is None: - print("Missing attribute: ", attr) - assert False + if internal_attr is not None and wrapped_attr is None: + pytest.fail(f"Missing attribute: {attr}") if attr in ["__self__", "__class__"]: continue From 3dcf7c7e5c0af0eb3c5e3bdf9c6e33fd4541b070 Mon Sep 17 00:00:00 2001 From: jsai28 <54253219+jsai28@users.noreply.github.com> Date: Thu, 13 Mar 2025 04:09:03 -0600 Subject: [PATCH 057/248] feat/making global context accessible for users (#1060) * Rename _global_ctx to global_ctx * Add global context to python wrapper code * Update context.py * singleton for global context * formatting * remove udf from import * remove _global_instance * formatting * formatting * unnecessary test * fix test_io.py * ran ruff * ran ruff format --- python/datafusion/context.py | 12 +++++++ python/datafusion/io.py | 63 ++++++++++++++++-------------------- python/tests/test_context.py | 18 +++++++++++ src/context.rs | 2 +- 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 0ab1a908a..58ad9a943 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -496,6 +496,18 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) + @classmethod + def global_ctx(cls) -> SessionContext: + """Retrieve the global context as a `SessionContext` wrapper. + + Returns: + A `SessionContext` object that wraps the global `SessionContextInternal`. + """ + internal_ctx = SessionContextInternal.global_ctx() + wrapper = cls() + wrapper.ctx = internal_ctx + return wrapper + def enable_url_table(self) -> SessionContext: """Control if local files can be queried as tables. 
diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 3e39703e3..ef5ebf96f 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING +from datafusion.context import SessionContext from datafusion.dataframe import DataFrame -from ._internal import SessionContext as SessionContextInternal - if TYPE_CHECKING: import pathlib @@ -68,16 +67,14 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_parquet( - str(path), - table_partition_cols, - parquet_pruning, - file_extension, - skip_metadata, - schema, - file_sort_order, - ) + return SessionContext.global_ctx().read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, ) @@ -110,15 +107,13 @@ def read_json( """ if table_partition_cols is None: table_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_json( - str(path), - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) + return SessionContext.global_ctx().read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, ) @@ -161,17 +156,15 @@ def read_csv( path = [str(p) for p in path] if isinstance(path, list) else str(path) - return DataFrame( - SessionContextInternal._global_ctx().read_csv( - path, - schema, - has_header, - delimiter, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) + return SessionContext.global_ctx().read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, ) @@ -198,8 +191,6 @@ def read_avro( """ if file_partition_cols is None: file_partition_cols = [] - return DataFrame( - 
SessionContextInternal._global_ctx().read_avro( - str(path), schema, file_partition_cols, file_extension - ) + return SessionContext.global_ctx().read_avro( + str(path), schema, file_partition_cols, file_extension ) diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 7a0a7aa08..4a15ac9cf 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -632,3 +632,21 @@ def test_sql_with_options_no_statements(ctx): options = SQLOptions().with_allow_statements(allow=False) with pytest.raises(Exception, match="SetVariable"): ctx.sql_with_options(sql, options=options) + + +@pytest.fixture +def batch(): + return pa.RecordBatch.from_arrays( + [pa.array([4, 5, 6])], + names=["a"], + ) + + +def test_create_dataframe_with_global_ctx(batch): + ctx = SessionContext.global_ctx() + + df = ctx.create_dataframe([[batch]]) + + result = df.collect()[0].column(0) + + assert result == pa.array([4, 5, 6]) diff --git a/src/context.rs b/src/context.rs index 9ba87eb8a..0db0f4d7e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -308,7 +308,7 @@ impl PySessionContext { #[classmethod] #[pyo3(signature = ())] - fn _global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { + fn global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { Ok(Self { ctx: get_global_ctx().clone(), }) From 55141bad7c2270c14742e962d8bab1d4f1be27f5 Mon Sep 17 00:00:00 2001 From: Spaarsh <67336892+Spaarsh@users.noreply.github.com> Date: Fri, 14 Mar 2025 18:26:31 +0530 Subject: [PATCH 058/248] Renaming Internal Structs (#1059) * Renamed Expr to RawExpr * Fixed CI test for exported classes to include RawExpr as well * Fixed CI test for exported classes to check if Expr class covers RawExpr * Generalized Raw* class checking * fixes * fixes * fixed the CI test to not look for Raw classes in the datafusion module * Add additional text to unit test describing operation and ensure wrapped Raw classes are checked * New ruff rule on main * Resolve ruff errors --------- Co-authored-by: Tim Saucer 
--- python/datafusion/expr.py | 8 ++-- python/tests/test_wrapper_coverage.py | 55 +++++++++++++++++++-------- src/expr.rs | 2 +- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 702f75aed..77b6c272d 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -193,7 +193,7 @@ class Expr: :ref:`Expressions` in the online documentation for more information. """ - def __init__(self, expr: expr_internal.Expr) -> None: + def __init__(self, expr: expr_internal.RawExpr) -> None: """This constructor should not be called by the end user.""" self.expr = expr @@ -383,7 +383,7 @@ def literal(value: Any) -> Expr: value = pa.scalar(value, type=pa.string_view()) if not isinstance(value, pa.Scalar): value = pa.scalar(value) - return Expr(expr_internal.Expr.literal(value)) + return Expr(expr_internal.RawExpr.literal(value)) @staticmethod def string_literal(value: str) -> Expr: @@ -398,13 +398,13 @@ def string_literal(value: str) -> Expr: """ if isinstance(value, str): value = pa.scalar(value, type=pa.string()) - return Expr(expr_internal.Expr.literal(value)) + return Expr(expr_internal.RawExpr.literal(value)) return Expr.literal(value) @staticmethod def column(value: str) -> Expr: """Creates a new expression representing a column.""" - return Expr(expr_internal.Expr.column(value)) + return Expr(expr_internal.RawExpr.column(value)) def alias(self, name: str) -> Expr: """Assign a name to the expression.""" diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index d7f6f6e35..a2de2d32b 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -28,37 +28,62 @@ from enum import EnumMeta as EnumType -def missing_exports(internal_obj, wrapped_obj) -> None: - # Special case enums - just make sure they exist since dir() - # and other functions get overridden. 
+def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 + """ + Identify if any of the rust exposted structs or functions do not have wrappers. + + Special handling for: + - Raw* classes: Internal implementation details that shouldn't be exposed + - _global_ctx: Internal implementation detail + - __self__, __class__: Python special attributes + """ + # Special case enums - EnumType overrides a some of the internal functions, + # so check all of the values exist and move on if isinstance(wrapped_obj, EnumType): + expected_values = [v for v in dir(internal_obj) if not v.startswith("__")] + for value in expected_values: + assert value in dir(wrapped_obj) return - for attr in dir(internal_obj): - if attr in ["_global_ctx"]: - continue - assert attr in dir(wrapped_obj) + for internal_attr_name in dir(internal_obj): + wrapped_attr_name = internal_attr_name.removeprefix("Raw") + assert wrapped_attr_name in dir(wrapped_obj) - internal_attr = getattr(internal_obj, attr) - wrapped_attr = getattr(wrapped_obj, attr) + internal_attr = getattr(internal_obj, internal_attr_name) + wrapped_attr = getattr(wrapped_obj, wrapped_attr_name) - if internal_attr is not None and wrapped_attr is None: - pytest.fail(f"Missing attribute: {attr}") + # There are some auto generated attributes that can be None, such as + # __kwdefaults__ and __doc__. As long as these are None on the internal + # object, it's okay to skip them. However if they do exist on the internal + # object they must also exist on the wrapped object. 
+ if internal_attr is not None: + if wrapped_attr is None: + pytest.fail(f"Missing attribute: {internal_attr_name}") - if attr in ["__self__", "__class__"]: + if internal_attr_name in ["__self__", "__class__"]: continue + if isinstance(internal_attr, list): assert isinstance(wrapped_attr, list) + + # We have cases like __all__ that are a list and we want to be certain that + # every value in the list in the internal object is also in the wrapper list for val in internal_attr: - assert val in wrapped_attr + if isinstance(val, str) and val.startswith("Raw"): + assert val[3:] in wrapped_attr + else: + assert val in wrapped_attr elif hasattr(internal_attr, "__dict__"): + # Check all submodules recursively missing_exports(internal_attr, wrapped_attr) def test_datafusion_missing_exports() -> None: """Check for any missing python exports. - This test verifies that every exposed class, attribute, and function in - the internal (pyo3) module is also exposed in our python wrappers. + This test verifies that every exposed class, attribute, + and function in the internal (pyo3) module - datafusion._internal + is also exposed in our python wrappers - datafusion - + i.e., the ones exposed to the public. 
""" missing_exports(datafusion._internal, datafusion) diff --git a/src/expr.rs b/src/expr.rs index e750be6a4..d3c528eb4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -101,7 +101,7 @@ pub mod window; use sort_expr::{to_sort_expressions, PySortExpr}; /// A PyExpr that can be used on a DataFrame -#[pyclass(name = "Expr", module = "datafusion.expr", subclass)] +#[pyclass(name = "RawExpr", module = "datafusion.expr", subclass)] #[derive(Debug, Clone)] pub struct PyExpr { pub expr: Expr, From 4f457030f171a26d0c4cce4d55cf541519956fcc Mon Sep 17 00:00:00 2001 From: jsai28 <54253219+jsai28@users.noreply.github.com> Date: Sat, 15 Mar 2025 04:57:38 -0600 Subject: [PATCH 059/248] added pytest asyncio tests (#1063) --- pyproject.toml | 1 + python/tests/test_dataframe.py | 54 ++++++++++++++++++++++++++++++++++ uv.lock | 17 ++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 060e3b80a..a4ed18c4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,6 +150,7 @@ dev = [ "maturin>=1.8.1", "numpy>1.25.0", "pytest>=7.4.4", + "pytest-asyncio>=0.23.3", "ruff>=0.9.1", "toml>=0.10.2", "pygithub==2.5.0", diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index d084f12dd..384b17878 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -771,6 +771,16 @@ def test_execution_plan(aggregate_df): assert rows_returned == 5 +@pytest.mark.asyncio +async def test_async_iteration_of_df(aggregate_df): + rows_returned = 0 + async for batch in aggregate_df.execute_stream(): + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + + assert rows_returned == 5 + + def test_repartition(df): df.repartition(2) @@ -958,6 +968,18 @@ def test_execute_stream(df): assert not list(stream) # after one iteration the generator must be exhausted +@pytest.mark.asyncio +async def test_execute_stream_async(df): + stream = df.execute_stream() + batches = [batch async for batch in stream] + 
+ assert all(batch is not None for batch in batches) + + # After consuming all batches, the stream should be exhausted + remaining_batches = [batch async for batch in stream] + assert not remaining_batches + + @pytest.mark.parametrize("schema", [True, False]) def test_execute_stream_to_arrow_table(df, schema): stream = df.execute_stream() @@ -974,6 +996,25 @@ def test_execute_stream_to_arrow_table(df, schema): assert set(pyarrow_table.column_names) == {"a", "b", "c"} +@pytest.mark.asyncio +@pytest.mark.parametrize("schema", [True, False]) +async def test_execute_stream_to_arrow_table_async(df, schema): + stream = df.execute_stream() + + if schema: + pyarrow_table = pa.Table.from_batches( + [batch.to_pyarrow() async for batch in stream], schema=df.schema() + ) + else: + pyarrow_table = pa.Table.from_batches( + [batch.to_pyarrow() async for batch in stream] + ) + + assert isinstance(pyarrow_table, pa.Table) + assert pyarrow_table.shape == (3, 3) + assert set(pyarrow_table.column_names) == {"a", "b", "c"} + + def test_execute_stream_partitioned(df): streams = df.execute_stream_partitioned() assert all(batch is not None for stream in streams for batch in stream) @@ -982,6 +1023,19 @@ def test_execute_stream_partitioned(df): ) # after one iteration all generators must be exhausted +@pytest.mark.asyncio +async def test_execute_stream_partitioned_async(df): + streams = df.execute_stream_partitioned() + + for stream in streams: + batches = [batch async for batch in stream] + assert all(batch is not None for batch in batches) + + # Ensure the stream is exhausted after iteration + remaining_batches = [batch async for batch in stream] + assert not remaining_batches + + def test_empty_to_arrow_table(df): # Convert empty datafusion dataframe to pyarrow Table pyarrow_table = df.limit(0).to_arrow_table() diff --git a/uv.lock b/uv.lock index 619b92856..7e4bc4c6b 100644 --- a/uv.lock +++ b/uv.lock @@ -284,9 +284,11 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = 
"maturin" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "pygithub" }, { name = "pytest" }, + { name = "pytest-asyncio" }, { name = "ruff" }, { name = "toml" }, ] @@ -314,9 +316,10 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "maturin", specifier = ">=1.8.1" }, - { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "numpy", specifier = ">1.25.0" }, { name = "pygithub", specifier = "==2.5.0" }, { name = "pytest", specifier = ">=7.4.4" }, + { name = "pytest-asyncio", specifier = ">=0.23.3" }, { name = "ruff", specifier = ">=0.9.1" }, { name = "toml", specifier = ">=0.10.2" }, ] @@ -1079,6 +1082,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, ] +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 2f52688d76e84794343c17ffaf3002534ecfd716 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 
15 Mar 2025 19:00:50 +0800 Subject: [PATCH 060/248] Add decorator for udwf (#1061) * feat: Introduce create_udwf method for User-Defined Window Functions - Added `create_udwf` static method to `WindowUDF` class, allowing users to create User-Defined Window Functions (UDWF) as both a function and a decorator. - Updated type hinting for `_R` using `TypeAlias` for better clarity. - Enhanced documentation with usage examples for both function and decorator styles, improving usability and understanding. * refactor: Simplify UDWF test suite and introduce SimpleWindowCount evaluator - Removed multiple exponential smoothing classes to streamline the code. - Introduced SimpleWindowCount class for basic row counting functionality. - Updated test cases to validate the new SimpleWindowCount evaluator. - Refactored fixture and test functions for clarity and consistency. - Enhanced error handling in UDWF creation tests. * fix: Update type alias import to use typing_extensions for compatibility * Add udwf tests for multiple input types and decorator syntax * replace old def udwf * refactor: Simplify df fixture by passing ctx as an argument * refactor: Rename DataFrame fixtures and update test functions - Renamed `df` fixture to `complex_window_df` for clarity. - Renamed `simple_df` fixture to `count_window_df` to better reflect its purpose. - Updated test functions to use the new fixture names, enhancing readability and maintainability. * refactor: Update udwf calls in WindowUDF to use BiasedNumbers directly - Changed udwf1 to use BiasedNumbers instead of bias_10. - Added udwf2 to call udwf with bias_10. - Introduced udwf3 to demonstrate a lambda function returning BiasedNumbers(20). 
* feat: Add overloads for udwf function to support multiple input types and decorator syntax * refactor: Simplify udwf method signature by removing redundant type hints * refactor: Remove state_type from udwf method signature and update return type handling - Eliminated the state_type parameter from the udwf method to simplify the function signature. - Updated return type handling in the _function and _decorator methods to use a generic type _R for better type flexibility. - Enhanced the decorator to wrap the original function, allowing for improved argument handling and expression return. * refactor: Update volatility parameter type in udwf method signature to support Volatility enum * Fix ruff errors * fix C901 for def udwf * refactor: Update udwf method signature and simplify input handling - Changed the type hint for the return type in the _create_window_udf_decorator method to use pa.DataType directly instead of a TypeVar. - Simplified the handling of input types by removing redundant checks and directly using the input types list. - Removed unnecessary comments and cleaned up the code for better readability. - Updated the test for udwf to use parameterized tests for better coverage and maintainability. * refactor: Rename input_type to input_types in udwf method signature for clarity * refactor: Enhance typing in udf.py by introducing Protocol for WindowEvaluator and improving import organization * Revert "refactor: Enhance typing in udf.py by introducing Protocol for WindowEvaluator and improving import organization" This reverts commit 16dbe5f3fd88f42d0a304384b162009bd9e49a35. 
--- python/datafusion/udf.py | 123 +++++++++++++++++++++------ python/tests/test_udwf.py | 170 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 264 insertions(+), 29 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 603b7063d..e93a34ca5 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -621,6 +621,16 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udwf.__call__(*args_raw)) + @overload + @staticmethod + def udwf( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., WindowUDF]: ... + + @overload @staticmethod def udwf( func: Callable[[], WindowEvaluator], @@ -628,24 +638,31 @@ def udwf( return_type: pa.DataType, volatility: Volatility | str, name: Optional[str] = None, - ) -> WindowUDF: - """Create a new User-Defined Window Function. + ) -> WindowUDF: ... - If your :py:class:`WindowEvaluator` can be instantiated with no arguments, you - can simply pass it's type as ``func``. If you need to pass additional arguments - to it's constructor, you can define a lambda or a factory method. During runtime - the :py:class:`WindowEvaluator` will be constructed for every instance in - which this UDWF is used. The following examples are all valid. + @staticmethod + def udwf(*args: Any, **kwargs: Any): # noqa: D417 + """Create a new User-Defined Window Function (UDWF). - .. code-block:: python + This class can be used both as a **function** and as a **decorator**. + + Usage: + - **As a function**: Call `udwf(func, input_types, return_type, volatility, + name)`. + - **As a decorator**: Use `@udwf(input_types, return_type, volatility, + name)`. When using `udwf` as a decorator, **do not pass `func` + explicitly**. 
+ **Function example:** + ``` import pyarrow as pa class BiasedNumbers(WindowEvaluator): def __init__(self, start: int = 0) -> None: self.start = start - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + def evaluate_all(self, values: list[pa.Array], + num_rows: int) -> pa.Array: return pa.array([self.start + i for i in range(num_rows)]) def bias_10() -> BiasedNumbers: @@ -655,35 +672,93 @@ def bias_10() -> BiasedNumbers: udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") + ``` + + **Decorator example:** + ``` + @udwf(pa.int64(), pa.int64(), "immutable") + def biased_numbers() -> BiasedNumbers: + return BiasedNumbers(10) + ``` + Args: - func: A callable to create the window function. - input_types: The data types of the arguments to ``func``. + func: **Only needed when calling as a function. Skip this argument when + using `udwf` as a decorator.** + input_types: The data types of the arguments. return_type: The data type of the return value. volatility: See :py:class:`Volatility` for allowed values. - arguments: A list of arguments to pass in to the __init__ method for accum. name: A descriptive name for the function. Returns: - A user-defined window function. - """ # noqa: W505, E501 + A user-defined window function that can be used in window function calls. 
+ """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return WindowUDF._create_window_udf(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return WindowUDF._create_window_udf_decorator(*args, **kwargs) + + @staticmethod + def _create_window_udf( + func: Callable[[], WindowEvaluator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> WindowUDF: + """Create a WindowUDF instance from function arguments.""" if not callable(func): msg = "`func` must be callable." raise TypeError(msg) if not isinstance(func(), WindowEvaluator): msg = "`func` must implement the abstract base class WindowEvaluator" raise TypeError(msg) - if name is None: - name = func().__class__.__qualname__.lower() - if isinstance(input_types, pa.DataType): - input_types = [input_types] - return WindowUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, + + name = name or func.__qualname__.lower() + input_types = ( + [input_types] if isinstance(input_types, pa.DataType) else input_types ) + return WindowUDF(name, func, input_types, return_type, volatility) + + @staticmethod + def _get_default_name(func: Callable) -> str: + """Get the default name for a function based on its attributes.""" + if hasattr(func, "__qualname__"): + return func.__qualname__.lower() + return func.__class__.__name__.lower() + + @staticmethod + def _normalize_input_types( + input_types: pa.DataType | list[pa.DataType], + ) -> list[pa.DataType]: + """Convert a single DataType to a list if needed.""" + if isinstance(input_types, pa.DataType): + return [input_types] + return input_types + + @staticmethod + def _create_window_udf_decorator( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[[Callable[[], 
WindowEvaluator]], Callable[..., Expr]]: + """Create a decorator for a WindowUDF.""" + + def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]: + udwf_caller = WindowUDF._create_window_udf( + func, input_types, return_type, volatility, name + ) + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Expr: + return udwf_caller(*args, **kwargs) + + return wrapper + + return decorator + # Convenience exports so we can import instead of treating as # variables at the package root diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 3d6dcf9d8..4190e7d64 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -162,14 +162,27 @@ def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: return pa.array(results) +class SimpleWindowCount(WindowEvaluator): + """A simple window evaluator that counts rows.""" + + def __init__(self, base: int = 0) -> None: + self.base = base + + def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + return pa.array([self.base + i for i in range(num_rows)]) + + class NotSubclassOfWindowEvaluator: pass @pytest.fixture -def df(): - ctx = SessionContext() +def ctx(): + return SessionContext() + +@pytest.fixture +def complex_window_df(ctx): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [ @@ -182,7 +195,17 @@ def df(): return ctx.create_dataframe([[batch]]) -def test_udwf_errors(df): +@pytest.fixture +def count_window_df(ctx): + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]], name="test_table") + + +def test_udwf_errors(complex_window_df): with pytest.raises(TypeError): udwf( NotSubclassOfWindowEvaluator, @@ -192,6 +215,103 @@ def test_udwf_errors(df): ) +def test_udwf_errors_with_message(): + """Test error cases for UDWF creation.""" + with 
pytest.raises( + TypeError, match="`func` must implement the abstract base class WindowEvaluator" + ): + udwf( + NotSubclassOfWindowEvaluator, pa.int64(), pa.int64(), volatility="immutable" + ) + + +def test_udwf_basic_usage(count_window_df): + """Test basic UDWF usage with a simple counting window function.""" + simple_count = udwf( + SimpleWindowCount, pa.int64(), pa.int64(), volatility="immutable" + ) + + df = count_window_df.select( + simple_count(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_with_args(count_window_df): + """Test UDWF with constructor arguments.""" + count_base10 = udwf( + lambda: SimpleWindowCount(10), pa.int64(), pa.int64(), volatility="immutable" + ) + + df = count_window_df.select( + count_base10(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([10, 11, 12]) + + +def test_udwf_decorator_basic(count_window_df): + """Test UDWF used as a decorator.""" + + @udwf([pa.int64()], pa.int64(), "immutable") + def window_count() -> WindowEvaluator: + return SimpleWindowCount() + + df = count_window_df.select( + window_count(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_decorator_with_args(count_window_df): + """Test UDWF decorator with constructor arguments.""" + + @udwf([pa.int64()], pa.int64(), "immutable") + def window_count_base10() -> WindowEvaluator: + return SimpleWindowCount(10) + + df = count_window_df.select( + window_count_base10(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([10, 11, 12]) + + +def test_register_udwf(ctx, count_window_df): 
+ """Test registering and using UDWF in SQL context.""" + window_count = udwf( + SimpleWindowCount, + [pa.int64()], + pa.int64(), + volatility="immutable", + name="window_count", + ) + + ctx.register_udwf(window_count) + result = ctx.sql( + """ + SELECT window_count(a) + OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING) FROM test_table + """ + ).collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + smooth_default = udwf( ExponentialSmoothDefault, pa.float64(), @@ -299,10 +419,50 @@ def test_udwf_errors(df): @pytest.mark.parametrize(("name", "expr", "expected"), data_test_udwf_functions) -def test_udwf_functions(df, name, expr, expected): - df = df.select("a", "b", f.round(expr, lit(3)).alias(name)) +def test_udwf_functions(complex_window_df, name, expr, expected): + df = complex_window_df.select("a", "b", f.round(expr, lit(3)).alias(name)) # execute and collect the first (and only) batch result = df.sort(column("a")).select(column(name)).collect()[0] assert result.column(0) == pa.array(expected) + + +@pytest.mark.parametrize( + "udwf_func", + [ + udwf(SimpleWindowCount, pa.int64(), pa.int64(), "immutable"), + udwf(SimpleWindowCount, [pa.int64()], pa.int64(), "immutable"), + udwf([pa.int64()], pa.int64(), "immutable")(lambda: SimpleWindowCount()), + udwf(pa.int64(), pa.int64(), "immutable")(lambda: SimpleWindowCount()), + ], +) +def test_udwf_overloads(udwf_func, count_window_df): + df = count_window_df.select( + udwf_func(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_named_function(ctx, count_window_df): + """Test UDWF with explicit name parameter.""" + window_count = udwf( + SimpleWindowCount, + pa.int64(), + pa.int64(), + volatility="immutable", + name="my_custom_counter", + ) + + ctx.register_udwf(window_count) + result = ctx.sql( + """ + SELECT my_custom_counter(a) + OVER (ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING) FROM test_table""" + ).collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) From 7c1c08f8617ac97a2568eb0664e9d4ee30fceba9 Mon Sep 17 00:00:00 2001 From: Nirnay Roy <32942494+nirnayroy@users.noreply.github.com> Date: Sat, 15 Mar 2025 17:05:05 +0530 Subject: [PATCH 061/248] feat: expose regex_count function (#1066) * Added wrapper for regex_count function * fix comment --------- Co-authored-by: Nirnay Roy --- python/datafusion/functions.py | 18 ++++++++++++++++++ python/tests/test_functions.py | 4 ++++ src/functions.rs | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0cc7434cf..26bac149c 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -217,6 +217,7 @@ "random", "range", "rank", + "regexp_count", "regexp_like", "regexp_match", "regexp_replace", @@ -779,6 +780,23 @@ def regexp_replace( return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) +def regexp_count( + string: Expr, pattern: Expr, start: Expr, flags: Expr | None = None +) -> Expr: + """Returns the number of matches in a string. + + Optional start position (the first position is 1) to search for the regular + expression. 
+ """ + if flags is not None: + flags = flags.expr + if start is not None: + start = start.expr + else: + start = Expr.expr + return Expr(f.regexp_count(string.expr, pattern.expr, start, flags)) + + def repeat(string: Expr, n: Expr) -> Expr: """Repeats the ``string`` to ``n`` times.""" return Expr(f.repeat(string.expr, n.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index ed88a16e3..161e1e3bb 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr): f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), pa.array(["H-o", "W-d", "!"]), ), + ( + f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)), + pa.array([1, 1, 0], type=pa.int64()), + ), ], ) def test_string_functions(df, function, expected_result): diff --git a/src/functions.rs b/src/functions.rs index 6a8abb18d..8fac239b4 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -173,6 +173,25 @@ fn regexp_replace( ) .into()) } + +#[pyfunction] +#[pyo3(signature = (string, pattern, start, flags=None))] +/// Returns the number of matches found in the string. 
+fn regexp_count( + string: PyExpr, + pattern: PyExpr, + start: Option, + flags: Option, +) -> PyResult { + Ok(functions::expr_fn::regexp_count( + string.expr, + pattern.expr, + start.map(|x| x.expr), + flags.map(|x| x.expr), + ) + .into()) +} + /// Creates a new Sort Expr #[pyfunction] fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { @@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(power))?; m.add_wrapped(wrap_pyfunction!(radians))?; m.add_wrapped(wrap_pyfunction!(random))?; + m.add_wrapped(wrap_pyfunction!(regexp_count))?; m.add_wrapped(wrap_pyfunction!(regexp_like))?; m.add_wrapped(wrap_pyfunction!(regexp_match))?; m.add_wrapped(wrap_pyfunction!(regexp_replace))?; From b8dd97bc8eefcfecfa8dcc864c4898c654b236a9 Mon Sep 17 00:00:00 2001 From: Spaarsh <67336892+Spaarsh@users.noreply.github.com> Date: Mon, 17 Mar 2025 20:08:16 +0530 Subject: [PATCH 062/248] Add additional ruff suggestions (#1062) * Enabled ruff rule PT001 and ANN204 * Enabled ruff rule B008 * Enabled ruff rule EM101 * Enabled ruff rule PLR1714 * Enabled ruff rule ANN201 * Enabled ruff rule C400 * Enabled ruff rule B904 * Enabled ruff rule UP006 * Enabled ruff rule RUF012 * Enabled ruff rule FBT003 * Enabled ruff rule C416 * Enabled ruff rule SIM102 * Enabled ruff rule PGH003 * Enabled ruff rule PERF401 * Enabled ruff rule EM102 * Enabled ruff rule SIM108 * Enabled ruff rule ICN001 * Enabled ruff rule ICN001 * implemented reviews * Update pyproject.toml to ignore `SIM102` * Enabled ruff rule PLW2901 * Enabled ruff rule RET503 * Fixed failing ruff tests --- benchmarks/db-benchmark/groupby-datafusion.py | 24 ++-- benchmarks/db-benchmark/join-datafusion.py | 5 +- benchmarks/tpch/tpch.py | 7 +- dev/release/generate-changelog.py | 6 +- docs/source/conf.py | 4 +- examples/create-context.py | 12 +- examples/python-udaf.py | 36 +++-- examples/python-udf-comparisons.py | 9 +- examples/python-udf.py | 12 +- 
examples/query-pyarrow-data.py | 10 +- examples/sql-using-python-udaf.py | 2 +- examples/tpch/_tests.py | 4 +- examples/tpch/convert_data_to_parquet.py | 134 +++++++++--------- examples/tpch/q08_market_share.py | 2 +- examples/tpch/q19_discounted_revenue.py | 4 +- .../tpch/q21_suppliers_kept_orders_waiting.py | 2 +- pyproject.toml | 20 --- python/datafusion/__init__.py | 8 +- python/datafusion/catalog.py | 4 +- python/datafusion/context.py | 51 +++---- python/datafusion/dataframe.py | 55 +++---- python/datafusion/expr.py | 31 ++-- python/datafusion/functions.py | 9 +- python/tests/test_functions.py | 2 +- python/tests/test_wrapper_coverage.py | 7 +- 25 files changed, 213 insertions(+), 247 deletions(-) diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 04bf7a149..f9e8d638b 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -20,7 +20,7 @@ import timeit import datafusion as df -import pyarrow +import pyarrow as pa from datafusion import ( RuntimeEnvBuilder, SessionConfig, @@ -37,7 +37,7 @@ exec(open("./_helpers/helpers.py").read()) -def ans_shape(batches): +def ans_shape(batches) -> tuple[int, int]: rows, cols = 0, 0 for batch in batches: rows += batch.num_rows @@ -48,7 +48,7 @@ def ans_shape(batches): return rows, cols -def execute(df): +def execute(df) -> list: print(df.execution_plan().display_indent()) return df.collect() @@ -68,14 +68,14 @@ def execute(df): src_grp = os.path.join("data", data_name + ".csv") print("loading dataset %s" % src_grp, flush=True) -schema = pyarrow.schema( +schema = pa.schema( [ - ("id4", pyarrow.int32()), - ("id5", pyarrow.int32()), - ("id6", pyarrow.int32()), - ("v1", pyarrow.int32()), - ("v2", pyarrow.int32()), - ("v3", pyarrow.float64()), + ("id4", pa.int32()), + ("id5", pa.int32()), + ("id6", pa.int32()), + ("v1", pa.int32()), + ("v2", pa.int32()), + ("v3", pa.float64()), ] ) @@ -93,8 +93,8 @@ def 
execute(df): ) config = ( SessionConfig() - .with_repartition_joins(False) - .with_repartition_aggregations(False) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) .set("datafusion.execution.coalesce_batches", "false") ) ctx = SessionContext(config, runtime) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index b45ebf632..039868031 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -29,7 +29,7 @@ exec(open("./_helpers/helpers.py").read()) -def ans_shape(batches): +def ans_shape(batches) -> tuple[int, int]: rows, cols = 0, 0 for batch in batches: rows += batch.num_rows @@ -57,7 +57,8 @@ def ans_shape(batches): os.path.join("data", y_data_name[2] + ".csv"), ] if len(src_jn_y) != 3: - raise Exception("Something went wrong in preparing files used for join") + error_msg = "Something went wrong in preparing files used for join" + raise Exception(error_msg) print( "loading datasets " diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index bfb9ac398..2d1bbae5b 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -21,7 +21,7 @@ from datafusion import SessionContext -def bench(data_path, query_path): +def bench(data_path, query_path) -> None: with open("results.csv", "w") as results: # register tables start = time.time() @@ -68,10 +68,7 @@ def bench(data_path, query_path): with open(f"{query_path}/q{query}.sql") as f: text = f.read() tmp = text.split(";") - queries = [] - for str in tmp: - if len(str.strip()) > 0: - queries.append(str.strip()) + queries = [s.strip() for s in tmp if len(s.strip()) > 0] try: start = time.time() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index e30e2def2..d86736773 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -24,7 +24,7 @@ from github import Github -def print_pulls(repo_name, title, 
pulls): +def print_pulls(repo_name, title, pulls) -> None: if len(pulls) > 0: print(f"**{title}:**") print() @@ -34,7 +34,7 @@ def print_pulls(repo_name, title, pulls): print() -def generate_changelog(repo, repo_name, tag1, tag2, version): +def generate_changelog(repo, repo_name, tag1, tag2, version) -> None: # get a list of commits between two tags print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) comparison = repo.compare(tag1, tag2) @@ -154,7 +154,7 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): ) -def cli(args=None): +def cli(args=None) -> None: """Process command line arguments.""" if not args: args = sys.argv[1:] diff --git a/docs/source/conf.py b/docs/source/conf.py index c82a189e0..0be03d81d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ autoapi_python_class_content = "both" -def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 +def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa: ARG001 skip_contents = [ # Re-exports ("class", "datafusion.DataFrame"), @@ -93,7 +93,7 @@ def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 return skip -def setup(sphinx): +def setup(sphinx) -> None: sphinx.connect("autoapi-skip-member", autoapi_skip_member_fn) diff --git a/examples/create-context.py b/examples/create-context.py index 760c8513e..0026d6162 100644 --- a/examples/create-context.py +++ b/examples/create-context.py @@ -25,14 +25,14 @@ runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() - .with_create_default_catalog_and_schema(True) + .with_create_default_catalog_and_schema(enabled=True) .with_default_catalog_and_schema("foo", "bar") .with_target_partitions(8) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - .with_parquet_pruning(False) + 
.with_information_schema(enabled=True) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) + .with_repartition_windows(enabled=False) + .with_parquet_pruning(enabled=False) .set("datafusion.execution.parquet.pushdown_filters", "true") ) ctx = SessionContext(config, runtime) diff --git a/examples/python-udaf.py b/examples/python-udaf.py index 538f69571..6655edb0a 100644 --- a/examples/python-udaf.py +++ b/examples/python-udaf.py @@ -16,7 +16,7 @@ # under the License. import datafusion -import pyarrow +import pyarrow as pa import pyarrow.compute from datafusion import Accumulator, col, udaf @@ -26,25 +26,21 @@ class MyAccumulator(Accumulator): Interface of a user-defined accumulation. """ - def __init__(self): - self._sum = pyarrow.scalar(0.0) + def __init__(self) -> None: + self._sum = pa.scalar(0.0) - def update(self, values: pyarrow.Array) -> None: + def update(self, values: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(values).as_py() - ) + self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(values).as_py()) - def merge(self, states: pyarrow.Array) -> None: + def merge(self, states: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(states).as_py() - ) + self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(states).as_py()) - def state(self) -> pyarrow.Array: - return pyarrow.array([self._sum.as_py()]) + def state(self) -> pa.Array: + return pa.array([self._sum.as_py()]) - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: return self._sum @@ -52,17 +48,17 @@ def evaluate(self) -> pyarrow.Scalar: ctx = datafusion.SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) my_udaf = udaf( MyAccumulator, - pyarrow.float64(), - pyarrow.float64(), - [pyarrow.float64()], + pa.float64(), + pa.float64(), + [pa.float64()], "stable", ) @@ -70,4 +66,4 @@ def evaluate(self) -> pyarrow.Scalar: result = df.collect()[0] -assert result.column(0) == pyarrow.array([6.0]) +assert result.column(0) == pa.array([6.0]) diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py index c5d5ec8dd..eb0825011 100644 --- a/examples/python-udf-comparisons.py +++ b/examples/python-udf-comparisons.py @@ -112,8 +112,8 @@ def is_of_interest_impl( returnflag_arr: pa.Array, ) -> pa.Array: result = [] - for idx, partkey in enumerate(partkey_arr): - partkey = partkey.as_py() + for idx, partkey_val in enumerate(partkey_arr): + partkey = partkey_val.as_py() suppkey = suppkey_arr[idx].as_py() returnflag = returnflag_arr[idx].as_py() value = (partkey, suppkey, returnflag) @@ -162,10 +162,7 @@ def udf_using_pyarrow_compute_impl( resultant_arr = pc.and_(filtered_partkey_arr, filtered_suppkey_arr) resultant_arr = pc.and_(resultant_arr, filtered_returnflag_arr) - if results is None: - results = resultant_arr - else: - results = pc.or_(results, 
resultant_arr) + results = resultant_arr if results is None else pc.or_(results, resultant_arr) return results diff --git a/examples/python-udf.py b/examples/python-udf.py index fb2bc253e..1c08acd1a 100644 --- a/examples/python-udf.py +++ b/examples/python-udf.py @@ -15,23 +15,23 @@ # specific language governing permissions and limitations # under the License. -import pyarrow +import pyarrow as pa from datafusion import SessionContext, udf from datafusion import functions as f -def is_null(array: pyarrow.Array) -> pyarrow.Array: +def is_null(array: pa.Array) -> pa.Array: return array.is_null() -is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), "stable") +is_null_arr = udf(is_null, [pa.int64()], pa.bool_(), "stable") # create a context ctx = SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) @@ -40,4 +40,4 @@ def is_null(array: pyarrow.Array) -> pyarrow.Array: result = df.collect()[0] -assert result.column(0) == pyarrow.array([False] * 3) +assert result.column(0) == pa.array([False] * 3) diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py index e3456fb5b..9cfe8a62b 100644 --- a/examples/query-pyarrow-data.py +++ b/examples/query-pyarrow-data.py @@ -16,15 +16,15 @@ # under the License. 
import datafusion -import pyarrow +import pyarrow as pa from datafusion import col # create a context ctx = datafusion.SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) @@ -38,5 +38,5 @@ # execute and collect the first (and only) batch result = df.collect()[0] -assert result.column(0) == pyarrow.array([5, 7, 9]) -assert result.column(1) == pyarrow.array([-3, -3, -3]) +assert result.column(0) == pa.array([5, 7, 9]) +assert result.column(1) == pa.array([-3, -3, -3]) diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py index 60ab8d134..32ce38900 100644 --- a/examples/sql-using-python-udaf.py +++ b/examples/sql-using-python-udaf.py @@ -25,7 +25,7 @@ class MyAccumulator(Accumulator): Interface of a user-defined accumulation. 
""" - def __init__(self): + def __init__(self) -> None: self._sum = pa.scalar(0.0) def update(self, values: pa.Array) -> None: diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 2be4dfabd..80ff80244 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -91,7 +91,7 @@ def check_q17(df): ("q22_global_sales_opportunity", "q22"), ], ) -def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): +def test_tpch_query_vs_answer_file(query_code: str, answer_file: str) -> None: module = import_module(query_code) df: DataFrame = module.df @@ -122,3 +122,5 @@ def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): assert df.join(df_expected, on=cols, how="anti").count() == 0 assert df.count() == df_expected.count() + + return None diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index 73097fac5..fd0fcca49 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py @@ -25,112 +25,112 @@ import os import datafusion -import pyarrow +import pyarrow as pa ctx = datafusion.SessionContext() all_schemas = {} all_schemas["customer"] = [ - ("C_CUSTKEY", pyarrow.int64()), - ("C_NAME", pyarrow.string()), - ("C_ADDRESS", pyarrow.string()), - ("C_NATIONKEY", pyarrow.int64()), - ("C_PHONE", pyarrow.string()), - ("C_ACCTBAL", pyarrow.decimal128(15, 2)), - ("C_MKTSEGMENT", pyarrow.string()), - ("C_COMMENT", pyarrow.string()), + ("C_CUSTKEY", pa.int64()), + ("C_NAME", pa.string()), + ("C_ADDRESS", pa.string()), + ("C_NATIONKEY", pa.int64()), + ("C_PHONE", pa.string()), + ("C_ACCTBAL", pa.decimal128(15, 2)), + ("C_MKTSEGMENT", pa.string()), + ("C_COMMENT", pa.string()), ] all_schemas["lineitem"] = [ - ("L_ORDERKEY", pyarrow.int64()), - ("L_PARTKEY", pyarrow.int64()), - ("L_SUPPKEY", pyarrow.int64()), - ("L_LINENUMBER", pyarrow.int32()), - ("L_QUANTITY", pyarrow.decimal128(15, 2)), - ("L_EXTENDEDPRICE", pyarrow.decimal128(15, 2)), - ("L_DISCOUNT", 
pyarrow.decimal128(15, 2)), - ("L_TAX", pyarrow.decimal128(15, 2)), - ("L_RETURNFLAG", pyarrow.string()), - ("L_LINESTATUS", pyarrow.string()), - ("L_SHIPDATE", pyarrow.date32()), - ("L_COMMITDATE", pyarrow.date32()), - ("L_RECEIPTDATE", pyarrow.date32()), - ("L_SHIPINSTRUCT", pyarrow.string()), - ("L_SHIPMODE", pyarrow.string()), - ("L_COMMENT", pyarrow.string()), + ("L_ORDERKEY", pa.int64()), + ("L_PARTKEY", pa.int64()), + ("L_SUPPKEY", pa.int64()), + ("L_LINENUMBER", pa.int32()), + ("L_QUANTITY", pa.decimal128(15, 2)), + ("L_EXTENDEDPRICE", pa.decimal128(15, 2)), + ("L_DISCOUNT", pa.decimal128(15, 2)), + ("L_TAX", pa.decimal128(15, 2)), + ("L_RETURNFLAG", pa.string()), + ("L_LINESTATUS", pa.string()), + ("L_SHIPDATE", pa.date32()), + ("L_COMMITDATE", pa.date32()), + ("L_RECEIPTDATE", pa.date32()), + ("L_SHIPINSTRUCT", pa.string()), + ("L_SHIPMODE", pa.string()), + ("L_COMMENT", pa.string()), ] all_schemas["nation"] = [ - ("N_NATIONKEY", pyarrow.int64()), - ("N_NAME", pyarrow.string()), - ("N_REGIONKEY", pyarrow.int64()), - ("N_COMMENT", pyarrow.string()), + ("N_NATIONKEY", pa.int64()), + ("N_NAME", pa.string()), + ("N_REGIONKEY", pa.int64()), + ("N_COMMENT", pa.string()), ] all_schemas["orders"] = [ - ("O_ORDERKEY", pyarrow.int64()), - ("O_CUSTKEY", pyarrow.int64()), - ("O_ORDERSTATUS", pyarrow.string()), - ("O_TOTALPRICE", pyarrow.decimal128(15, 2)), - ("O_ORDERDATE", pyarrow.date32()), - ("O_ORDERPRIORITY", pyarrow.string()), - ("O_CLERK", pyarrow.string()), - ("O_SHIPPRIORITY", pyarrow.int32()), - ("O_COMMENT", pyarrow.string()), + ("O_ORDERKEY", pa.int64()), + ("O_CUSTKEY", pa.int64()), + ("O_ORDERSTATUS", pa.string()), + ("O_TOTALPRICE", pa.decimal128(15, 2)), + ("O_ORDERDATE", pa.date32()), + ("O_ORDERPRIORITY", pa.string()), + ("O_CLERK", pa.string()), + ("O_SHIPPRIORITY", pa.int32()), + ("O_COMMENT", pa.string()), ] all_schemas["part"] = [ - ("P_PARTKEY", pyarrow.int64()), - ("P_NAME", pyarrow.string()), - ("P_MFGR", pyarrow.string()), - ("P_BRAND", 
pyarrow.string()), - ("P_TYPE", pyarrow.string()), - ("P_SIZE", pyarrow.int32()), - ("P_CONTAINER", pyarrow.string()), - ("P_RETAILPRICE", pyarrow.decimal128(15, 2)), - ("P_COMMENT", pyarrow.string()), + ("P_PARTKEY", pa.int64()), + ("P_NAME", pa.string()), + ("P_MFGR", pa.string()), + ("P_BRAND", pa.string()), + ("P_TYPE", pa.string()), + ("P_SIZE", pa.int32()), + ("P_CONTAINER", pa.string()), + ("P_RETAILPRICE", pa.decimal128(15, 2)), + ("P_COMMENT", pa.string()), ] all_schemas["partsupp"] = [ - ("PS_PARTKEY", pyarrow.int64()), - ("PS_SUPPKEY", pyarrow.int64()), - ("PS_AVAILQTY", pyarrow.int32()), - ("PS_SUPPLYCOST", pyarrow.decimal128(15, 2)), - ("PS_COMMENT", pyarrow.string()), + ("PS_PARTKEY", pa.int64()), + ("PS_SUPPKEY", pa.int64()), + ("PS_AVAILQTY", pa.int32()), + ("PS_SUPPLYCOST", pa.decimal128(15, 2)), + ("PS_COMMENT", pa.string()), ] all_schemas["region"] = [ - ("r_REGIONKEY", pyarrow.int64()), - ("r_NAME", pyarrow.string()), - ("r_COMMENT", pyarrow.string()), + ("r_REGIONKEY", pa.int64()), + ("r_NAME", pa.string()), + ("r_COMMENT", pa.string()), ] all_schemas["supplier"] = [ - ("S_SUPPKEY", pyarrow.int64()), - ("S_NAME", pyarrow.string()), - ("S_ADDRESS", pyarrow.string()), - ("S_NATIONKEY", pyarrow.int32()), - ("S_PHONE", pyarrow.string()), - ("S_ACCTBAL", pyarrow.decimal128(15, 2)), - ("S_COMMENT", pyarrow.string()), + ("S_SUPPKEY", pa.int64()), + ("S_NAME", pa.string()), + ("S_ADDRESS", pa.string()), + ("S_NATIONKEY", pa.int32()), + ("S_PHONE", pa.string()), + ("S_ACCTBAL", pa.decimal128(15, 2)), + ("S_COMMENT", pa.string()), ] curr_dir = os.path.dirname(os.path.abspath(__file__)) -for filename, curr_schema in all_schemas.items(): +for filename, curr_schema_val in all_schemas.items(): # For convenience, go ahead and convert the schema column names to lowercase - curr_schema = [(s[0].lower(), s[1]) for s in curr_schema] + curr_schema = [(s[0].lower(), s[1]) for s in curr_schema_val] # Pre-collect the output columns so we can ignore the null field we 
add # in to handle the trailing | in the file output_cols = [r[0] for r in curr_schema] - curr_schema = [pyarrow.field(r[0], r[1], nullable=False) for r in curr_schema] + curr_schema = [pa.field(r[0], r[1], nullable=False) for r in curr_schema] # Trailing | requires extra field for in processing - curr_schema.append(("some_null", pyarrow.null())) + curr_schema.append(("some_null", pa.null())) - schema = pyarrow.schema(curr_schema) + schema = pa.schema(curr_schema) source_file = os.path.abspath( os.path.join(curr_dir, f"../../benchmarks/tpch/data/{filename}.csv") diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index d46df30f2..4bf50efba 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -150,7 +150,7 @@ df = df.with_column( "national_volume", F.case(col("s_suppkey").is_null()) - .when(lit(False), col("volume")) + .when(lit(value=False), col("volume")) .otherwise(lit(0.0)), ) diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index 2b87e1120..bd492aac0 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -89,8 +89,8 @@ def is_of_interest( same number of rows in the output. 
""" result = [] - for idx, brand in enumerate(brand_arr): - brand = brand.as_py() + for idx, brand_val in enumerate(brand_arr): + brand = brand_val.as_py() if brand in items_of_interest: values_of_interest = items_of_interest[brand] diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 9bbaad779..619c4406b 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -65,7 +65,7 @@ df = df.with_column( "failed_supp", F.case(col("l_receiptdate") > col("l_commitdate")) - .when(lit(True), col("l_suppkey")) + .when(lit(value=True), col("l_suppkey")) .end(), ) diff --git a/pyproject.toml b/pyproject.toml index a4ed18c4c..d86b657ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,37 +80,17 @@ ignore = [ "TD003", # Allow TODO lines "UP007", # Disallowing Union is pedantic # TODO: Enable all of the following, but this PR is getting too large already - "PT001", - "ANN204", - "B008", - "EM101", "PLR0913", - "PLR1714", - "ANN201", - "C400", "TRY003", - "B904", - "UP006", - "RUF012", - "FBT003", - "C416", - "SIM102", - "PGH003", "PLR2004", - "PERF401", "PD901", - "EM102", "ERA001", - "SIM108", - "ICN001", "ANN001", "ANN202", "PTH", "N812", "INP001", "DTZ007", - "PLW2901", - "RET503", "RUF015", "A005", "TC001", diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 286e5dc31..d871fdb71 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -92,17 +92,17 @@ ] -def column(value: str): +def column(value: str) -> Expr: """Create a column expression.""" return Expr.column(value) -def col(value: str): +def col(value: str) -> Expr: """Create a column expression.""" return Expr.column(value) -def literal(value): +def literal(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) @@ -120,6 +120,6 @@ def str_lit(value): return string_literal(value) -def lit(value): +def 
lit(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 0560f4704..6c3f188cc 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -24,7 +24,7 @@ import datafusion._internal as df_internal if TYPE_CHECKING: - import pyarrow + import pyarrow as pa class Catalog: @@ -67,7 +67,7 @@ def __init__(self, table: df_internal.Table) -> None: self.table = table @property - def schema(self) -> pyarrow.Schema: + def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" return self.table.schema diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 58ad9a943..1429a4975 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -40,9 +40,9 @@ if TYPE_CHECKING: import pathlib - import pandas - import polars - import pyarrow + import pandas as pd + import polars as pl + import pyarrow as pa from datafusion.plan import ExecutionPlan, LogicalPlan @@ -537,7 +537,7 @@ def register_listing_table( path: str | pathlib.Path, table_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".parquet", - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> None: """Register multiple files as a single table. @@ -606,14 +606,14 @@ def sql_with_options(self, query: str, options: SQLOptions) -> DataFrame: def create_dataframe( self, - partitions: list[list[pyarrow.RecordBatch]], + partitions: list[list[pa.RecordBatch]], name: str | None = None, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, ) -> DataFrame: """Create and return a dataframe using the provided partitions. Args: - partitions: :py:class:`pyarrow.RecordBatch` partitions to register. + partitions: :py:class:`pa.RecordBatch` partitions to register. name: Resultant dataframe name. 
schema: Schema for the partitions. @@ -684,16 +684,14 @@ def from_arrow( return DataFrame(self.ctx.from_arrow(data, name)) @deprecated("Use ``from_arrow`` instead.") - def from_arrow_table( - self, data: pyarrow.Table, name: str | None = None - ) -> DataFrame: + def from_arrow_table(self, data: pa.Table, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow table. This is an alias for :py:func:`from_arrow`. """ return self.from_arrow(data, name) - def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFrame: + def from_pandas(self, data: pd.DataFrame, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Pandas DataFrame. Args: @@ -705,7 +703,7 @@ def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFr """ return DataFrame(self.ctx.from_pandas(data, name)) - def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFrame: + def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Polars DataFrame. Args: @@ -719,7 +717,7 @@ def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFr # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 # is the discussion on how we arrived at adding register_view - def register_view(self, name: str, df: DataFrame): + def register_view(self, name: str, df: DataFrame) -> None: """Register a :py:class: `~datafusion.detaframe.DataFrame` as a view. Args: @@ -755,7 +753,7 @@ def register_table_provider( self.ctx.register_table_provider(name, provider) def register_record_batches( - self, name: str, partitions: list[list[pyarrow.RecordBatch]] + self, name: str, partitions: list[list[pa.RecordBatch]] ) -> None: """Register record batches as a table. 
@@ -776,7 +774,7 @@ def register_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -817,7 +815,7 @@ def register_csv( self, name: str, path: str | pathlib.Path | list[str | pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -843,10 +841,7 @@ def register_csv( selected for data input. file_compression_type: File compression type. """ - if isinstance(path, list): - path = [str(p) for p in path] - else: - path = str(path) + path = [str(p) for p in path] if isinstance(path, list) else str(path) self.ctx.register_csv( name, @@ -863,7 +858,7 @@ def register_json( self, name: str, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -901,7 +896,7 @@ def register_avro( self, name: str, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_extension: str = ".avro", table_partition_cols: list[tuple[str, str]] | None = None, ) -> None: @@ -923,8 +918,8 @@ def register_avro( name, str(path), schema, file_extension, table_partition_cols ) - def register_dataset(self, name: str, dataset: pyarrow.dataset.Dataset) -> None: - """Register a :py:class:`pyarrow.dataset.Dataset` as a table. + def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None: + """Register a :py:class:`pa.dataset.Dataset` as a table. Args: name: Name of the table to register. 
@@ -975,7 +970,7 @@ def session_id(self) -> str: def read_json( self, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -1012,7 +1007,7 @@ def read_json( def read_csv( self, path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -1065,7 +1060,7 @@ def read_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -1110,7 +1105,7 @@ def read_parquet( def read_avro( self, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".avro", ) -> DataFrame: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index d1c71c2bb..26fe8f453 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -26,10 +26,8 @@ TYPE_CHECKING, Any, Iterable, - List, Literal, Optional, - Type, Union, overload, ) @@ -75,7 +73,7 @@ class Compression(Enum): LZ4_RAW = "lz4_raw" @classmethod - def from_str(cls: Type[Compression], value: str) -> Compression: + def from_str(cls: type[Compression], value: str) -> Compression: """Convert a string to a Compression enum value. 
Args: @@ -89,11 +87,13 @@ def from_str(cls: Type[Compression], value: str) -> Compression: """ try: return cls(value.lower()) - except ValueError: + except ValueError as err: valid_values = str([item.value for item in Compression]) - raise ValueError( - f"{value} is not a valid Compression. Valid values are: {valid_values}" - ) + error_msg = f""" + {value} is not a valid Compression. + Valid values are: {valid_values} + """ + raise ValueError(error_msg) from err def get_default_level(self) -> Optional[int]: """Get the default compression level for the compression type. @@ -132,7 +132,7 @@ def into_view(self) -> pa.Table: """Convert DataFrame as a ViewTable which can be used in register_table.""" return self.df.into_view() - def __getitem__(self, key: str | List[str]) -> DataFrame: + def __getitem__(self, key: str | list[str]) -> DataFrame: """Return a new :py:class`DataFrame` with the specified column or columns. Args: @@ -287,8 +287,7 @@ def _simplify_expression( if isinstance(expr, Expr): expr_list.append(expr.expr) elif isinstance(expr, Iterable): - for inner_expr in expr: - expr_list.append(inner_expr.expr) + expr_list.extend(inner_expr.expr for inner_expr in expr) else: raise NotImplementedError if named_exprs: @@ -513,10 +512,15 @@ def join( # This check is to prevent breaking API changes where users prior to # DF 43.0.0 would pass the join_keys as a positional argument instead # of a keyword argument. 
- if isinstance(on, tuple) and len(on) == 2: - if isinstance(on[0], list) and isinstance(on[1], list): - join_keys = on # type: ignore - on = None + if ( + isinstance(on, tuple) + and len(on) == 2 + and isinstance(on[0], list) + and isinstance(on[1], list) + ): + # We know this is safe because we've checked the types + join_keys = on # type: ignore[assignment] + on = None if join_keys is not None: warnings.warn( @@ -529,18 +533,17 @@ def join( if on is not None: if left_on is not None or right_on is not None: - raise ValueError( - "`left_on` or `right_on` should not provided with `on`" - ) + error_msg = "`left_on` or `right_on` should not provided with `on`" + raise ValueError(error_msg) left_on = on right_on = on elif left_on is not None or right_on is not None: if left_on is None or right_on is None: - raise ValueError("`left_on` and `right_on` should both be provided.") + error_msg = "`left_on` and `right_on` should both be provided." + raise ValueError(error_msg) else: - raise ValueError( - "either `on` or `left_on` and `right_on` should be provided." - ) + error_msg = "either `on` or `left_on` and `right_on` should be provided." + raise ValueError(error_msg) if isinstance(left_on, str): left_on = [left_on] if isinstance(right_on, str): @@ -726,9 +729,11 @@ def write_parquet( if isinstance(compression, str): compression = Compression.from_str(compression) - if compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD}: - if compression_level is None: - compression_level = compression.get_default_level() + if ( + compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD} + and compression_level is None + ): + compression_level = compression.get_default_level() self.df.write_parquet(str(path), compression.value, compression_level) @@ -824,7 +829,7 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram Returns: A DataFrame with the columns expanded. 
""" - columns = [c for c in columns] + columns = list(columns) return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls)) def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any: diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 77b6c272d..2697d8143 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -22,7 +22,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Type +from typing import TYPE_CHECKING, Any, ClassVar, Optional import pyarrow as pa @@ -176,7 +176,7 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: """Helper function to return a default Sort if an Expr is provided.""" if isinstance(e, SortExpr): return e.raw_sort - return SortExpr(e, True, True).raw_sort + return SortExpr(e, ascending=True, nulls_first=True).raw_sort def sort_list_to_raw_sort_list( @@ -439,24 +439,21 @@ def fill_null(self, value: Any | Expr | None = None) -> Expr: value = Expr.literal(value) return Expr(functions_internal.nvl(self.expr, value.expr)) - _to_pyarrow_types = { + _to_pyarrow_types: ClassVar[dict[type, pa.DataType]] = { float: pa.float64(), int: pa.int64(), str: pa.string(), bool: pa.bool_(), } - def cast( - self, to: pa.DataType[Any] | Type[float] | Type[int] | Type[str] | Type[bool] - ) -> Expr: + def cast(self, to: pa.DataType[Any] | type[float | int | str | bool]) -> Expr: """Cast to a new data type.""" if not isinstance(to, pa.DataType): try: to = self._to_pyarrow_types[to] - except KeyError: - raise TypeError( - "Expected instance of pyarrow.DataType or builtins.type" - ) + except KeyError as err: + error_msg = "Expected instance of pyarrow.DataType or builtins.type" + raise TypeError(error_msg) from err return Expr(self.expr.cast(to)) @@ -565,9 +562,7 @@ def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: set parameters for either window or aggregate functions. 
If used on any other type of expression, an error will be generated when ``build()`` is called. """ - return ExprFuncBuilder( - self.expr.partition_by(list(e.expr for e in partition_by)) - ) + return ExprFuncBuilder(self.expr.partition_by([e.expr for e in partition_by])) def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: """Set the frame fora window function. @@ -610,7 +605,7 @@ def over(self, window: Window) -> Expr: class ExprFuncBuilder: - def __init__(self, builder: expr_internal.ExprFuncBuilder): + def __init__(self, builder: expr_internal.ExprFuncBuilder) -> None: self.builder = builder def order_by(self, *exprs: Expr) -> ExprFuncBuilder: @@ -638,7 +633,7 @@ def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: """Set partitioning for window functions.""" return ExprFuncBuilder( - self.builder.partition_by(list(e.expr for e in partition_by)) + self.builder.partition_by([e.expr for e in partition_by]) ) def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: @@ -693,11 +688,11 @@ def __init__( """ if not isinstance(start_bound, pa.Scalar) and start_bound is not None: start_bound = pa.scalar(start_bound) - if units == "rows" or units == "groups": + if units in ("rows", "groups"): start_bound = start_bound.cast(pa.uint64()) if not isinstance(end_bound, pa.Scalar) and end_bound is not None: end_bound = pa.scalar(end_bound) - if units == "rows" or units == "groups": + if units in ("rows", "groups"): end_bound = end_bound.cast(pa.uint64()) self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound) @@ -709,7 +704,7 @@ def get_lower_bound(self) -> WindowFrameBound: """Returns starting bound.""" return WindowFrameBound(self.window_frame.get_lower_bound()) - def get_upper_bound(self): + def get_upper_bound(self) -> WindowFrameBound: """Returns end bound.""" return WindowFrameBound(self.window_frame.get_upper_bound()) diff --git 
a/python/datafusion/functions.py b/python/datafusion/functions.py index 26bac149c..5cf914e16 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -790,10 +790,7 @@ def regexp_count( """ if flags is not None: flags = flags.expr - if start is not None: - start = start.expr - else: - start = Expr.expr + start = start.expr if start is not None else Expr.expr return Expr(f.regexp_count(string.expr, pattern.expr, start, flags)) @@ -817,13 +814,15 @@ def right(string: Expr, n: Expr) -> Expr: return Expr(f.right(string.expr, n.expr)) -def round(value: Expr, decimal_places: Expr = Expr.literal(0)) -> Expr: +def round(value: Expr, decimal_places: Expr | None = None) -> Expr: """Round the argument to the nearest integer. If the optional ``decimal_places`` is specified, round to the nearest number of decimal places. You can specify a negative number of decimal places. For example ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. """ + if decimal_places is None: + decimal_places = Expr.literal(0) return Expr(f.round(value.expr, decimal_places.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 161e1e3bb..37f2075f5 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -81,7 +81,7 @@ def test_literal(df): literal("1"), literal("OK"), literal(3.14), - literal(True), + literal(value=True), literal(b"hello world"), ) result = df.collect() diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index a2de2d32b..926a65961 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -28,7 +28,7 @@ from enum import EnumMeta as EnumType -def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 +def missing_exports(internal_obj, wrapped_obj) -> None: """ Identify if any of the rust exposted structs or functions do not have wrappers. 
@@ -56,9 +56,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 # __kwdefaults__ and __doc__. As long as these are None on the internal # object, it's okay to skip them. However if they do exist on the internal # object they must also exist on the wrapped object. - if internal_attr is not None: - if wrapped_attr is None: - pytest.fail(f"Missing attribute: {internal_attr_name}") + if internal_attr is not None and wrapped_attr is None: + pytest.fail(f"Missing attribute: {internal_attr_name}") if internal_attr_name in ["__self__", "__class__"]: continue From 42982dad27ad03e7e9395d4c3ae3064c2b489434 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 22 Mar 2025 10:14:55 -0400 Subject: [PATCH 063/248] Improve collection during repr and repr_html (#1036) * Improve table readout of a dataframe in jupyter notebooks by making the table scrollable and displaying the first record batch up to 2MB * Add option to only display a portion of a cell data and the user can click on a button to toggle showing more or less * We cannot expect that the first non-empy batch is sufficient for our 2MB limit, so switch over to collecting until we run out or use up the size * Update python unit test to allow the additional formatting data to exist and only check the table contents * Combining collection for repr and repr_html into one function * Small clippy suggestion * Collect was occuring twice on repr * Switch to execute_stream_partitioned --- python/tests/test_dataframe.py | 23 ++-- src/dataframe.rs | 240 ++++++++++++++++++++++++++++----- src/utils.rs | 2 +- 3 files changed, 225 insertions(+), 40 deletions(-) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 384b17878..718ebf69d 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import os +import re from typing import Any import pyarrow as pa @@ -1245,13 +1246,17 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: def test_dataframe_repr_html(df) -> None: output = df._repr_html_() - ref_html = """
- - - - -
abc
148
255
368
- """ + # Since we've added a fair bit of processing to the html output, lets just verify + # the values we are expecting in the table exist. Use regex and ignore everything + # between the and . We also don't want the closing > on the + # td and th segments because that is where the formatting data is written. - # Ignore whitespace just to make this test look cleaner - assert output.replace(" ", "") == ref_html.replace(" ", "") + headers = ["a", "b", "c"] + headers = [f"{v}" for v in headers] + header_pattern = "(.*?)".join(headers) + assert len(re.findall(header_pattern, output, re.DOTALL)) == 1 + + body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] + body_lines = [f"{v}" for inner in body_data for v in inner] + body_pattern = "(.*?)".join(body_lines) + assert len(re.findall(body_pattern, output, re.DOTALL)) == 1 diff --git a/src/dataframe.rs b/src/dataframe.rs index 243e2e14f..be10b8c28 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -31,9 +31,11 @@ use datafusion::common::UnnestOptions; use datafusion::config::{CsvOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::datasource::TableProvider; +use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; +use futures::{StreamExt, TryStreamExt}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; @@ -70,6 +72,9 @@ impl PyTableProvider { PyTable::new(table_provider) } } +const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB +const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20; +const MAX_LENGTH_CELL_WITHOUT_MINIMIZE: usize = 25; /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. 
@@ -111,56 +116,151 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; - let batches_as_string = pretty::pretty_format_batches(&batches); - match batches_as_string { - Ok(batch) => Ok(format!("DataFrame()\n{batch}")), - Err(err) => Ok(format!("Error: {:?}", err.to_string())), + let (batches, has_more) = wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10), + )?; + if batches.is_empty() { + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } - } - fn _repr_html_(&self, py: Python) -> PyDataFusionResult { - let mut html_str = "\n".to_string(); + let batches_as_displ = + pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?; + + let additional_str = match has_more { + true => "\nData truncated.", + false => "", + }; - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; + Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}")) + } + fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + let (batches, has_more) = wait_for_future( + py, + collect_record_batches_to_display( + self.df.as_ref().clone(), + MIN_TABLE_ROWS_TO_DISPLAY, + usize::MAX, + ), + )?; if batches.is_empty() { - html_str.push_str("
\n"); - return Ok(html_str); + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } + let table_uuid = uuid::Uuid::new_v4().to_string(); + + let mut html_str = " + + +

+ + \n".to_string(); + let schema = batches[0].schema(); let mut header = Vec::new(); for field in schema.fields() { - header.push(format!("", field.name())); } let header_str = header.join(""); - html_str.push_str(&format!("{}\n", header_str)); - - for batch in batches { - let formatters = batch - .columns() - .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) - .map(|c| { - c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) - }) - .collect::, _>>()?; - - for row in 0..batch.num_rows() { + html_str.push_str(&format!("{}\n", header_str)); + + let batch_formatters = batches + .iter() + .map(|batch| { + batch + .columns() + .iter() + .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) + .map(|c| { + c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) + }) + .collect::, _>>() + }) + .collect::, _>>()?; + + let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); + + // We need to build up row by row for html + let mut table_row = 0; + for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { + for batch_row in 0..num_rows_in_batch { + table_row += 1; let mut cells = Vec::new(); - for formatter in &formatters { - cells.push(format!("", formatter.value(row))); + for (col, formatter) in batch_formatter.iter().enumerate() { + let cell_data = formatter.value(batch_row).to_string(); + // From testing, primitive data types do not typically get larger than 21 characters + if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { + let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; + cells.push(format!(" + ")); + } else { + cells.push(format!("", formatter.value(batch_row))); + } } let row_str = cells.join(""); html_str.push_str(&format!("{}\n", row_str)); } } + html_str.push_str("
{}", field.name())); + header.push(format!("{}
{} +
+ {short_cell_data} + {cell_data} + +
+
{}
\n"); + + html_str.push_str(" + + "); - html_str.push_str("\n"); + if has_more { + html_str.push_str("Data truncated due to size."); + } Ok(html_str) } @@ -771,3 +871,83 @@ fn record_batch_into_schema( RecordBatch::try_new(schema, data_arrays) } + +/// This is a helper function to return the first non-empty record batch from executing a DataFrame. +/// It additionally returns a bool, which indicates if there are more record batches available. +/// We do this so we can determine if we should indicate to the user that the data has been +/// truncated. This collects until we have achived both of these two conditions +/// +/// - We have collected our minimum number of rows +/// - We have reached our limit, either data size or maximum number of rows +/// +/// Otherwise it will return when the stream has exhausted. If you want a specific number of +/// rows, set min_rows == max_rows. +async fn collect_record_batches_to_display( + df: DataFrame, + min_rows: usize, + max_rows: usize, +) -> Result<(Vec, bool), DataFusionError> { + let partitioned_stream = df.execute_stream_partitioned().await?; + let mut stream = futures::stream::iter(partitioned_stream).flatten(); + let mut size_estimate_so_far = 0; + let mut rows_so_far = 0; + let mut record_batches = Vec::default(); + let mut has_more = false; + + while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows) + || rows_so_far < min_rows + { + let mut rb = match stream.next().await { + None => { + break; + } + Some(Ok(r)) => r, + Some(Err(e)) => return Err(e), + }; + + let mut rows_in_rb = rb.num_rows(); + if rows_in_rb > 0 { + size_estimate_so_far += rb.get_array_memory_size(); + + if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY { + let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32; + let total_rows = rows_in_rb + rows_so_far; + + let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize; + if reduced_row_num < min_rows { + reduced_row_num = 
min_rows.min(total_rows); + } + + let limited_rows_this_rb = reduced_row_num - rows_so_far; + if limited_rows_this_rb < rows_in_rb { + rows_in_rb = limited_rows_this_rb; + rb = rb.slice(0, limited_rows_this_rb); + has_more = true; + } + } + + if rows_in_rb + rows_so_far > max_rows { + rb = rb.slice(0, max_rows - rows_so_far); + has_more = true; + } + + rows_so_far += rb.num_rows(); + record_batches.push(rb); + } + } + + if record_batches.is_empty() { + return Ok((Vec::default(), false)); + } + + if !has_more { + // Data was not already truncated, so check to see if more record batches remain + has_more = match stream.try_next().await { + Ok(None) => false, // reached end + Ok(Some(_)) => true, + Err(_) => false, // Stream disconnected + }; + } + + Ok((record_batches, has_more)) +} diff --git a/src/utils.rs b/src/utils.rs index 999aad755..3487de21b 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -42,7 +42,7 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { #[inline] pub(crate) fn get_global_ctx() -> &'static SessionContext { static CTX: OnceLock = OnceLock::new(); - CTX.get_or_init(|| SessionContext::new()) + CTX.get_or_init(SessionContext::new) } /// Utility to collect rust futures with GIL released From d0315ffa704aba467f769f444208b7ce26d83037 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 22 Mar 2025 14:37:24 -0400 Subject: [PATCH 064/248] feat: Update DataFusion dependency to 46 (#1079) * Update DataFusion dependency to 46 * There was an update upstream in the exec but it is not a breaking change and only needs unit test updates --- Cargo.lock | 296 +++++++++++++++++++-------------- Cargo.toml | 18 +- python/tests/test_dataframe.py | 3 +- src/expr.rs | 39 +++-- src/expr/aggregate.rs | 10 +- src/expr/aggregate_expr.rs | 11 +- src/expr/window.rs | 24 ++- src/functions.rs | 34 ++-- 8 files changed, 252 insertions(+), 183 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c7f2bf3c..3a4915f23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" +checksum = "84ef243634a39fb6e9d1710737e7a5ef96c9bacabd2326859ff889bc9ef755e5" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" +checksum = "8f420c6aef51dad2e4a96ce29c0ec90ad84880bdb60b321c74c652a6be07b93f" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" +checksum = "24bda5ff6461a4ff9739959b3d57b377f45e3f878f7be1a4f28137c0a8f339fa" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" +checksum = "bc6ed265c73f134a583d02c3cab5e16afab9446d8048ede8707e31f85fad58a0" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" +checksum = "01c648572391edcef10e5fd458db70ba27ed6f71bcaee04397d0cfb100b34f8b" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = 
"arrow-csv" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" +checksum = "a02fb265a6d8011a7d3ad1a36f25816ad0a3bb04cb8e9fe7929c165b98c0cbcd" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" +checksum = "5f2cebf504bb6a92a134a87fff98f01b14fbb3a93ecf7aef90cd0f888c5fffa4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" +checksum = "8e6405b287671c88846e7751f7291f717b164911474cabac6d3d8614d5aa7374" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" +checksum = "5329bf9e7390cbb6b117ddd4d82e94c5362ea4cab5095697139429f36a38350c" dependencies = [ "arrow-array", "arrow-buffer", @@ -319,16 +319,18 @@ dependencies = [ "half", "indexmap", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" +checksum = "e103c13d4b80da28339c1d7aa23dd85bd59f42158acc45d39eeb6770627909ce" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +341,9 @@ dependencies = [ [[package]] name = 
"arrow-row" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" +checksum = "170549a11b8534f3097a0619cfe89c42812345dc998bcf81128fc700b84345b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +354,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" +checksum = "a5c53775bba63f319189f366d2b86e9a8889373eb198f07d8544938fc9f8ed9a" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" +checksum = "0a99003b2eb562b8d9c99dfb672306f15e94b20d3734179d596895703e821dcf" dependencies = [ "ahash", "arrow-array", @@ -375,9 +377,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" +checksum = "90fdb130ee8325f4cd8262e19bb6baa3cbcef2b2573c4bee8c6fda7ea08199d7" dependencies = [ "arrow-array", "arrow-buffer", @@ -535,9 +537,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.5" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" +checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" dependencies = [ "arrayref", "arrayvec", @@ -649,15 +651,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.40" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets", + "windows-link", ] [[package]] @@ -864,30 +866,32 @@ dependencies = [ [[package]] name = "datafusion" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" dependencies = [ "apache-avro", "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", "bzip2 0.5.1", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -896,7 +900,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "num-traits", @@ -908,7 +911,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -917,9 +919,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" dependencies = [ "arrow", "async-trait", @@ -933,22 +935,40 @@ dependencies = [ "itertools 0.14.0", "log", "parking_lot", - 
"sqlparser", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" dependencies = [ "ahash", "apache-avro", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64 0.22.1", "half", "hashbrown 0.14.5", @@ -966,25 +986,59 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.5.1", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + 
"log", + "object_store", + "rand", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + [[package]] name = "datafusion-doc" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" [[package]] name = "datafusion-execution" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" dependencies = [ "arrow", "dashmap", @@ -1001,9 +1055,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" dependencies = [ "arrow", "chrono", @@ -1022,26 +1076,25 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" dependencies = [ "arrow", "datafusion-common", + "indexmap", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" +checksum = "d740dd9f32a4f4ed1b907e6934201bb059efe6c877532512c661771d973c7b21" dependencies = [ "abi_stable", "arrow", - "arrow-array", - "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1055,9 +1108,9 @@ 
dependencies = [ [[package]] name = "datafusion-functions" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" dependencies = [ "arrow", "arrow-buffer", @@ -1071,7 +1124,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -1085,14 +1137,12 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1108,9 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" dependencies = [ "ahash", "arrow", @@ -1121,15 +1171,12 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1145,9 +1192,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions-table" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" dependencies = [ "arrow", "async-trait", @@ -1161,9 +1208,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1178,9 +1225,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1188,9 +1235,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" dependencies = [ "datafusion-expr", "quote", @@ -1199,9 +1246,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" dependencies = [ "arrow", "chrono", @@ -1218,15 +1265,12 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1243,13 +1287,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1258,12 +1301,11 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1271,23 +1313,19 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", 
"async-trait", @@ -1312,9 +1350,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" +checksum = "6f6ef4c6eb52370cb48639e25e2331a415aac0b2b0a0a472b36e26603bdf184f" dependencies = [ "arrow", "chrono", @@ -1328,9 +1366,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" +checksum = "5faf4a9bbb0d0a305fea8a6db21ba863286b53e53a212e687d2774028dd6f03f" dependencies = [ "arrow", "datafusion-common", @@ -1362,13 +1400,11 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1381,11 +1417,10 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944" +checksum = "2c2be3226a683e02cff65181e66e62eba9f812ed0e9b7ec8fe11ac8dabf1a73f" dependencies = [ - "arrow-buffer", "async-recursion", "async-trait", "chrono", @@ -1395,6 +1430,7 @@ dependencies = [ "pbjson-types", "prost", "substrait", + "tokio", "url", ] @@ -1472,9 +1508,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" dependencies = [ "crc32fast", "miniz_oxide", @@ -2117,9 +2153,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libflate" @@ -2447,9 +2483,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "94243778210509a5a5e9e012872127180c155d73a9cd6e2df9243d213e81e100" dependencies = [ "ahash", "arrow-array", @@ -2479,7 +2515,6 @@ dependencies = [ "tokio", "twox-hash", "zstd", - "zstd-sys", ] [[package]] @@ -3401,11 +3436,12 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -3466,9 +3502,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.52.3" +version = "0.53.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756" +checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b" dependencies = [ "heck", "pbjson", @@ -3922,12 +3958,14 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.13.1" +version = "1.16.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "getrandom 0.3.1", + "js-sys", "serde", + "wasm-bindgen", ] [[package]] @@ -4114,6 +4152,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-registry" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 50967a219..8afabdd82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,24 +34,24 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.43", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} -arrow = { version = "54", features = ["pyarrow"] } -datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "45.0.0", optional = true } -datafusion-proto = { version = "45.0.0" } -datafusion-ffi = { version = "45.0.0" } -prost = "0.13" # keep in line with `datafusion-substrait` +arrow = { version = "54.2.1", features = ["pyarrow"] } +datafusion = { version = "46.0.1", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "46.0.1", optional = true } +datafusion-proto = { version = "46.0.1" } +datafusion-ffi = { version = "46.0.1" } +prost = "0.13.1" # keep in line with `datafusion-substrait` uuid = { version = "1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = 
["local_dynamic_tls"] } -async-trait = "0.1" +async-trait = "0.1.73" futures = "0.3" object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } url = "2" [build-dependencies] -prost-types = "0.13" # keep in line with `datafusion-substrait` +prost-types = "0.13.1" # keep in line with `datafusion-substrait` pyo3-build-config = "0.23" [lib] diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 718ebf69d..eda13930d 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -753,7 +753,8 @@ def test_execution_plan(aggregate_df): assert "AggregateExec:" in indent assert "CoalesceBatchesExec:" in indent assert "RepartitionExec:" in indent - assert "CsvExec:" in indent + assert "DataSourceExec:" in indent + assert "file_type=csv" in indent ctx = SessionContext() rows_returned = 0 diff --git a/src/expr.rs b/src/expr.rs index d3c528eb4..561170289 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use datafusion::logical_expr::expr::{AggregateFunctionParams, WindowFunctionParams}; use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, @@ -172,6 +173,7 @@ impl PyExpr { Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) } + #[allow(deprecated)] Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", qualifier, options @@ -332,7 +334,6 @@ impl PyExpr { | Expr::AggregateFunction { .. } | Expr::WindowFunction { .. } | Expr::InList { .. } - | Expr::Wildcard { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } | Expr::GroupingSet(..) 
@@ -346,6 +347,10 @@ impl PyExpr { | Expr::Unnest(_) | Expr::IsNotUnknown(_) => RexType::Call, Expr::ScalarSubquery(..) => RexType::ScalarSubquery, + #[allow(deprecated)] + Expr::Wildcard { .. } => { + return Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) + } }) } @@ -394,11 +399,15 @@ impl PyExpr { | Expr::InSubquery(InSubquery { expr, .. }) => Ok(vec![PyExpr::from(*expr.clone())]), // Expr variants containing a collection of Expr(s) for operands - Expr::AggregateFunction(AggregateFunction { args, .. }) + Expr::AggregateFunction(AggregateFunction { + params: AggregateFunctionParams { args, .. }, + .. + }) | Expr::ScalarFunction(ScalarFunction { args, .. }) - | Expr::WindowFunction(WindowFunction { args, .. }) => { - Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) - } + | Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()), // Expr(s) that require more specific processing Expr::Case(Case { @@ -465,13 +474,17 @@ impl PyExpr { Expr::GroupingSet(..) | Expr::Unnest(_) | Expr::OuterReferenceColumn(_, _) - | Expr::Wildcard { .. } | Expr::ScalarSubquery(..) | Expr::Placeholder { .. } | Expr::Exists { .. } => Err(py_runtime_err(format!( "Unimplemented Expr type: {}", self.expr ))), + + #[allow(deprecated)] + Expr::Wildcard { .. } => { + Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) + } } } @@ -575,7 +588,7 @@ impl PyExpr { Expr::AggregateFunction(agg_fn) => { let window_fn = Expr::WindowFunction(WindowFunction::new( WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()), - agg_fn.args.clone(), + agg_fn.params.args.clone(), )); add_builder_fns_to_window( @@ -663,16 +676,8 @@ impl PyExpr { /// Create a [Field] representing an [Expr], given an input [LogicalPlan] to resolve against pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> PyDataFusionResult> { - match expr { - Expr::Wildcard { .. 
} => { - // Since * could be any of the valid column names just return the first one - Ok(Arc::new(input_plan.schema().field(0).clone())) - } - _ => { - let fields = exprlist_to_fields(&[expr.clone()], input_plan)?; - Ok(fields[0].1.clone()) - } - } + let fields = exprlist_to_fields(&[expr.clone()], input_plan)?; + Ok(fields[0].1.clone()) } fn _types(expr: &Expr) -> PyResult { match expr { diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 8fc9da5b0..a99d83d23 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::common::DataFusionError; -use datafusion::logical_expr::expr::{AggregateFunction, Alias}; +use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias}; use datafusion::logical_expr::logical_plan::Aggregate; use datafusion::logical_expr::Expr; use pyo3::{prelude::*, IntoPyObjectExt}; @@ -126,9 +126,11 @@ impl PyAggregate { match expr { // TODO: This Alias logic seems to be returning some strange results that we should investigate Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { func: _, args, .. }) => { - Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()) - } + Expr::AggregateFunction(AggregateFunction { + func: _, + params: AggregateFunctionParams { args, .. }, + .. 
+ }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()), _ => Err(py_type_err( "Encountered a non Aggregate type in aggregation_arguments", )), diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs index 09471097f..c09f116e3 100644 --- a/src/expr/aggregate_expr.rs +++ b/src/expr/aggregate_expr.rs @@ -40,7 +40,13 @@ impl From for PyAggregateFunction { impl Display for PyAggregateFunction { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let args: Vec = self.aggr.args.iter().map(|expr| expr.to_string()).collect(); + let args: Vec = self + .aggr + .params + .args + .iter() + .map(|expr| expr.to_string()) + .collect(); write!(f, "{}({})", self.aggr.func.name(), args.join(", ")) } } @@ -54,12 +60,13 @@ impl PyAggregateFunction { /// is this a distinct aggregate such as `COUNT(DISTINCT expr)` fn is_distinct(&self) -> bool { - self.aggr.distinct + self.aggr.params.distinct } /// Get the arguments to the aggregate function fn args(&self) -> Vec { self.aggr + .params .args .iter() .map(|expr| PyExpr::from(expr.clone())) diff --git a/src/expr/window.rs b/src/expr/window.rs index 13deaec25..c5467bf94 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::common::{DataFusionError, ScalarValue}; -use datafusion::logical_expr::expr::WindowFunction; +use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; @@ -118,7 +118,10 @@ impl PyWindowExpr { /// Returns order by columns in a window function expression pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { order_by, .. }) => py_sort_expr_list(&order_by), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { order_by, .. }, + .. 
+ }) => py_sort_expr_list(&order_by), other => Err(not_window_function_err(other)), } } @@ -126,9 +129,10 @@ impl PyWindowExpr { /// Return partition by columns in a window function expression pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { partition_by, .. }) => { - py_expr_list(&partition_by) - } + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { partition_by, .. }, + .. + }) => py_expr_list(&partition_by), other => Err(not_window_function_err(other)), } } @@ -136,7 +140,10 @@ impl PyWindowExpr { /// Return input args for window function pub fn get_args(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { args, .. }) => py_expr_list(&args), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => py_expr_list(&args), other => Err(not_window_function_err(other)), } } @@ -152,7 +159,10 @@ impl PyWindowExpr { /// Returns a Pywindow frame for a given window function expression pub fn get_frame(&self, expr: PyExpr) -> Option { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { window_frame, .. }) => Some(window_frame.into()), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { window_frame, .. }, + .. 
+ }) => Some(window_frame.into()), _ => None, } } diff --git a/src/functions.rs b/src/functions.rs index 8fac239b4..9c406b95a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -17,6 +17,7 @@ use datafusion::functions_aggregate::all_default_aggregate_functions; use datafusion::functions_window::all_default_window_functions; +use datafusion::logical_expr::expr::WindowFunctionParams; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -215,10 +216,7 @@ fn alias(expr: PyExpr, name: &str) -> PyResult { #[pyfunction] fn col(name: &str) -> PyResult { Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Column(Column { - relation: None, - name: name.to_string(), - }), + expr: datafusion::logical_expr::Expr::Column(Column::new_unqualified(name)), }) } @@ -333,19 +331,21 @@ fn window( Ok(PyExpr { expr: datafusion::logical_expr::Expr::WindowFunction(WindowFunction { fun, - args: args.into_iter().map(|x| x.expr).collect::>(), - partition_by: partition_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - order_by: order_by - .unwrap_or_default() - .into_iter() - .map(|x| x.into()) - .collect::>(), - window_frame, - null_treatment: None, + params: WindowFunctionParams { + args: args.into_iter().map(|x| x.expr).collect::>(), + partition_by: partition_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + order_by: order_by + .unwrap_or_default() + .into_iter() + .map(|x| x.into()) + .collect::>(), + window_frame, + null_treatment: None, + }, }), }) } From 583e1e9420906c99b1fbdf57c0138f1e67548008 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 30 Mar 2025 08:44:55 -0400 Subject: [PATCH 065/248] Update changelog and version number (#1089) --- Cargo.lock | 2 +- Cargo.toml | 2 +- dev/changelog/46.0.0.md | 73 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 dev/changelog/46.0.0.md 
diff --git a/Cargo.lock b/Cargo.lock index 3a4915f23..f90038c50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1377,7 +1377,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "45.2.0" +version = "46.0.0" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 8afabdd82..bc8639d4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "45.2.0" +version = "46.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/46.0.0.md b/dev/changelog/46.0.0.md new file mode 100644 index 000000000..3e5768099 --- /dev/null +++ b/dev/changelog/46.0.0.md @@ -0,0 +1,73 @@ + + +# Apache DataFusion Python 46.0.0 Changelog + +This release consists of 21 commits from 11 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: reads using global ctx [#982](https://github.com/apache/datafusion-python/pull/982) (ion-elgreco) +- feat: Implementation of udf and udaf decorator [#1040](https://github.com/apache/datafusion-python/pull/1040) (CrystalZhou0529) +- feat: expose regex_count function [#1066](https://github.com/apache/datafusion-python/pull/1066) (nirnayroy) +- feat: Update DataFusion dependency to 46 [#1079](https://github.com/apache/datafusion-python/pull/1079) (timsaucer) + +**Fixed bugs:** + +- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) +- fix: type checking [#993](https://github.com/apache/datafusion-python/pull/993) (chenkovsky) + +**Other:** + +- [infra] Fail Clippy on rust build warnings [#1029](https://github.com/apache/datafusion-python/pull/1029) (kevinjqliu) +- Add user documentation for the FFI approach [#1031](https://github.com/apache/datafusion-python/pull/1031) (timsaucer) +- build(deps): bump arrow from 54.1.0 to 54.2.0 
[#1035](https://github.com/apache/datafusion-python/pull/1035) (dependabot[bot]) +- Chore: Release datafusion-python 45 [#1024](https://github.com/apache/datafusion-python/pull/1024) (timsaucer) +- Enable Dataframe to be converted into views which can be used in register_table [#1016](https://github.com/apache/datafusion-python/pull/1016) (kosiew) +- Add ruff check for missing futures import [#1052](https://github.com/apache/datafusion-python/pull/1052) (timsaucer) +- Enable take comments to assign issues to users [#1058](https://github.com/apache/datafusion-python/pull/1058) (timsaucer) +- Update python min version to 3.9 [#1043](https://github.com/apache/datafusion-python/pull/1043) (kevinjqliu) +- feat/improve ruff test coverage [#1055](https://github.com/apache/datafusion-python/pull/1055) (timsaucer) +- feat/making global context accessible for users [#1060](https://github.com/apache/datafusion-python/pull/1060) (jsai28) +- Renaming Internal Structs [#1059](https://github.com/apache/datafusion-python/pull/1059) (Spaarsh) +- test: add pytest asyncio tests [#1063](https://github.com/apache/datafusion-python/pull/1063) (jsai28) +- Add decorator for udwf [#1061](https://github.com/apache/datafusion-python/pull/1061) (kosiew) +- Add additional ruff suggestions [#1062](https://github.com/apache/datafusion-python/pull/1062) (Spaarsh) +- Improve collection during repr and repr_html [#1036](https://github.com/apache/datafusion-python/pull/1036) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 7 Tim Saucer + 2 Kevin Liu + 2 Spaarsh + 2 jsai28 + 2 kosiew + 1 Chen Chongchen + 1 Chongchen Chen + 1 Crystal Zhou + 1 Ion Koutsouris + 1 Nirnay Roy + 1 dependabot[bot] +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ From ffafb59e1b1b7f49f4ba4507b28ba1cecfb0225a Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Sun, 30 Mar 2025 20:45:15 +0800 Subject: [PATCH 066/248] feat: support unparser (#1088) * support unparser * add license * add export * format * format --- python/datafusion/__init__.py | 3 +- python/datafusion/unparser.py | 80 +++++++++++++++++++++++++++++++++++ python/tests/test_unparser.py | 33 +++++++++++++++ src/lib.rs | 5 +++ src/unparser/dialect.rs | 63 +++++++++++++++++++++++++++ src/unparser/mod.rs | 66 +++++++++++++++++++++++++++++ 6 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 python/datafusion/unparser.py create mode 100644 python/tests/test_unparser.py create mode 100644 src/unparser/dialect.rs create mode 100644 src/unparser/mod.rs diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index d871fdb71..ecf5545bc 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,7 +26,7 @@ except ImportError: import importlib_metadata -from . import functions, object_store, substrait +from . import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. from ._internal import Config @@ -89,6 +89,7 @@ "udaf", "udf", "udwf", + "unparser", ] diff --git a/python/datafusion/unparser.py b/python/datafusion/unparser.py new file mode 100644 index 000000000..7ca5b9190 --- /dev/null +++ b/python/datafusion/unparser.py @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""This module provides support for unparsing datafusion plans to SQL. + +For additional information about unparsing, see https://docs.rs/datafusion-sql/latest/datafusion_sql/unparser/index.html +""" + +from ._internal import unparser as unparser_internal +from .plan import LogicalPlan + + +class Dialect: + """DataFusion data catalog.""" + + def __init__(self, dialect: unparser_internal.Dialect) -> None: + """This constructor is not typically called by the end user.""" + self.dialect = dialect + + @staticmethod + def default() -> "Dialect": + """Create a new default dialect.""" + return Dialect(unparser_internal.Dialect.default()) + + @staticmethod + def mysql() -> "Dialect": + """Create a new MySQL dialect.""" + return Dialect(unparser_internal.Dialect.mysql()) + + @staticmethod + def postgres() -> "Dialect": + """Create a new PostgreSQL dialect.""" + return Dialect(unparser_internal.Dialect.postgres()) + + @staticmethod + def sqlite() -> "Dialect": + """Create a new SQLite dialect.""" + return Dialect(unparser_internal.Dialect.sqlite()) + + @staticmethod + def duckdb() -> "Dialect": + """Create a new DuckDB dialect.""" + return Dialect(unparser_internal.Dialect.duckdb()) + + +class Unparser: + """DataFusion unparser.""" + + def __init__(self, dialect: Dialect) -> None: + """This constructor is not typically called by the end user.""" + self.unparser = unparser_internal.Unparser(dialect.dialect) + + def plan_to_sql(self, plan: LogicalPlan) -> str: + """Convert a logical plan to a SQL string.""" + return self.unparser.plan_to_sql(plan._raw_plan) + + def 
with_pretty(self, pretty: bool) -> "Unparser": + """Set the pretty flag.""" + self.unparser = self.unparser.with_pretty(pretty) + return self + + +__all__ = [ + "Dialect", + "Unparser", +] diff --git a/python/tests/test_unparser.py b/python/tests/test_unparser.py new file mode 100644 index 000000000..c4e05780c --- /dev/null +++ b/python/tests/test_unparser.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datafusion.context import SessionContext +from datafusion.unparser import Dialect, Unparser + + +def test_unparser(): + ctx = SessionContext() + df = ctx.sql("SELECT 1") + for dialect in [ + Dialect.mysql(), + Dialect.postgres(), + Dialect.sqlite(), + Dialect.duckdb(), + ]: + unparser = Unparser(dialect) + sql = unparser.plan_to_sql(df.logical_plan()) + assert sql == "SELECT 1" diff --git a/src/lib.rs b/src/lib.rs index ce93ff0c3..6eeda0878 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,6 +52,7 @@ pub mod pyarrow_util; mod record_batch; pub mod sql; pub mod store; +pub mod unparser; #[cfg(feature = "substrait")] pub mod substrait; @@ -103,6 +104,10 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { expr::init_module(&expr)?; m.add_submodule(&expr)?; + let unparser = PyModule::new(py, "unparser")?; + unparser::init_module(&unparser)?; + m.add_submodule(&unparser)?; + // Register the functions as a submodule let funcs = PyModule::new(py, "functions")?; functions::init_module(&funcs)?; diff --git a/src/unparser/dialect.rs b/src/unparser/dialect.rs new file mode 100644 index 000000000..caeef9949 --- /dev/null +++ b/src/unparser/dialect.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use datafusion::sql::unparser::dialect::{ + DefaultDialect, Dialect, DuckDBDialect, MySqlDialect, PostgreSqlDialect, SqliteDialect, +}; +use pyo3::prelude::*; + +#[pyclass(name = "Dialect", module = "datafusion.unparser", subclass)] +#[derive(Clone)] +pub struct PyDialect { + pub dialect: Arc, +} + +#[pymethods] +impl PyDialect { + #[staticmethod] + pub fn default() -> Self { + Self { + dialect: Arc::new(DefaultDialect {}), + } + } + #[staticmethod] + pub fn postgres() -> Self { + Self { + dialect: Arc::new(PostgreSqlDialect {}), + } + } + #[staticmethod] + pub fn mysql() -> Self { + Self { + dialect: Arc::new(MySqlDialect {}), + } + } + #[staticmethod] + pub fn sqlite() -> Self { + Self { + dialect: Arc::new(SqliteDialect {}), + } + } + #[staticmethod] + pub fn duckdb() -> Self { + Self { + dialect: Arc::new(DuckDBDialect::new()), + } + } +} diff --git a/src/unparser/mod.rs b/src/unparser/mod.rs new file mode 100644 index 000000000..b4b0fed10 --- /dev/null +++ b/src/unparser/mod.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +mod dialect; + +use std::sync::Arc; + +use datafusion::sql::unparser::{dialect::Dialect, Unparser}; +use dialect::PyDialect; +use pyo3::{exceptions::PyValueError, prelude::*}; + +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "Unparser", module = "datafusion.unparser", subclass)] +#[derive(Clone)] +pub struct PyUnparser { + dialect: Arc, + pretty: bool, +} + +#[pymethods] +impl PyUnparser { + #[new] + pub fn new(dialect: PyDialect) -> Self { + Self { + dialect: dialect.dialect.clone(), + pretty: false, + } + } + + pub fn plan_to_sql(&self, plan: &PyLogicalPlan) -> PyResult { + let mut unparser = Unparser::new(self.dialect.as_ref()); + unparser = unparser.with_pretty(self.pretty); + let sql = unparser + .plan_to_sql(&plan.plan()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(sql.to_string()) + } + + pub fn with_pretty(&self, pretty: bool) -> Self { + Self { + dialect: self.dialect.clone(), + pretty, + } + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + Ok(()) +} From 09b929a65c27ce8c58563d4def8d79b426ae47e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Sch=C3=A4fer?= <33159547+floscha@users.noreply.github.com> Date: Sun, 30 Mar 2025 14:45:49 +0200 Subject: [PATCH 067/248] Documentation updates: mention correct dataset on basics page (#1081) * Documentation updates: mention correct dataset on basics page * Update docs/source/user-guide/basics.rst Co-authored-by: Kevin Liu * Make download hint more concise --------- Co-authored-by: Kevin Liu --- docs/source/user-guide/basics.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index f37378a41..6636c0c6a 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -20,8 +20,8 @@ Concepts ======== -In this section, we will cover a basic example to introduce a few key concepts. 
We will use the same -source file as described in the :ref:`Introduction `, the Pokemon data set. +In this section, we will cover a basic example to introduce a few key concepts. We will use the +2021 Yellow Taxi Trip Records ([download](https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet)), from the [TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page). .. ipython:: python From 818975b5c43021fed109ebba3cb99d744e8f036a Mon Sep 17 00:00:00 2001 From: kosiew Date: Mon, 21 Apr 2025 19:51:25 +0800 Subject: [PATCH 068/248] Add Configurable HTML Table Formatter for DataFusion DataFrames in Python (#1100) * feat: add configurable HTML formatter for DataFrames * fix: update schema iteration in DataFrameHtmlFormatter to use correct format * refactor: remove unused constant MAX_LENGTH_CELL_WITHOUT_MINIMIZE in PyTableProvider * refactor: improve HTML rendering structure in DataFrameHtmlFormatter - Added List import to typing for type hints. - Refactored format_html method to modularize HTML component generation. - Created separate methods for building HTML header, table container, header, body, expandable cells, regular cells, and footer for better readability and maintainability. - Updated table_uuid generation to use f-string for consistency. - Ensured all HTML components are returned as lists for efficient joining. * doc: enhance docstrings for DataFrameHtmlFormatter methods to clarify usage * refactor: enhance DataFrameHtmlFormatter with customizable cell and header styles - Added methods `get_cell_style()` and `get_header_style()` to allow subclasses to customize the CSS styles for table cells and headers. - Updated `_build_table_header()` and `_build_regular_cell()` methods to utilize the new styling methods for improved maintainability. - Introduced a registry for custom type formatters in `DataFrameHtmlFormatter` to enable flexible formatting of cell values based on their types. 
- Enhanced `_format_cell_value()` to check for registered formatters before defaulting to string conversion, improving extensibility. * refactor: enhance DataFrameHtmlFormatter with custom cell and header builders - Introduced CellFormatter and StyleProvider protocols for better extensibility. - Added DefaultStyleProvider class with default CSS styles for cells and headers. - Updated DataFrameHtmlFormatter to support custom cell and header builders. - Refactored methods to utilize the new style provider for consistent styling. - Improved documentation for methods and classes to clarify usage and customization options. * doc: expand module docstring for DataFrameHtmlFormatter with usage examples and customization options * refactor: streamline HTML formatter by removing extensive docstring examples and enhancing cell formatting methods - Removed lengthy examples from the docstring of DataFrameHtmlFormatter to improve readability. - Added methods for extracting and formatting cell values, enhancing the clarity and maintainability of the code. - Updated cell building methods to utilize the new formatting logic, ensuring consistent application of styles and behaviors. - Introduced a reset fixture for tests to ensure the formatter is returned to default settings after each test case. - Added tests for HTML formatter configuration, custom style providers, type formatters, custom cell builders, and complex customizations to ensure robust functionality. 
* refactor: improve cell rendering logic in DataFrameHtmlFormatter by utilizing raw values for custom cell builders and optimizing expandable cell creation * refactor: enhance HTML representation in DataFrame by integrating latest formatter and improving cell value formatting logic * refactor: improve HTML formatting logic in DataFrame by separating data collection and schema retrieval for clarity refactor: enhance reset_formatter fixture to preserve original formatter configuration during tests * refactor: add debug utilities for HTML formatter integration testing and enhance debugging output in DataFrameHtmlFormatter * refactor: implement HTML formatter patch for DataFrame and enhance value retrieval in cell formatting * fix: correct typo in file extension check for parquet files in test_write_compressed_parquet * test: add test for DataFrame._repr_html_ to validate HTML output structure * refactor: remove monkeypatch for DataFrame._repr_html_ and associated logic * refactor: simplify _repr_html_ method in DataFrame to directly call internal representation * refactor: remove debug utilities for HTML formatter integration in DataFrame * refactor: remove debug print statements from DataFrameHtmlFormatter and add HTML formatter integration tests - Removed debug print statements from format_html, _build_table_body, and get_formatter methods in DataFrameHtmlFormatter to clean up the code. - Introduced a new debug_utils.py file containing a function to check HTML formatter integration. - Updated __init__.py to include configure_formatter for easier access. - Enhanced DataFrame class to include a docstring for _repr_html_ method. - Added comprehensive tests for HTML formatter configuration, custom style providers, type formatters, and cell/header builders in test_dataframe.py. * refactor: streamline imports and enhance HTML formatter integration in tests - Removed redundant import of `configure_formatter` in `__init__.py`. 
- Added `configure_formatter` to `__all__` in `__init__.py` for better module exposure. - Cleaned up import statements in `html_formatter.py` for clarity. - Consolidated import statements in `test_dataframe.py` for improved readability. - Simplified the `reset_formatter` fixture by removing unnecessary imports and comments. * refactor: remove redundant imports and debug print statements in HTML formatter tests * refactor: add reset_formatter function to reset global HTML formatter state - Implemented reset_formatter to create a new default DataFrame HTML formatter and update the global reference. - Added clean_formatter_state fixture in tests to ensure a fresh formatter state for each test case. - Updated test cases to use clean_formatter_state instead of the previous reset_formatter implementation. * refactor: enhance DataFrameHtmlFormatter initialization with parameter validation * test: add custom cell builder test for HTML formatter with value-based styling * test: enhance DataFrame HTML representation tests for structure and values * feat: enhance DataFrameHtmlFormatter with shared styles support and reset functionality - Added `use_shared_styles` parameter to control loading of styles/scripts. - Implemented logic to conditionally include styles based on `use_shared_styles`. - Updated the constructor to validate `use_shared_styles` as a boolean. - Introduced `reset_styles_loaded_state` function to reset the styles loaded state. - Modified `reset_formatter` to reset the `_styles_loaded` flag. 
* refactor: update footer comment in DataFrameHtmlFormatter to clarify content * test: enhance HTML representation test to accommodate span-wrapped values * docs: add usage examples to formatter functions in html_formatter.py * test: add HTML formatter tests for shared styles functionality * feat: add method to check if styles are loaded and enhance schema validation in DataFrameHtmlFormatter * refactor: streamline custom cell builder in HTML formatter tests for clarity and maintainability * fix ruff errors * chore: update license header in html_formatter.py for compliance * refactor: improve HTML formatter tests by updating import statements and enhancing regex patterns for body data * fix clippy errors --- python/datafusion/__init__.py | 2 + python/datafusion/html_formatter.py | 647 ++++++++++++++++++++++++++++ python/tests/test_dataframe.py | 396 ++++++++++++++++- src/dataframe.rs | 130 +----- 4 files changed, 1061 insertions(+), 114 deletions(-) create mode 100644 python/datafusion/html_formatter.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index ecf5545bc..60d0d61b4 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -45,6 +45,7 @@ Expr, WindowFrame, ) +from .html_formatter import configure_formatter from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream @@ -76,6 +77,7 @@ "col", "column", "common", + "configure_formatter", "expr", "functions", "lit", diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py new file mode 100644 index 000000000..a50e14fd5 --- /dev/null +++ b/python/datafusion/html_formatter.py @@ -0,0 +1,647 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""HTML formatting utilities for DataFusion DataFrames.""" + +from __future__ import annotations + +from typing import ( + Any, + Callable, + Optional, + Protocol, + runtime_checkable, +) + + +@runtime_checkable +class CellFormatter(Protocol): + """Protocol for cell value formatters.""" + + def __call__(self, value: Any) -> str: + """Format a cell value to string representation.""" + ... + + +@runtime_checkable +class StyleProvider(Protocol): + """Protocol for HTML style providers.""" + + def get_cell_style(self) -> str: + """Get the CSS style for table cells.""" + ... + + def get_header_style(self) -> str: + """Get the CSS style for header cells.""" + ... + + +class DefaultStyleProvider: + """Default implementation of StyleProvider.""" + + def get_cell_style(self) -> str: + """Get the CSS style for table cells. + + Returns: + CSS style string + """ + return ( + "border: 1px solid black; padding: 8px; text-align: left; " + "white-space: nowrap;" + ) + + def get_header_style(self) -> str: + """Get the CSS style for header cells. + + Returns: + CSS style string + """ + return ( + "border: 1px solid black; padding: 8px; text-align: left; " + "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; " + "max-width: fit-content;" + ) + + +class DataFrameHtmlFormatter: + """Configurable HTML formatter for DataFusion DataFrames. 
+ + This class handles the HTML rendering of DataFrames for display in + Jupyter notebooks and other rich display contexts. + + This class supports extension through composition. Key extension points: + - Provide a custom StyleProvider for styling cells and headers + - Register custom formatters for specific types + - Provide custom cell builders for specialized cell rendering + + Args: + max_cell_length: Maximum characters to display in a cell before truncation + max_width: Maximum width of the HTML table in pixels + max_height: Maximum height of the HTML table in pixels + enable_cell_expansion: Whether to add expand/collapse buttons for long cell + values + custom_css: Additional CSS to include in the HTML output + show_truncation_message: Whether to display a message when data is truncated + style_provider: Custom provider for cell and header styles + use_shared_styles: Whether to load styles and scripts only once per notebook + session + """ + + # Class variable to track if styles have been loaded in the notebook + _styles_loaded = False + + def __init__( + self, + max_cell_length: int = 25, + max_width: int = 1000, + max_height: int = 300, + enable_cell_expansion: bool = True, + custom_css: Optional[str] = None, + show_truncation_message: bool = True, + style_provider: Optional[StyleProvider] = None, + use_shared_styles: bool = True, + ) -> None: + """Initialize the HTML formatter. + + Parameters + ---------- + max_cell_length : int, default 25 + Maximum length of cell content before truncation. + max_width : int, default 1000 + Maximum width of the displayed table in pixels. + max_height : int, default 300 + Maximum height of the displayed table in pixels. + enable_cell_expansion : bool, default True + Whether to allow cells to expand when clicked. + custom_css : str, optional + Custom CSS to apply to the HTML table. + show_truncation_message : bool, default True + Whether to show a message indicating that content has been truncated. 
+ style_provider : StyleProvider, optional + Provider of CSS styles for the HTML table. If None, DefaultStyleProvider + is used. + use_shared_styles : bool, default True + Whether to use shared styles across multiple tables. + + Raises: + ------ + ValueError + If max_cell_length, max_width, or max_height is not a positive integer. + TypeError + If enable_cell_expansion, show_truncation_message, or use_shared_styles is + not a boolean, + or if custom_css is provided but is not a string, + or if style_provider is provided but does not implement the StyleProvider + protocol. + """ + # Validate numeric parameters + + if not isinstance(max_cell_length, int) or max_cell_length <= 0: + msg = "max_cell_length must be a positive integer" + raise ValueError(msg) + if not isinstance(max_width, int) or max_width <= 0: + msg = "max_width must be a positive integer" + raise ValueError(msg) + if not isinstance(max_height, int) or max_height <= 0: + msg = "max_height must be a positive integer" + raise ValueError(msg) + + # Validate boolean parameters + if not isinstance(enable_cell_expansion, bool): + msg = "enable_cell_expansion must be a boolean" + raise TypeError(msg) + if not isinstance(show_truncation_message, bool): + msg = "show_truncation_message must be a boolean" + raise TypeError(msg) + if not isinstance(use_shared_styles, bool): + msg = "use_shared_styles must be a boolean" + raise TypeError(msg) + + # Validate custom_css + if custom_css is not None and not isinstance(custom_css, str): + msg = "custom_css must be None or a string" + raise TypeError(msg) + + # Validate style_provider + if style_provider is not None and not isinstance(style_provider, StyleProvider): + msg = "style_provider must implement the StyleProvider protocol" + raise TypeError(msg) + + self.max_cell_length = max_cell_length + self.max_width = max_width + self.max_height = max_height + self.enable_cell_expansion = enable_cell_expansion + self.custom_css = custom_css + self.show_truncation_message = 
show_truncation_message + self.style_provider = style_provider or DefaultStyleProvider() + self.use_shared_styles = use_shared_styles + # Registry for custom type formatters + self._type_formatters: dict[type, CellFormatter] = {} + # Custom cell builders + self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None + self._custom_header_builder: Optional[Callable[[Any], str]] = None + + def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: + """Register a custom formatter for a specific data type. + + Args: + type_class: The type to register a formatter for + formatter: Function that takes a value of the given type and returns + a formatted string + """ + self._type_formatters[type_class] = formatter + + def set_custom_cell_builder( + self, builder: Callable[[Any, int, int, str], str] + ) -> None: + """Set a custom cell builder function. + + Args: + builder: Function that takes (value, row, col, table_id) and returns HTML + """ + self._custom_cell_builder = builder + + def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: + """Set a custom header builder function. + + Args: + builder: Function that takes a field and returns HTML + """ + self._custom_header_builder = builder + + @classmethod + def is_styles_loaded(cls) -> bool: + """Check if HTML styles have been loaded in the current session. + + This method is primarily intended for debugging UI rendering issues + related to style loading. + + Returns: + True if styles have been loaded, False otherwise + + Example: + >>> from datafusion.html_formatter import DataFrameHtmlFormatter + >>> DataFrameHtmlFormatter.is_styles_loaded() + False + """ + return cls._styles_loaded + + def format_html( + self, + batches: list, + schema: Any, + has_more: bool = False, + table_uuid: str | None = None, + ) -> str: + """Format record batches as HTML. 
+ + This method is used by DataFrame's _repr_html_ implementation and can be + called directly when custom HTML rendering is needed. + + Args: + batches: List of Arrow RecordBatch objects + schema: Arrow Schema object + has_more: Whether there are more batches not shown + table_uuid: Unique ID for the table, used for JavaScript interactions + + Returns: + HTML string representation of the data + + Raises: + TypeError: If schema is invalid and no batches are provided + """ + if not batches: + return "No data to display" + + # Validate schema + if schema is None or not hasattr(schema, "__iter__"): + msg = "Schema must be provided" + raise TypeError(msg) + + # Generate a unique ID if none provided + table_uuid = table_uuid or f"df-{id(batches)}" + + # Build HTML components + html = [] + + # Only include styles and scripts if: + # 1. Not using shared styles, OR + # 2. Using shared styles but they haven't been loaded yet + include_styles = ( + not self.use_shared_styles or not DataFrameHtmlFormatter._styles_loaded + ) + + if include_styles: + html.extend(self._build_html_header()) + # If we're using shared styles, mark them as loaded + if self.use_shared_styles: + DataFrameHtmlFormatter._styles_loaded = True + + html.extend(self._build_table_container_start()) + + # Add table header and body + html.extend(self._build_table_header(schema)) + html.extend(self._build_table_body(batches, table_uuid)) + + html.append("") + html.append("") + + # Add footer (JavaScript and messages) + if include_styles and self.enable_cell_expansion: + html.append(self._get_javascript()) + + # Always add truncation message if needed (independent of styles) + if has_more and self.show_truncation_message: + html.append("
Data truncated due to size.
") + + return "\n".join(html) + + def _build_html_header(self) -> list[str]: + """Build the HTML header with CSS styles.""" + html = [] + html.append("") + return html + + def _build_table_container_start(self) -> list[str]: + """Build the opening tags for the table container.""" + html = [] + html.append( + f'
' + ) + html.append('') + return html + + def _build_table_header(self, schema: Any) -> list[str]: + """Build the HTML table header with column names.""" + html = [] + html.append("") + html.append("") + for field in schema: + if self._custom_header_builder: + html.append(self._custom_header_builder(field)) + else: + html.append( + f"" + ) + html.append("") + html.append("") + return html + + def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: + """Build the HTML table body with data rows.""" + html = [] + html.append("") + + row_count = 0 + for batch in batches: + for row_idx in range(batch.num_rows): + row_count += 1 + html.append("") + + for col_idx, column in enumerate(batch.columns): + # Get the raw value from the column + raw_value = self._get_cell_value(column, row_idx) + + # Always check for type formatters first to format the value + formatted_value = self._format_cell_value(raw_value) + + # Then apply either custom cell builder or standard cell formatting + if self._custom_cell_builder: + # Pass both the raw value and formatted value to let the + # builder decide + cell_html = self._custom_cell_builder( + raw_value, row_count, col_idx, table_uuid + ) + html.append(cell_html) + else: + # Standard cell formatting with formatted value + if ( + len(str(raw_value)) > self.max_cell_length + and self.enable_cell_expansion + ): + cell_html = self._build_expandable_cell( + formatted_value, row_count, col_idx, table_uuid + ) + else: + cell_html = self._build_regular_cell(formatted_value) + html.append(cell_html) + + html.append("") + + html.append("") + return html + + def _get_cell_value(self, column: Any, row_idx: int) -> Any: + """Extract a cell value from a column. 
+ + Args: + column: Arrow array + row_idx: Row index + + Returns: + The raw cell value + """ + try: + value = column[row_idx] + + if hasattr(value, "as_py"): + return value.as_py() + except (AttributeError, TypeError): + pass + else: + return value + + def _format_cell_value(self, value: Any) -> str: + """Format a cell value for display. + + Uses registered type formatters if available. + + Args: + value: The cell value to format + + Returns: + Formatted cell value as string + """ + # Check for custom type formatters + for type_cls, formatter in self._type_formatters.items(): + if isinstance(value, type_cls): + return formatter(value) + + # If no formatter matched, return string representation + return str(value) + + def _build_expandable_cell( + self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str + ) -> str: + """Build an expandable cell for long content.""" + short_value = str(formatted_value)[: self.max_cell_length] + return ( + f"" + ) + + def _build_regular_cell(self, formatted_value: str) -> str: + """Build a regular table cell.""" + return ( + f"" + ) + + def _build_html_footer(self, has_more: bool) -> list[str]: + """Build the HTML footer with JavaScript and messages.""" + html = [] + + # Add JavaScript for interactivity only if cell expansion is enabled + # and we're not using the shared styles approach + if self.enable_cell_expansion and not self.use_shared_styles: + html.append(self._get_javascript()) + + # Add truncation message if needed + if has_more and self.show_truncation_message: + html.append("
Data truncated due to size.
") + + return html + + def _get_default_css(self) -> str: + """Get default CSS styles for the HTML table.""" + return """ + .expandable-container { + display: inline-block; + max-width: 200px; + } + .expandable { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + display: block; + } + .full-text { + display: none; + white-space: normal; + } + .expand-btn { + cursor: pointer; + color: blue; + text-decoration: underline; + border: none; + background: none; + font-size: inherit; + display: block; + margin-top: 5px; + } + """ + + def _get_javascript(self) -> str: + """Get JavaScript code for interactive elements.""" + return """ + + """ + + +class FormatterManager: + """Manager class for the global DataFrame HTML formatter instance.""" + + _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() + + @classmethod + def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. + + Args: + formatter: The formatter instance to use globally + """ + cls._default_formatter = formatter + _refresh_formatter_reference() + + @classmethod + def get_formatter(cls) -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + Returns: + The global HTML formatter instance + """ + return cls._default_formatter + + +def get_formatter() -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + This function is used by the DataFrame._repr_html_ implementation to access + the shared formatter instance. It can also be used directly when custom + HTML rendering is needed. + + Returns: + The global HTML formatter instance + + Example: + >>> from datafusion.html_formatter import get_formatter + >>> formatter = get_formatter() + >>> formatter.max_cell_length = 50 # Increase cell length + """ + return FormatterManager.get_formatter() + + +def set_formatter(formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. 
+ + Args: + formatter: The formatter instance to use globally + + Example: + >>> from datafusion.html_formatter import get_formatter, set_formatter + >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) + >>> set_formatter(custom_formatter) + """ + FormatterManager.set_formatter(formatter) + + +def configure_formatter(**kwargs: Any) -> None: + """Configure the global DataFrame HTML formatter. + + This function creates a new formatter with the provided configuration + and sets it as the global formatter for all DataFrames. + + Args: + **kwargs: Formatter configuration parameters like max_cell_length, + max_width, max_height, enable_cell_expansion, etc. + + Example: + >>> from datafusion.html_formatter import configure_formatter + >>> configure_formatter( + ... max_cell_length=50, + ... max_height=500, + ... enable_cell_expansion=True, + ... use_shared_styles=True + ... ) + """ + set_formatter(DataFrameHtmlFormatter(**kwargs)) + + +def reset_formatter() -> None: + """Reset the global DataFrame HTML formatter to default settings. + + This function creates a new formatter with default configuration + and sets it as the global formatter for all DataFrames. + + Example: + >>> from datafusion.html_formatter import reset_formatter + >>> reset_formatter() # Reset formatter to default settings + """ + formatter = DataFrameHtmlFormatter() + # Reset the styles_loaded flag to ensure styles will be reloaded + DataFrameHtmlFormatter._styles_loaded = False + set_formatter(formatter) + + +def reset_styles_loaded_state() -> None: + """Reset the styles loaded state to force reloading of styles. + + This can be useful when switching between notebook sessions or + when styles need to be refreshed. 
+ + Example: + >>> from datafusion.html_formatter import reset_styles_loaded_state + >>> reset_styles_loaded_state() # Force styles to reload in next render + """ + DataFrameHtmlFormatter._styles_loaded = False + + +def _refresh_formatter_reference() -> None: + """Refresh formatter reference in any modules using it. + + This helps ensure that changes to the formatter are reflected in existing + DataFrames that might be caching the formatter reference. + """ + # This is a no-op but signals modules to refresh their reference diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index eda13930d..464b884db 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -28,8 +28,17 @@ column, literal, ) -from datafusion import functions as f +from datafusion import ( + functions as f, +) from datafusion.expr import Window +from datafusion.html_formatter import ( + DataFrameHtmlFormatter, + configure_formatter, + get_formatter, + reset_formatter, + reset_styles_loaded_state, +) from pyarrow.csv import write_csv @@ -102,6 +111,12 @@ def partitioned_df(): return ctx.create_dataframe([[batch]]) +@pytest.fixture +def clean_formatter_state(): + """Reset the HTML formatter after each test.""" + reset_formatter() + + def test_select(df): df_1 = df.select( column("a") + column("b"), @@ -656,6 +671,252 @@ def test_window_frame_defaults_match_postgres(partitioned_df): assert df_2.sort(col_a).to_pydict() == expected +def test_html_formatter_configuration(df, clean_formatter_state): + """Test configuring the HTML formatter with different options.""" + # Configure with custom settings + configure_formatter( + max_cell_length=5, + max_width=500, + max_height=200, + enable_cell_expansion=False, + ) + + html_output = df._repr_html_() + + # Verify our configuration was applied + assert "max-height: 200px" in html_output + assert "max-width: 500px" in html_output + # With cell expansion disabled, we shouldn't see expandable-container elements + 
assert "expandable-container" not in html_output + + +def test_html_formatter_custom_style_provider(df, clean_formatter_state): + """Test using custom style providers with the HTML formatter.""" + + class CustomStyleProvider: + def get_cell_style(self) -> str: + return ( + "background-color: #f5f5f5; color: #333; padding: 8px; border: " + "1px solid #ddd;" + ) + + def get_header_style(self) -> str: + return ( + "background-color: #4285f4; color: white; font-weight: bold; " + "padding: 10px; border: 1px solid #3367d6;" + ) + + # Configure with custom style provider + configure_formatter(style_provider=CustomStyleProvider()) + + html_output = df._repr_html_() + + # Verify our custom styles were applied + assert "background-color: #4285f4" in html_output + assert "color: white" in html_output + assert "background-color: #f5f5f5" in html_output + + +def test_html_formatter_type_formatters(df, clean_formatter_state): + """Test registering custom type formatters for specific data types.""" + + # Get current formatter and register custom formatters + formatter = get_formatter() + + # Format integers with color based on value + # Using int as the type for the formatter will work since we convert + # Arrow scalar values to Python native types in _get_cell_value + def format_int(value): + return f' 2 else "blue"}">{value}' + + formatter.register_formatter(int, format_int) + + html_output = df._repr_html_() + + # Our test dataframe has values 1,2,3 so we should see: + assert '1' in html_output + + +def test_html_formatter_custom_cell_builder(df, clean_formatter_state): + """Test using a custom cell builder function.""" + + # Create a custom cell builder with distinct styling for different value ranges + def custom_cell_builder(value, row, col, table_id): + try: + num_value = int(value) + if num_value > 5: # Values > 5 get green background with indicator + return ( + '' + ) + if num_value < 3: # Values < 3 get blue background with indicator + return ( + '' + ) + except 
(ValueError, TypeError): + pass + + # Default styling for other cells (3, 4, 5) + return f'' + + # Set our custom cell builder + formatter = get_formatter() + formatter.set_custom_cell_builder(custom_cell_builder) + + html_output = df._repr_html_() + + # Extract cells with specific styling using regex + low_cells = re.findall( + r'', html_output + ) + mid_cells = re.findall( + r'', html_output + ) + high_cells = re.findall( + r'', html_output + ) + + # Sort the extracted values for consistent comparison + low_cells = sorted(map(int, low_cells)) + mid_cells = sorted(map(int, mid_cells)) + high_cells = sorted(map(int, high_cells)) + + # Verify specific values have the correct styling applied + assert low_cells == [1, 2] # Values < 3 + assert mid_cells == [3, 4, 5, 5] # Values 3-5 + assert high_cells == [6, 8, 8] # Values > 5 + + # Verify the exact content with styling appears in the output + assert ( + '' + in html_output + ) + assert ( + '' + in html_output + ) + assert ( + '' in html_output + ) + assert ( + '' in html_output + ) + assert ( + '' + in html_output + ) + assert ( + '' + in html_output + ) + + # Count occurrences to ensure all cells are properly styled + assert html_output.count("-low") == 2 # Two low values (1, 2) + assert html_output.count("-mid") == 4 # Four mid values (3, 4, 5, 5) + assert html_output.count("-high") == 3 # Three high values (6, 8, 8) + + # Create a custom cell builder that changes background color based on value + def custom_cell_builder(value, row, col, table_id): + # Handle numeric values regardless of their exact type + try: + num_value = int(value) + if num_value > 5: # Values > 5 get green background + return f'' + if num_value < 3: # Values < 3 get light blue background + return f'' + except (ValueError, TypeError): + pass + + # Default styling for other cells + return f'' + + # Set our custom cell builder + formatter = get_formatter() + formatter.set_custom_cell_builder(custom_cell_builder) + + html_output = df._repr_html_() 
+ + # Verify our custom cell styling was applied + assert "background-color: #d3e9f0" in html_output # For values 1,2 + + +def test_html_formatter_custom_header_builder(df, clean_formatter_state): + """Test using a custom header builder function.""" + + # Create a custom header builder with tooltips + def custom_header_builder(field): + tooltips = { + "a": "Primary key column", + "b": "Secondary values", + "c": "Additional data", + } + tooltip = tooltips.get(field.name, "") + return ( + f'' + ) + + # Set our custom header builder + formatter = get_formatter() + formatter.set_custom_header_builder(custom_header_builder) + + html_output = df._repr_html_() + + # Verify our custom headers were applied + assert 'title="Primary key column"' in html_output + assert 'title="Secondary values"' in html_output + assert "background-color: #333; color: white" in html_output + + +def test_html_formatter_complex_customization(df, clean_formatter_state): + """Test combining multiple customization options together.""" + + # Create a dark mode style provider + class DarkModeStyleProvider: + def get_cell_style(self) -> str: + return ( + "background-color: #222; color: #eee; " + "padding: 8px; border: 1px solid #444;" + ) + + def get_header_style(self) -> str: + return ( + "background-color: #111; color: #fff; padding: 10px; " + "border: 1px solid #333;" + ) + + # Configure with dark mode style + configure_formatter( + max_cell_length=10, + style_provider=DarkModeStyleProvider(), + custom_css=""" + .datafusion-table { + font-family: monospace; + border-collapse: collapse; + } + .datafusion-table tr:hover td { + background-color: #444 !important; + } + """, + ) + + # Add type formatters for special formatting - now working with native int values + formatter = get_formatter() + formatter.register_formatter( + int, + lambda n: f'{n}', + ) + + html_output = df._repr_html_() + + # Verify our customizations were applied + assert "background-color: #222" in html_output + assert 
"background-color: #111" in html_output + assert ".datafusion-table" in html_output + assert "color: #5af" in html_output # Even numbers + + def test_get_dataframe(tmp_path): ctx = SessionContext() @@ -1244,7 +1505,10 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: assert result["new_col"] == [3 for _i in range(3)] -def test_dataframe_repr_html(df) -> None: +def test_dataframe_repr_html_structure(df) -> None: + """Test that DataFrame._repr_html_ produces expected HTML output structure.""" + import re + output = df._repr_html_() # Since we've added a fair bit of processing to the html output, lets just verify @@ -1255,9 +1519,131 @@ def test_dataframe_repr_html(df) -> None: headers = ["a", "b", "c"] headers = [f"{v}" for v in headers] header_pattern = "(.*?)".join(headers) - assert len(re.findall(header_pattern, output, re.DOTALL)) == 1 + header_matches = re.findall(header_pattern, output, re.DOTALL) + assert len(header_matches) == 1 + # Update the pattern to handle values that may be wrapped in spans body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] - body_lines = [f"{v}" for inner in body_data for v in inner] + + body_lines = [ + f"(?:]*?>)?{v}(?:)?" + for inner in body_data + for v in inner + ] body_pattern = "(.*?)".join(body_lines) - assert len(re.findall(body_pattern, output, re.DOTALL)) == 1 + + body_matches = re.findall(body_pattern, output, re.DOTALL) + + assert len(body_matches) == 1, "Expected pattern of values not found in HTML output" + + +def test_dataframe_repr_html_values(df): + """Test that DataFrame._repr_html_ contains the expected data values.""" + html = df._repr_html_() + assert html is not None + + # Create a more flexible pattern that handles values being wrapped in spans + # This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless + # of formatting + pattern = re.compile( + r"]*?>(?:]*?>)?1(?:)?.*?" + r"]*?>(?:]*?>)?4(?:)?.*?" + r"]*?>(?:]*?>)?8(?:)?.*?" + r"]*?>(?:]*?>)?2(?:)?.*?" + r"]*?>(?:]*?>)?5(?:)?.*?" 
+ r"]*?>(?:]*?>)?5(?:)?.*?" + r"]*?>(?:]*?>)?3(?:)?.*?" + r"]*?>(?:]*?>)?6(?:)?.*?" + r"]*?>(?:]*?>)?8(?:)?", + re.DOTALL, + ) + + # Print debug info if the test fails + matches = re.findall(pattern, html) + if not matches: + print(f"HTML output snippet: {html[:500]}...") # noqa: T201 + + assert len(matches) > 0, "Expected pattern of values not found in HTML output" + + +def test_html_formatter_shared_styles(df, clean_formatter_state): + """Test that shared styles work correctly across multiple tables.""" + + # First, ensure we're using shared styles + configure_formatter(use_shared_styles=True) + + # Get HTML output for first table - should include styles + html_first = df._repr_html_() + + # Verify styles are included in first render + assert " + // Convert record batches to PyObject list + let py_batches = batches + .into_iter() + .map(|rb| rb.to_pyarrow(py)) + .collect::>>()?; -
-
" + f"{field.name}
" + f"
" + "" + "" + f"{formatted_value}" + f"" + f"
" + f"
{formatted_value}{value}-high{value}-low{value}-mid]*>(\d+)-low]*>(\d+)-mid]*>(\d+)-high1-low2-low3-mid4-mid6-high8-high{value}{value}{value}{field.name}
- \n".to_string(); + let py_schema = self.schema().into_pyobject(py)?; - let schema = batches[0].schema(); + // Get the Python formatter module and call format_html + let formatter_module = py.import("datafusion.html_formatter")?; + let get_formatter = formatter_module.getattr("get_formatter")?; + let formatter = get_formatter.call0()?; - let mut header = Vec::new(); - for field in schema.fields() { - header.push(format!("", field.name())); - } - let header_str = header.join(""); - html_str.push_str(&format!("{}\n", header_str)); - - let batch_formatters = batches - .iter() - .map(|batch| { - batch - .columns() - .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) - .map(|c| { - c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) - }) - .collect::, _>>() - }) - .collect::, _>>()?; - - let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); - - // We need to build up row by row for html - let mut table_row = 0; - for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { - for batch_row in 0..num_rows_in_batch { - table_row += 1; - let mut cells = Vec::new(); - for (col, formatter) in batch_formatter.iter().enumerate() { - let cell_data = formatter.value(batch_row).to_string(); - // From testing, primitive data types do not typically get larger than 21 characters - if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { - let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; - cells.push(format!(" - ")); - } else { - cells.push(format!("", formatter.value(batch_row))); - } - } - let row_str = cells.join(""); - html_str.push_str(&format!("{}\n", row_str)); - } - } - html_str.push_str("
{}
-
- {short_cell_data} - {cell_data} - -
-
{}
\n"); - - html_str.push_str(" - - "); + // Call format_html method on the formatter + let kwargs = pyo3::types::PyDict::new(py); + let py_batches_list = PyList::new(py, py_batches.as_slice())?; + kwargs.set_item("batches", py_batches_list)?; + kwargs.set_item("schema", py_schema)?; + kwargs.set_item("has_more", has_more)?; + kwargs.set_item("table_uuid", table_uuid)?; - if has_more { - html_str.push_str("Data truncated due to size."); - } + let html_result = formatter.call_method("format_html", (), Some(&kwargs))?; + let html_str: String = html_result.extract()?; Ok(html_str) } @@ -835,7 +747,7 @@ fn record_batch_into_schema( ) -> Result { let schema = Arc::new(schema.clone()); let base_schema = record_batch.schema(); - if base_schema.fields().len() == 0 { + if base_schema.fields().is_empty() { // Nothing to project return Ok(RecordBatch::new_empty(schema)); } From d0d14f6e1584f9569cbf2e36c8a7abc7c70fd903 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 24 Apr 2025 09:38:38 -0400 Subject: [PATCH 069/248] feat: update datafusion dependency 47 (#1107) * Update cargo to use DF47 release candidate * Need to be explicit for collection of Expr due to change in dataframe API * Add missing enum variant * Add missing enum variants * The interface for last_value of aggregates upstream changed * Cargo fmt * last value aggregate without ordering is ill defined * Clippy warning * Set datafusion version to 47 now that it is released --- Cargo.lock | 600 +++++++++++++++++++------------ Cargo.toml | 24 +- python/tests/test_aggregation.py | 1 - src/dataframe.rs | 2 +- src/dataset_exec.rs | 4 +- src/expr.rs | 16 +- src/functions.rs | 37 +- 7 files changed, 415 insertions(+), 269 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f90038c50..b32d19d4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.3.0" +version = "55.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "84ef243634a39fb6e9d1710737e7a5ef96c9bacabd2326859ff889bc9ef755e5" +checksum = "3095aaf545942ff5abd46654534f15b03a90fba78299d661e045e5d587222f0d" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f420c6aef51dad2e4a96ce29c0ec90ad84880bdb60b321c74c652a6be07b93f" +checksum = "00752064ff47cee746e816ddb8450520c3a52cbad1e256f6fa861a35f86c45e7" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24bda5ff6461a4ff9739959b3d57b377f45e3f878f7be1a4f28137c0a8f339fa" +checksum = "cebfe926794fbc1f49ddd0cdaf898956ca9f6e79541efce62dabccfd81380472" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6ed265c73f134a583d02c3cab5e16afab9446d8048ede8707e31f85fad58a0" +checksum = "0303c7ec4cf1a2c60310fc4d6bbc3350cd051a17bf9e9c0a8e47b4db79277824" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01c648572391edcef10e5fd458db70ba27ed6f71bcaee04397d0cfb100b34f8b" +checksum = "335f769c5a218ea823d3760a743feba1ef7857cba114c01399a891c2fff34285" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a02fb265a6d8011a7d3ad1a36f25816ad0a3bb04cb8e9fe7929c165b98c0cbcd" +checksum = "510db7dfbb4d5761826516cc611d97b3a68835d0ece95b034a052601109c0b1b" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f2cebf504bb6a92a134a87fff98f01b14fbb3a93ecf7aef90cd0f888c5fffa4" +checksum = "e8affacf3351a24039ea24adab06f316ded523b6f8c3dbe28fbac5f18743451b" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6405b287671c88846e7751f7291f717b164911474cabac6d3d8614d5aa7374" +checksum = "69880a9e6934d9cba2b8630dd08a3463a91db8693b16b499d54026b6137af284" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5329bf9e7390cbb6b117ddd4d82e94c5362ea4cab5095697139429f36a38350c" +checksum = "d8dafd17a05449e31e0114d740530e0ada7379d7cb9c338fd65b09a8130960b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -328,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e103c13d4b80da28339c1d7aa23dd85bd59f42158acc45d39eeb6770627909ce" +checksum = "895644523af4e17502d42c3cb6b27cb820f0cb77954c22d75c23a85247c849e1" dependencies = [ "arrow-array", "arrow-buffer", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170549a11b8534f3097a0619cfe89c42812345dc998bcf81128fc700b84345b8" +checksum = 
"9be8a2a4e5e7d9c822b2b8095ecd77010576d824f654d347817640acfc97d229" dependencies = [ "arrow-array", "arrow-buffer", @@ -354,18 +354,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c53775bba63f319189f366d2b86e9a8889373eb198f07d8544938fc9f8ed9a" +checksum = "7450c76ab7c5a6805be3440dc2e2096010da58f7cab301fdc996a4ee3ee74e49" dependencies = [ - "bitflags 2.8.0", + "bitflags", ] [[package]] name = "arrow-select" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a99003b2eb562b8d9c99dfb672306f15e94b20d3734179d596895703e821dcf" +checksum = "aa5f5a93c75f46ef48e4001535e7b6c922eeb0aa20b73cf58d09e13d057490d8" dependencies = [ "ahash", "arrow-array", @@ -377,9 +377,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fdb130ee8325f4cd8262e19bb6baa3cbcef2b2573c4bee8c6fda7ea08199d7" +checksum = "6e7005d858d84b56428ba2a98a107fe88c0132c61793cf6b8232a1f9bfc0452b" dependencies = [ "arrow-array", "arrow-buffer", @@ -406,11 +406,11 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.18" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2 0.4.4", + "bzip2 0.5.2", "flate2", "futures-core", "memchr", @@ -438,18 +438,18 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -502,9 +502,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" dependencies = [ "autocfg", "libm", @@ -514,12 +514,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.8.0" @@ -537,9 +531,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" +checksum = "389a099b34312839e16420d499a9cad9650541715937ffbdd40d36f49e77eeb3" dependencies = [ "arrayref", "arrayvec", @@ -608,21 +602,20 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" dependencies = [ "bzip2-sys", ] [[package]] name = "bzip2-sys" -version = "0.1.12+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" 
dependencies = [ "cc", - "libc", "pkg-config", ] @@ -866,23 +859,26 @@ dependencies = [ [[package]] name = "datafusion" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" +checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" dependencies = [ - "apache-avro", "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.5.1", + "bzip2 0.5.2", "chrono", "datafusion-catalog", "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-avro", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", "datafusion-expr-common", @@ -897,12 +893,12 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "flate2", "futures", "itertools 0.14.0", "log", - "num-traits", "object_store", "parking_lot", "parquet", @@ -919,29 +915,35 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" +checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" dependencies = [ "arrow", "async-trait", "dashmap", "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "futures", "itertools 0.14.0", "log", + "object_store", "parking_lot", + "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" dependencies = [ "arrow", "async-trait", @@ -953,6 +955,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-session", "futures", "log", "object_store", @@ -961,9 +964,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" +checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" dependencies = [ "ahash", "apache-avro", @@ -986,27 +989,27 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" +checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" dependencies = [ + "futures", "log", "tokio", ] [[package]] name = "datafusion-datasource" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.5.1", + "bzip2 0.5.2", "chrono", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", @@ -1014,13 +1017,16 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-session", "flate2", "futures", "glob", "itertools 0.14.0", "log", "object_store", + "parquet", "rand", + "tempfile", "tokio", "tokio-util", "url", @@ -1028,17 +1034,123 @@ dependencies 
= [ "zstd", ] +[[package]] +name = "datafusion-datasource-avro" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ea5111aab9d3f2a8bff570343cccb03ce4c203875ef5a566b7d6f1eb72559e" +dependencies = [ + "apache-avro", + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "num-traits", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand", + "tokio", +] + [[package]] name = "datafusion-doc" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" +checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" [[package]] name = "datafusion-execution" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" +checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" dependencies = [ "arrow", "dashmap", @@ -1055,9 +1167,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" +checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" dependencies = [ "arrow", "chrono", @@ -1076,9 +1188,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" +checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" dependencies = [ "arrow", "datafusion-common", @@ 
-1089,12 +1201,13 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d740dd9f32a4f4ed1b907e6934201bb059efe6c877532512c661771d973c7b21" +checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" dependencies = [ "abi_stable", "arrow", + "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1108,9 +1221,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" +checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" dependencies = [ "arrow", "arrow-buffer", @@ -1137,9 +1250,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" +checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" dependencies = [ "ahash", "arrow", @@ -1158,9 +1271,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" +checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" dependencies = [ "ahash", "arrow", @@ -1171,9 +1284,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" +checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" dependencies = [ "arrow", "arrow-ord", 
@@ -1192,9 +1305,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" +checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" dependencies = [ "arrow", "async-trait", @@ -1208,9 +1321,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" +checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1225,9 +1338,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" +checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1235,20 +1348,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" +checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "datafusion-optimizer" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" +checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" dependencies = [ "arrow", "chrono", @@ 
-1265,9 +1378,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" +checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" dependencies = [ "ahash", "arrow", @@ -1287,9 +1400,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" +checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" dependencies = [ "ahash", "arrow", @@ -1301,9 +1414,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" +checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" dependencies = [ "arrow", "datafusion-common", @@ -1320,9 +1433,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" +checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" dependencies = [ "ahash", "arrow", @@ -1350,9 +1463,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f6ef4c6eb52370cb48639e25e2331a415aac0b2b0a0a472b36e26603bdf184f" +checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" dependencies = [ "arrow", "chrono", @@ -1366,9 +1479,9 @@ dependencies = [ [[package]] name = 
"datafusion-proto-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5faf4a9bbb0d0a305fea8a6db21ba863286b53e53a212e687d2774028dd6f03f" +checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" dependencies = [ "arrow", "datafusion-common", @@ -1398,11 +1511,35 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-session" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-sql" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" +checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" dependencies = [ "arrow", "bigdecimal", @@ -1417,9 +1554,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c2be3226a683e02cff65181e66e62eba9f812ed0e9b7ec8fe11ac8dabf1a73f" +checksum = "061efc0937f0ce3abb37ed0d56cfa01dd0e654b90e408656d05e846c8b7599fe" dependencies = [ "async-recursion", "async-trait", @@ -1453,7 +1590,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1498,21 +1635,22 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = 
"24.12.23" +version = "25.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" dependencies = [ - "bitflags 1.3.2", + "bitflags", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", + "libz-rs-sys", "miniz_oxide", ] @@ -1593,7 +1731,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1703,9 +1841,9 @@ dependencies = [ [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1986,7 +2124,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2012,9 +2150,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -2207,6 +2345,15 @@ dependencies = [ "libc", ] +[[package]] +name = "libz-rs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" +dependencies = [ + "zlib-rs", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -2241,7 +2388,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ - "twox-hash", + "twox-hash 1.6.3", ] [[package]] @@ -2297,9 +2444,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.4" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -2407,19 +2554,22 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +checksum = "e9ce831b09395f933addbc56d894d889e4b226eba304d4e7adbab591e26daf1e" dependencies = [ "async-trait", "base64 0.22.1", "bytes", "chrono", + "form_urlencoded", "futures", + "http", + "http-body-util", "httparse", "humantime", "hyper", - "itertools 0.13.0", + "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", @@ -2430,7 +2580,8 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "snafu", + "serde_urlencoded", + "thiserror 2.0.11", "tokio", "tracing", "url", @@ -2483,9 +2634,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94243778210509a5a5e9e012872127180c155d73a9cd6e2df9243d213e81e100" +checksum = "cd31a8290ac5b19f09ad77ee7a1e6a541f1be7674ad410547d5f1eef6eef4a9c" dependencies = [ "ahash", "arrow-array", @@ -2513,7 +2664,7 @@ dependencies = [ 
"snap", "thrift", "tokio", - "twox-hash", + "twox-hash 2.1.0", "zstd", ] @@ -2658,12 +2809,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.29" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ "proc-macro2", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2692,7 +2843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -2701,7 +2852,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.98", + "syn 2.0.100", "tempfile", ] @@ -2712,10 +2863,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2747,9 +2898,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" +checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" dependencies = [ "cfg-if", "indoc", @@ -2765,9 +2916,9 @@ dependencies = [ [[package]] name = "pyo3-async-runtimes" -version = "0.23.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "977dc837525cfd22919ba6a831413854beb7c99a256c03bf8624ad707e45810e" +checksum = "dd0b83dc42f9d41f50d38180dad65f0c99763b65a3ff2a81bf351dd35a1df8bf" dependencies = [ "futures", "once_cell", @@ -2778,9 +2929,9 @@ dependencies = [ [[package]] name = 
"pyo3-build-config" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" +checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" dependencies = [ "once_cell", "target-lexicon", @@ -2788,9 +2939,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" +checksum = "05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" dependencies = [ "libc", "pyo3-build-config", @@ -2798,27 +2949,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" +checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "pyo3-macros-backend" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" +checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2891,9 +3042,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -2945,7 +3096,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2954,7 +3105,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.8.0", + "bitflags", ] [[package]] @@ -3104,7 +3255,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.8.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3198,9 +3349,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", "schemars_derive", @@ -3210,14 +3361,14 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3232,7 +3383,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.8.0", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -3251,9 +3402,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.25" +version = "1.0.26" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" dependencies = [ "serde", ] @@ -3266,9 +3417,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -3284,13 +3435,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3301,14 +3452,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "serde_json" -version = "1.0.138" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -3325,7 +3476,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3397,27 +3548,6 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" -[[package]] -name = "snafu" -version = "0.8.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "snap" version = "1.1.1" @@ -3436,9 +3566,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.54.0" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", "recursive", @@ -3453,7 +3583,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3497,14 +3627,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "substrait" -version = "0.53.2" +version = "0.55.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b" +checksum = "048fe52a3664881ccdfdc9bdb0f4e8805f3444ee64abf299d365c54f6a2ffabb" dependencies = [ "heck", "pbjson", @@ -3521,7 +3651,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.98", + "syn 2.0.100", "typify", "walkdir", ] @@ -3545,9 +3675,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", 
@@ -3571,14 +3701,14 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "target-lexicon" -version = "0.12.16" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] name = "tempfile" @@ -3620,7 +3750,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3631,7 +3761,7 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3681,9 +3811,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.43.0" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", @@ -3703,7 +3833,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3718,9 +3848,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" dependencies = [ "bytes", "futures-core", @@ -3775,7 +3905,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3818,6 +3948,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "twox-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" + [[package]] name = "typed-arena" version = "2.0.2" @@ -3841,7 +3977,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3875,7 +4011,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.98", + "syn 2.0.100", "thiserror 2.0.11", "unicode-ident", ] @@ -3893,7 +4029,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.98", + "syn 2.0.100", "typify-impl", ] @@ -4030,7 +4166,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -4065,7 +4201,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4276,7 +4412,7 @@ version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags 2.8.0", + "bitflags", ] [[package]] @@ -4320,7 +4456,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "synstructure", ] @@ -4342,7 +4478,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -4362,7 +4498,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 
2.0.100", "synstructure", ] @@ -4391,9 +4527,15 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] +[[package]] +name = "zlib-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index bc8639d4c..2c4188bb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,25 +34,25 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.43", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } -pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} -arrow = { version = "54.2.1", features = ["pyarrow"] } -datafusion = { version = "46.0.1", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "46.0.1", optional = true } -datafusion-proto = { version = "46.0.1" } -datafusion-ffi = { version = "46.0.1" } +tokio = { version = "1.44", features = ["macros", "rt", "rt-multi-thread", "sync"] } +pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] } +pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]} +arrow = { version = "55.0.0", features = ["pyarrow"] } +datafusion = { version = "47.0.0", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "47.0.0", optional = true } +datafusion-proto = { version = "47.0.0" } +datafusion-ffi = { version = "47.0.0" } prost = "0.13.1" # keep in line with `datafusion-substrait` -uuid = { version = "1.12", features = ["v4"] } +uuid = { version = "1.16", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } 
-async-trait = "0.1.73" +async-trait = "0.1.88" futures = "0.3" -object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } +object_store = { version = "0.12.0", features = ["aws", "gcp", "azure", "http"] } url = "2" [build-dependencies] prost-types = "0.13.1" # keep in line with `datafusion-substrait` -pyo3-build-config = "0.23" +pyo3-build-config = "0.24" [lib] name = "datafusion_python" diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 61b1c7d80..49dfb38cf 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -338,7 +338,6 @@ def test_bit_and_bool_fns(df, name, expr, result): ), [7, 9], ), - ("last_value", f.last_value(column("a")), [3, 6]), ( "last_value_ordered", f.last_value(column("a"), order_by=[column("a").sort(ascending=False)]), diff --git a/src/dataframe.rs b/src/dataframe.rs index 9b610b5d7..787f63520 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -216,7 +216,7 @@ impl PyDataFrame { #[pyo3(signature = (*args))] fn select(&self, args: Vec) -> PyDataFusionResult { - let expr = args.into_iter().map(|e| e.into()).collect(); + let expr: Vec = args.into_iter().map(|e| e.into()).collect(); let df = self.df.as_ref().clone().select(expr)?; Ok(Self::new(df)) } diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 445e4fe74..aab8d7566 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -275,7 +275,9 @@ impl DisplayAs for DatasetExec { Python::with_gil(|py| { let number_of_fragments = self.fragments.bind(py).len(); match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { + DisplayFormatType::Default + | DisplayFormatType::Verbose + | DisplayFormatType::TreeRender => { let projected_columns: Vec = self .schema .fields() diff --git a/src/expr.rs b/src/expr.rs index 561170289..fe0e76daa 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -714,9 +714,19 @@ impl PyExpr { | Operator::BitwiseXor | Operator::BitwiseAnd | Operator::BitwiseOr => 
DataTypeMap::map_from_arrow_type(&DataType::Binary), - Operator::AtArrow | Operator::ArrowAt => { - Err(py_type_err(format!("Unsupported expr: ${op}"))) - } + Operator::AtArrow + | Operator::ArrowAt + | Operator::Arrow + | Operator::LongArrow + | Operator::HashArrow + | Operator::HashLongArrow + | Operator::AtAt + | Operator::IntegerDivide + | Operator::HashMinus + | Operator::AtQuestion + | Operator::Question + | Operator::QuestionAnd + | Operator::QuestionPipe => Err(py_type_err(format!("Unsupported expr: ${op}"))), }, Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value), diff --git a/src/functions.rs b/src/functions.rs index 9c406b95a..476c2b80e 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -375,27 +375,6 @@ macro_rules! aggregate_function { }; } -macro_rules! aggregate_function_vec_args { - ($NAME: ident) => { - aggregate_function_vec_args!($NAME, expr); - }; - ($NAME: ident, $($arg:ident)*) => { - #[pyfunction] - #[pyo3(signature = ($($arg),*, distinct=None, filter=None, order_by=None, null_treatment=None))] - fn $NAME( - $($arg: PyExpr),*, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option - ) -> PyDataFusionResult { - let agg_fn = functions_aggregate::expr_fn::$NAME(vec![$($arg.into()),*]); - - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) - } - }; -} - /// Generates a [pyo3] wrapper for [datafusion::functions::expr_fn] /// /// These functions have explicit named arguments. 
@@ -698,8 +677,22 @@ pub fn approx_percentile_cont_with_weight( add_builder_fns_to_aggregate(agg_fn, None, filter, None, None) } -aggregate_function_vec_args!(last_value); +// We handle first_value explicitly because the signature expects an order_by +// https://github.com/apache/datafusion/issues/12376 +#[pyfunction] +#[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] +pub fn last_value( + expr: PyExpr, + distinct: Option, + filter: Option, + order_by: Option>, + null_treatment: Option, +) -> PyDataFusionResult { + // If we initialize the UDAF with order_by directly, then it gets over-written by the builder + let agg_fn = functions_aggregate::expr_fn::last_value(expr.expr, None); + add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) +} // We handle first_value explicitly because the signature expects an order_by // https://github.com/apache/datafusion/issues/12376 #[pyfunction] From c9f15547cb8019068bbf2dc8eaf148d6eb42bd48 Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Fri, 25 Apr 2025 21:01:54 +0800 Subject: [PATCH 070/248] feat: alias with metadata (#1111) * feat: alias with metadata * fmt --- python/datafusion/expr.py | 14 +++++++++++--- python/datafusion/functions.py | 15 ++++++++++++--- python/tests/test_expr.py | 5 +++++ python/tests/test_functions.py | 5 +++++ src/expr.rs | 6 ++++-- src/functions.rs | 9 +++++++-- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 2697d8143..01e1f3ded 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -406,9 +406,17 @@ def column(value: str) -> Expr: """Creates a new expression representing a column.""" return Expr(expr_internal.RawExpr.column(value)) - def alias(self, name: str) -> Expr: - """Assign a name to the expression.""" - return Expr(self.expr.alias(name)) + def alias(self, name: str, metadata: Optional[dict[str, str]] = None) -> Expr: + """Assign 
a name to the expression. + + Args: + name: The name to assign to the expression. + metadata: Optional metadata to attach to the expression. + + Returns: + A new expression with the assigned name. + """ + return Expr(self.expr.alias(name, metadata)) def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 5cf914e16..f430cdf4b 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -372,9 +372,18 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) -def alias(expr: Expr, name: str) -> Expr: - """Creates an alias expression.""" - return Expr(f.alias(expr.expr, name)) +def alias(expr: Expr, name: str, metadata: Optional[dict[str, str]] = None) -> Expr: + """Creates an alias expression with an optional metadata dictionary. 
+ + Args: + expr: The expression to alias + name: The alias name + metadata: Optional metadata to attach to the column + + Returns: + An expression with the given alias + """ + return Expr(f.alias(expr.expr, name, metadata)) def col(name: str) -> Expr: diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 926e69845..dcf75f021 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -247,3 +247,8 @@ def test_fill_null(df): assert result.column(0) == pa.array([1, 2, 100]) assert result.column(1) == pa.array([4, 25, 6]) assert result.column(2) == pa.array([1234, 1234, 8]) + + +def test_alias_with_metadata(df): + df = df.select(col("a").alias("b", {"key": "value"})) + assert df.schema().field("b").metadata == {b"key": b"value"} diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 37f2075f5..90cf01f7e 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -1231,3 +1231,8 @@ def test_between_default(df): actual = df.collect()[0].to_pydict() assert actual == expected + + +def test_alias_with_metadata(df): + df = df.select(f.alias(f.col("a"), "b", {"key": "value"})) + assert df.schema().field("b").metadata == {b"key": b"value"} diff --git a/src/expr.rs b/src/expr.rs index fe0e76daa..7d4aa8798 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -22,6 +22,7 @@ use datafusion::logical_expr::{ }; use pyo3::IntoPyObjectExt; use pyo3::{basic::CompareOp, prelude::*}; +use std::collections::HashMap; use std::convert::{From, Into}; use std::sync::Arc; use window::PyWindowFrame; @@ -275,8 +276,9 @@ impl PyExpr { } /// assign a name to the PyExpr - pub fn alias(&self, name: &str) -> PyExpr { - self.expr.clone().alias(name).into() + #[pyo3(signature = (name, metadata=None))] + pub fn alias(&self, name: &str, metadata: Option>) -> PyExpr { + self.expr.clone().alias_with_metadata(name, metadata).into() } /// Create a sort PyExpr from an existing PyExpr. 
diff --git a/src/functions.rs b/src/functions.rs index 476c2b80e..caa79b8ad 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; + use datafusion::functions_aggregate::all_default_aggregate_functions; use datafusion::functions_window::all_default_window_functions; use datafusion::logical_expr::expr::WindowFunctionParams; @@ -205,10 +207,13 @@ fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult /// Creates a new Alias Expr #[pyfunction] -fn alias(expr: PyExpr, name: &str) -> PyResult { +#[pyo3(signature = (expr, name, metadata=None))] +fn alias(expr: PyExpr, name: &str, metadata: Option>) -> PyResult { let relation: Option = None; Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Alias(Alias::new(expr.expr, relation, name)), + expr: datafusion::logical_expr::Expr::Alias( + Alias::new(expr.expr, relation, name).with_metadata(metadata), + ), }) } From 91b66351fb19d91b62e8db83444141743b106e43 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sun, 27 Apr 2025 21:41:01 +0800 Subject: [PATCH 071/248] Add DataFrame usage guide with HTML rendering customization options (#1108) * docs: enhance user guide with detailed DataFrame operations and examples * move /docs/source/api/dataframe.rst into user-guide * docs: remove DataFrame API documentation * docs: fix formatting inconsistencies in DataFrame user guide * Two minor corrections to documentation rendering --------- Co-authored-by: Tim Saucer --- docs/source/index.rst | 1 + docs/source/user-guide/basics.rst | 5 +- docs/source/user-guide/dataframe.rst | 179 +++++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 docs/source/user-guide/dataframe.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 558b2d572..c18793822 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -72,6 +72,7 @@ Example user-guide/introduction 
user-guide/basics user-guide/data-sources + user-guide/dataframe user-guide/common-operations/index user-guide/io/index user-guide/configuration diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index 6636c0c6a..2975d9a6b 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -21,7 +21,8 @@ Concepts ======== In this section, we will cover a basic example to introduce a few key concepts. We will use the -2021 Yellow Taxi Trip Records ([download](https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet)), from the [TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page). +2021 Yellow Taxi Trip Records (`download `_), +from the `TLC Trip Record Data `_. .. ipython:: python @@ -72,6 +73,8 @@ DataFrames are typically created by calling a method on :py:class:`~datafusion.c calling the transformation methods, such as :py:func:`~datafusion.dataframe.DataFrame.filter`, :py:func:`~datafusion.dataframe.DataFrame.select`, :py:func:`~datafusion.dataframe.DataFrame.aggregate`, and :py:func:`~datafusion.dataframe.DataFrame.limit` to build up a query definition. +For more details on working with DataFrames, including visualization options and conversion to other formats, see :doc:`dataframe`. + Expressions ----------- diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst new file mode 100644 index 000000000..a78fd8073 --- /dev/null +++ b/docs/source/user-guide/dataframe.rst @@ -0,0 +1,179 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +DataFrames +========== + +Overview +-------- + +DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. +It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust +and Arrow. + +A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation. +The actual execution happens when terminal operations like ``collect()`` or ``show()`` are called. + +Basic Usage +----------- + +.. code-block:: python + + import datafusion + from datafusion import col, lit + + # Create a context and register a data source + ctx = datafusion.SessionContext() + ctx.register_csv("my_table", "path/to/data.csv") + + # Create and manipulate a DataFrame + df = ctx.sql("SELECT * FROM my_table") + + # Or use the DataFrame API directly + df = (ctx.table("my_table") + .filter(col("age") > lit(25)) + .select([col("name"), col("age")])) + + # Execute and collect results + result = df.collect() + + # Display the first few rows + df.show() + +HTML Rendering +-------------- + +When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will +automatically display as formatted HTML tables, making it easier to visualize your data. + +The ``_repr_html_`` method is called automatically by Jupyter to render a DataFrame. This method +controls how DataFrames appear in notebook environments, providing a richer visualization than +plain text output. 
+ +Customizing HTML Rendering +-------------------------- + +You can customize how DataFrames are rendered in HTML by configuring the formatter: + +.. code-block:: python + + from datafusion.html_formatter import configure_formatter + + # Change the default styling + configure_formatter( + max_rows=50, # Maximum number of rows to display + max_width=None, # Maximum width in pixels (None for auto) + theme="light", # Theme: "light" or "dark" + precision=2, # Floating point precision + thousands_separator=",", # Separator for thousands + date_format="%Y-%m-%d", # Date format + truncate_width=20 # Max width for string columns before truncating + ) + +The formatter settings affect all DataFrames displayed after configuration. + +Custom Style Providers +---------------------- + +For advanced styling needs, you can create a custom style provider: + +.. code-block:: python + + from datafusion.html_formatter import StyleProvider, configure_formatter + + class MyStyleProvider(StyleProvider): + def get_table_styles(self): + return { + "table": "border-collapse: collapse; width: 100%;", + "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;", + "td": "border: 1px solid #ddd; padding: 8px;", + "tr:nth-child(even)": "background-color: #f2f2f2;", + } + + def get_value_styles(self, dtype, value): + """Return custom styles for specific values""" + if dtype == "float" and value < 0: + return "color: red;" + return None + + # Apply the custom style provider + configure_formatter(style_provider=MyStyleProvider()) + +Creating a Custom Formatter +--------------------------- + +For complete control over rendering, you can implement a custom formatter: + +.. code-block:: python + + from datafusion.html_formatter import Formatter, get_formatter + + class MyFormatter(Formatter): + def format_html(self, batches, schema, has_more=False, table_uuid=None): + # Create your custom HTML here + html = "
" + # ... formatting logic ... + html += "
" + return html + + # Set as the global formatter + configure_formatter(formatter_class=MyFormatter) + + # Or use the formatter just for specific operations + formatter = get_formatter() + custom_html = formatter.format_html(batches, schema) + +Managing Formatters +------------------- + +Reset to default formatting: + +.. code-block:: python + + from datafusion.html_formatter import reset_formatter + + # Reset to default settings + reset_formatter() + +Get the current formatter settings: + +.. code-block:: python + + from datafusion.html_formatter import get_formatter + + formatter = get_formatter() + print(formatter.max_rows) + print(formatter.theme) + +Contextual Formatting +--------------------- + +You can also use a context manager to temporarily change formatting settings: + +.. code-block:: python + + from datafusion.html_formatter import formatting_context + + # Default formatting + df.show() + + # Temporarily use different formatting + with formatting_context(max_rows=100, theme="dark"): + df.show() # Will use the temporary settings + + # Back to default formatting + df.show() From 00dea113eb85d54b758eb3451ea448c7b9263c1c Mon Sep 17 00:00:00 2001 From: deanm0000 <37878412+deanm0000@users.noreply.github.com> Date: Sun, 27 Apr 2025 10:14:54 -0400 Subject: [PATCH 072/248] Improve col class access using __getattr__ Co-authored-by: Tim Saucer --- python/datafusion/__init__.py | 12 ++-------- python/datafusion/col.py | 45 +++++++++++++++++++++++++++++++++++ python/tests/test_expr.py | 23 ++++++++++++++++++ 3 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 python/datafusion/col.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 60d0d61b4..15ceefbdb 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,6 +26,8 @@ except ImportError: import importlib_metadata +from datafusion.col import col, column + from . 
import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. @@ -95,16 +97,6 @@ ] -def column(value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - -def col(value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - def literal(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/col.py b/python/datafusion/col.py new file mode 100644 index 000000000..1141dc092 --- /dev/null +++ b/python/datafusion/col.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Col class.""" + +from datafusion.expr import Expr + + +class Col: + """Create a column expression. + + This helper class allows an extra syntax of creating columns using the __getattr__ + method. 
+ """ + + def __call__(self, value: str) -> Expr: + """Create a column expression.""" + return Expr.column(value) + + def __getattr__(self, value: str) -> Expr: + """Create a column using attribute syntax.""" + # For autocomplete to work with IPython + if value.startswith("__wrapped__"): + return getattr(type(self), value) + + return Expr.column(value) + + +col: Col = Col() +column: Col = Col() +__all__ = ["col", "column"] diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index dcf75f021..3651b60d6 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -249,6 +249,29 @@ def test_fill_null(df): assert result.column(2) == pa.array([1234, 1234, 8]) +def test_col_getattr(): + ctx = SessionContext() + data = { + "array_values": [[1, 2, 3], [4, 5], [6], []], + "struct_values": [ + {"name": "Alice", "age": 15}, + {"name": "Bob", "age": 14}, + {"name": "Charlie", "age": 13}, + {"name": None, "age": 12}, + ], + } + df = ctx.from_pydict(data, name="table1") + + names = df.select(col.struct_values["name"].alias("name")).collect() + names = [r.as_py() for rs in names for r in rs["name"]] + + array_values = df.select(col.array_values[1].alias("value")).collect() + array_values = [r.as_py() for rs in array_values for r in rs["value"]] + + assert names == ["Alice", "Bob", "Charlie", None] + assert array_values == [2, 5, None, None] + + def test_alias_with_metadata(df): df = df.select(col("a").alias("b", {"key": "value"})) assert df.schema().field("b").metadata == {b"key": b"value"} From 5a7f638286d2397bbce87e0e8197bebb46f26649 Mon Sep 17 00:00:00 2001 From: deanm0000 <37878412+deanm0000@users.noreply.github.com> Date: Sun, 27 Apr 2025 10:17:41 -0400 Subject: [PATCH 073/248] Add expression chaining of single parameter scalar functions --- python/datafusion/expr.py | 289 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 289 insertions(+) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 01e1f3ded..84e9d4ebb 100644 --- 
a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Optional +import functions as F import pyarrow as pa try: @@ -611,6 +612,294 @@ def over(self, window: Window) -> Expr: ) ) + def asin(self) -> Expr: + """Returns the arc sine or inverse sine of a number.""" + return F.asin(self) + + def array_pop_back(self) -> Expr: + """Returns the array without the last element.""" + return F.array_pop_back(self) + + def reverse(self) -> Expr: + """Reverse the string argument.""" + return F.reverse(self) + + def bit_length(self) -> Expr: + """Returns the number of bits in the string argument.""" + return F.bit_length(self) + + def array_length(self) -> Expr: + """Returns the length of the array.""" + return F.array_length(self) + + def array_ndims(self) -> Expr: + """Returns the number of dimensions of the array.""" + return F.array_ndims(self) + + def to_hex(self) -> Expr: + """Converts an integer to a hexadecimal string.""" + return F.to_hex(self) + + def array_dims(self) -> Expr: + """Returns an array of the array's dimensions.""" + return F.array_dims(self) + + def from_unixtime(self) -> Expr: + """Converts an integer to RFC3339 timestamp format string.""" + return F.from_unixtime(self) + + def array_empty(self) -> Expr: + """Returns a boolean indicating whether the array is empty.""" + return F.array_empty(self) + + def sin(self) -> Expr: + """Returns the sine of the argument.""" + return F.sin(self) + + def log10(self) -> Expr: + """Base 10 logarithm of the argument.""" + return F.log10(self) + + def initcap(self) -> Expr: + """Set the initial letter of each word to capital. + + Converts the first letter of each word in ``string`` to uppercase and the remaining + characters to lowercase. + """ + return F.initcap(self) + + def list_distinct(self) -> Expr: + """Returns distinct values from the array after removing duplicates. + + This is an alias for :py:func:`array_distinct`. 
+ """ + return F.list_distinct(self) + + def iszero(self) -> Expr: + """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" + return F.iszero(self) + + def array_distinct(self) -> Expr: + """Returns distinct values from the array after removing duplicates.""" + return F.array_distinct(self) + + def arrow_typeof(self) -> Expr: + """Returns the Arrow type of the expression.""" + return F.arrow_typeof(self) + + def length(self) -> Expr: + """The number of characters in the ``string``.""" + return F.length(self) + + def lower(self) -> Expr: + """Converts a string to lowercase.""" + return F.lower(self) + + def acos(self) -> Expr: + """Returns the arc cosine or inverse cosine of a number. + + Returns: + -------- + Expr + A new expression representing the arc cosine of the input expression. + """ + return F.acos(self) + + def ascii(self) -> Expr: + """Returns the numeric code of the first character of the argument.""" + return F.ascii(self) + + def sha384(self) -> Expr: + """Computes the SHA-384 hash of a binary string.""" + return F.sha384(self) + + def isnan(self) -> Expr: + """Returns true if a given number is +NaN or -NaN otherwise returns false.""" + return F.isnan(self) + + def degrees(self) -> Expr: + """Converts the argument from radians to degrees.""" + return F.degrees(self) + + def cardinality(self) -> Expr: + """Returns the total number of elements in the array.""" + return F.cardinality(self) + + def sha224(self) -> Expr: + """Computes the SHA-224 hash of a binary string.""" + return F.sha224(self) + + def asinh(self) -> Expr: + """Returns inverse hyperbolic sine.""" + return F.asinh(self) + + def flatten(self) -> Expr: + """Flattens an array of arrays into a single array.""" + return F.flatten(self) + + def exp(self) -> Expr: + """Returns the exponential of the argument.""" + return F.exp(self) + + def abs(self) -> Expr: + """Return the absolute value of a given number. 
+ + Returns: + -------- + Expr + A new expression representing the absolute value of the input expression. + """ + return F.abs(self) + + def btrim(self) -> Expr: + """Removes all characters, spaces by default, from both sides of a string.""" + return F.btrim(self) + + def md5(self) -> Expr: + """Computes an MD5 128-bit checksum for a string expression.""" + return F.md5(self) + + def octet_length(self) -> Expr: + """Returns the number of bytes of a string.""" + return F.octet_length(self) + + def cosh(self) -> Expr: + """Returns the hyperbolic cosine of the argument.""" + return F.cosh(self) + + def radians(self) -> Expr: + """Converts the argument from degrees to radians.""" + return F.radians(self) + + def sqrt(self) -> Expr: + """Returns the square root of the argument.""" + return F.sqrt(self) + + def character_length(self) -> Expr: + """Returns the number of characters in the argument.""" + return F.character_length(self) + + def tanh(self) -> Expr: + """Returns the hyperbolic tangent of the argument.""" + return F.tanh(self) + + def atan(self) -> Expr: + """Returns inverse tangent of a number.""" + return F.atan(self) + + def rtrim(self) -> Expr: + """Removes all characters, spaces by default, from the end of a string.""" + return F.rtrim(self) + + def atanh(self) -> Expr: + """Returns inverse hyperbolic tangent.""" + return F.atanh(self) + + def list_dims(self) -> Expr: + """Returns an array of the array's dimensions. + + This is an alias for :py:func:`array_dims`. 
+ """ + return F.list_dims(self) + + def sha256(self) -> Expr: + """Computes the SHA-256 hash of a binary string.""" + return F.sha256(self) + + def factorial(self) -> Expr: + """Returns the factorial of the argument.""" + return F.factorial(self) + + def acosh(self) -> Expr: + """Returns inverse hyperbolic cosine.""" + return F.acosh(self) + + def floor(self) -> Expr: + """Returns the nearest integer less than or equal to the argument.""" + return F.floor(self) + + def ceil(self) -> Expr: + """Returns the nearest integer greater than or equal to argument.""" + return F.ceil(self) + + def list_length(self) -> Expr: + """Returns the length of the array. + + This is an alias for :py:func:`array_length`. + """ + return F.list_length(self) + + def upper(self) -> Expr: + """Converts a string to uppercase.""" + return F.upper(self) + + def chr(self) -> Expr: + """Converts the Unicode code point to a UTF8 character.""" + return F.chr(self) + + def ln(self) -> Expr: + """Returns the natural logarithm (base e) of the argument.""" + return F.ln(self) + + def tan(self) -> Expr: + """Returns the tangent of the argument.""" + return F.tan(self) + + def array_pop_front(self) -> Expr: + """Returns the array without the first element.""" + return F.array_pop_front(self) + + def cbrt(self) -> Expr: + """Returns the cube root of a number.""" + return F.cbrt(self) + + def sha512(self) -> Expr: + """Computes the SHA-512 hash of a binary string.""" + return F.sha512(self) + + def char_length(self) -> Expr: + """The number of characters in the ``string``.""" + return F.char_length(self) + + def list_ndims(self) -> Expr: + """Returns the number of dimensions of the array. + + This is an alias for :py:func:`array_ndims`. 
+ """ + return F.list_ndims(self) + + def trim(self) -> Expr: + """Removes all characters, spaces by default, from both sides of a string.""" + return F.trim(self) + + def cos(self) -> Expr: + """Returns the cosine of the argument.""" + return F.cos(self) + + def sinh(self) -> Expr: + """Returns the hyperbolic sine of the argument.""" + return F.sinh(self) + + def empty(self) -> Expr: + """This is an alias for :py:func:`array_empty`.""" + return F.empty(self) + + def ltrim(self) -> Expr: + """Removes all characters, spaces by default, from the beginning of a string.""" + return F.ltrim(self) + + def signum(self) -> Expr: + """Returns the sign of the argument (-1, 0, +1).""" + return F.signum(self) + + def log2(self) -> Expr: + """Base 2 logarithm of the argument.""" + return F.log2(self) + + def cot(self) -> Expr: + """Returns the cotangent of the argument.""" + return F.cot(self) + class ExprFuncBuilder: def __init__(self, builder: expr_internal.ExprFuncBuilder) -> None: From 10600fb8fc32eba43b0b0f198325b55c63f8223d Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Mon, 28 Apr 2025 21:25:59 +0800 Subject: [PATCH 074/248] fix: recursive import (#1117) * fix: recursive import * format * format --- python/datafusion/expr.py | 135 +++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 3 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 84e9d4ebb..3750eeb3f 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, Any, ClassVar, Optional -import functions as F import pyarrow as pa try: @@ -614,58 +613,84 @@ def over(self, window: Window) -> Expr: def asin(self) -> Expr: """Returns the arc sine or inverse sine of a number.""" + from . import functions as F + return F.asin(self) def array_pop_back(self) -> Expr: """Returns the array without the last element.""" + from . 
import functions as F + return F.array_pop_back(self) def reverse(self) -> Expr: """Reverse the string argument.""" + from . import functions as F + return F.reverse(self) def bit_length(self) -> Expr: """Returns the number of bits in the string argument.""" + from . import functions as F + return F.bit_length(self) def array_length(self) -> Expr: """Returns the length of the array.""" + from . import functions as F + return F.array_length(self) def array_ndims(self) -> Expr: """Returns the number of dimensions of the array.""" + from . import functions as F + return F.array_ndims(self) def to_hex(self) -> Expr: """Converts an integer to a hexadecimal string.""" + from . import functions as F + return F.to_hex(self) def array_dims(self) -> Expr: """Returns an array of the array's dimensions.""" + from . import functions as F + return F.array_dims(self) def from_unixtime(self) -> Expr: """Converts an integer to RFC3339 timestamp format string.""" + from . import functions as F + return F.from_unixtime(self) def array_empty(self) -> Expr: """Returns a boolean indicating whether the array is empty.""" + from . import functions as F + return F.array_empty(self) def sin(self) -> Expr: """Returns the sine of the argument.""" + from . import functions as F + return F.sin(self) def log10(self) -> Expr: """Base 10 logarithm of the argument.""" + from . import functions as F + return F.log10(self) def initcap(self) -> Expr: """Set the initial letter of each word to capital. - Converts the first letter of each word in ``string`` to uppercase and the remaining - characters to lowercase. + Converts the first letter of each word in ``string`` + to uppercase and the remaining characters to lowercase. """ + from . import functions as F + return F.initcap(self) def list_distinct(self) -> Expr: @@ -673,26 +698,38 @@ def list_distinct(self) -> Expr: This is an alias for :py:func:`array_distinct`. """ + from . 
import functions as F + return F.list_distinct(self) def iszero(self) -> Expr: """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" + from . import functions as F + return F.iszero(self) def array_distinct(self) -> Expr: """Returns distinct values from the array after removing duplicates.""" + from . import functions as F + return F.array_distinct(self) def arrow_typeof(self) -> Expr: """Returns the Arrow type of the expression.""" + from . import functions as F + return F.arrow_typeof(self) def length(self) -> Expr: """The number of characters in the ``string``.""" + from . import functions as F + return F.length(self) def lower(self) -> Expr: """Converts a string to lowercase.""" + from . import functions as F + return F.lower(self) def acos(self) -> Expr: @@ -703,42 +740,62 @@ def acos(self) -> Expr: Expr A new expression representing the arc cosine of the input expression. """ + from . import functions as F + return F.acos(self) def ascii(self) -> Expr: """Returns the numeric code of the first character of the argument.""" + from . import functions as F + return F.ascii(self) def sha384(self) -> Expr: """Computes the SHA-384 hash of a binary string.""" + from . import functions as F + return F.sha384(self) def isnan(self) -> Expr: """Returns true if a given number is +NaN or -NaN otherwise returns false.""" + from . import functions as F + return F.isnan(self) def degrees(self) -> Expr: """Converts the argument from radians to degrees.""" + from . import functions as F + return F.degrees(self) def cardinality(self) -> Expr: """Returns the total number of elements in the array.""" + from . import functions as F + return F.cardinality(self) def sha224(self) -> Expr: """Computes the SHA-224 hash of a binary string.""" + from . import functions as F + return F.sha224(self) def asinh(self) -> Expr: """Returns inverse hyperbolic sine.""" + from . 
import functions as F + return F.asinh(self) def flatten(self) -> Expr: """Flattens an array of arrays into a single array.""" + from . import functions as F + return F.flatten(self) def exp(self) -> Expr: """Returns the exponential of the argument.""" + from . import functions as F + return F.exp(self) def abs(self) -> Expr: @@ -749,50 +806,74 @@ def abs(self) -> Expr: Expr A new expression representing the absolute value of the input expression. """ + from . import functions as F + return F.abs(self) def btrim(self) -> Expr: """Removes all characters, spaces by default, from both sides of a string.""" + from . import functions as F + return F.btrim(self) def md5(self) -> Expr: """Computes an MD5 128-bit checksum for a string expression.""" + from . import functions as F + return F.md5(self) def octet_length(self) -> Expr: """Returns the number of bytes of a string.""" + from . import functions as F + return F.octet_length(self) def cosh(self) -> Expr: """Returns the hyperbolic cosine of the argument.""" + from . import functions as F + return F.cosh(self) def radians(self) -> Expr: """Converts the argument from degrees to radians.""" + from . import functions as F + return F.radians(self) def sqrt(self) -> Expr: """Returns the square root of the argument.""" + from . import functions as F + return F.sqrt(self) def character_length(self) -> Expr: """Returns the number of characters in the argument.""" + from . import functions as F + return F.character_length(self) def tanh(self) -> Expr: """Returns the hyperbolic tangent of the argument.""" + from . import functions as F + return F.tanh(self) def atan(self) -> Expr: """Returns inverse tangent of a number.""" + from . import functions as F + return F.atan(self) def rtrim(self) -> Expr: """Removes all characters, spaces by default, from the end of a string.""" + from . import functions as F + return F.rtrim(self) def atanh(self) -> Expr: """Returns inverse hyperbolic tangent.""" + from . 
import functions as F + return F.atanh(self) def list_dims(self) -> Expr: @@ -800,26 +881,38 @@ def list_dims(self) -> Expr: This is an alias for :py:func:`array_dims`. """ + from . import functions as F + return F.list_dims(self) def sha256(self) -> Expr: """Computes the SHA-256 hash of a binary string.""" + from . import functions as F + return F.sha256(self) def factorial(self) -> Expr: """Returns the factorial of the argument.""" + from . import functions as F + return F.factorial(self) def acosh(self) -> Expr: """Returns inverse hyperbolic cosine.""" + from . import functions as F + return F.acosh(self) def floor(self) -> Expr: """Returns the nearest integer less than or equal to the argument.""" + from . import functions as F + return F.floor(self) def ceil(self) -> Expr: """Returns the nearest integer greater than or equal to argument.""" + from . import functions as F + return F.ceil(self) def list_length(self) -> Expr: @@ -827,38 +920,56 @@ def list_length(self) -> Expr: This is an alias for :py:func:`array_length`. """ + from . import functions as F + return F.list_length(self) def upper(self) -> Expr: """Converts a string to uppercase.""" + from . import functions as F + return F.upper(self) def chr(self) -> Expr: """Converts the Unicode code point to a UTF8 character.""" + from . import functions as F + return F.chr(self) def ln(self) -> Expr: """Returns the natural logarithm (base e) of the argument.""" + from . import functions as F + return F.ln(self) def tan(self) -> Expr: """Returns the tangent of the argument.""" + from . import functions as F + return F.tan(self) def array_pop_front(self) -> Expr: """Returns the array without the first element.""" + from . import functions as F + return F.array_pop_front(self) def cbrt(self) -> Expr: """Returns the cube root of a number.""" + from . import functions as F + return F.cbrt(self) def sha512(self) -> Expr: """Computes the SHA-512 hash of a binary string.""" + from . 
import functions as F + return F.sha512(self) def char_length(self) -> Expr: """The number of characters in the ``string``.""" + from . import functions as F + return F.char_length(self) def list_ndims(self) -> Expr: @@ -866,38 +977,56 @@ def list_ndims(self) -> Expr: This is an alias for :py:func:`array_ndims`. """ + from . import functions as F + return F.list_ndims(self) def trim(self) -> Expr: """Removes all characters, spaces by default, from both sides of a string.""" + from . import functions as F + return F.trim(self) def cos(self) -> Expr: """Returns the cosine of the argument.""" + from . import functions as F + return F.cos(self) def sinh(self) -> Expr: """Returns the hyperbolic sine of the argument.""" + from . import functions as F + return F.sinh(self) def empty(self) -> Expr: """This is an alias for :py:func:`array_empty`.""" + from . import functions as F + return F.empty(self) def ltrim(self) -> Expr: """Removes all characters, spaces by default, from the beginning of a string.""" + from . import functions as F + return F.ltrim(self) def signum(self) -> Expr: """Returns the sign of the argument (-1, 0, +1).""" + from . import functions as F + return F.signum(self) def log2(self) -> Expr: """Base 2 logarithm of the argument.""" + from . import functions as F + return F.log2(self) def cot(self) -> Expr: """Returns the cotangent of the argument.""" + from . 
import functions as F + return F.cot(self) From 6fbeceff6091aee610273d9b27106483f9ce24ea Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 1 May 2025 12:10:40 -0400 Subject: [PATCH 075/248] Copy over protected branch rule from datafusion repo (#1122) --- .asf.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.asf.yaml b/.asf.yaml index e96b43cf0..75b2262de 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -29,6 +29,10 @@ github: rebase: false features: issues: true + protected_branches: + main: + required_pull_request_reviews: + required_approving_review_count: 1 staging: whoami: asf-staging From 15b96c48eb76ad8ea19022df427aa25b06c3012b Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Mon, 5 May 2025 21:43:03 +0800 Subject: [PATCH 076/248] feat: add missing PyLogicalPlan to_variant (#1085) * add expr * format * clippy * add license * update * ruff * Update expr.py * add test * ruff * Minor ruff whitespace change * Minor format change --------- Co-authored-by: Tim Saucer --- python/datafusion/common.py | 6 + python/datafusion/expr.py | 54 +++- python/tests/test_expr.py | 86 ++++++ src/common.rs | 3 + src/common/schema.rs | 89 ++++++ src/expr.rs | 41 +++ src/expr/copy_to.rs | 138 +++++++++ src/expr/create_catalog.rs | 100 +++++++ src/expr/create_catalog_schema.rs | 100 +++++++ src/expr/create_external_table.rs | 183 ++++++++++++ src/expr/create_function.rs | 182 ++++++++++++ src/expr/create_index.rs | 129 +++++++++ src/expr/describe_table.rs | 92 ++++++ src/expr/dml.rs | 136 +++++++++ src/expr/drop_catalog_schema.rs | 116 ++++++++ src/expr/drop_function.rs | 95 +++++++ src/expr/drop_view.rs | 102 +++++++ src/expr/recursive_query.rs | 111 ++++++++ src/expr/statement.rs | 454 ++++++++++++++++++++++++++++++ src/expr/values.rs | 86 ++++++ src/sql/logical.rs | 85 +++++- 21 files changed, 2372 insertions(+), 16 deletions(-) create mode 100644 src/expr/copy_to.rs create mode 100644 src/expr/create_catalog.rs create mode 100644 src/expr/create_catalog_schema.rs create 
mode 100644 src/expr/create_external_table.rs create mode 100644 src/expr/create_function.rs create mode 100644 src/expr/create_index.rs create mode 100644 src/expr/describe_table.rs create mode 100644 src/expr/dml.rs create mode 100644 src/expr/drop_catalog_schema.rs create mode 100644 src/expr/drop_function.rs create mode 100644 src/expr/drop_view.rs create mode 100644 src/expr/recursive_query.rs create mode 100644 src/expr/statement.rs create mode 100644 src/expr/values.rs diff --git a/python/datafusion/common.py b/python/datafusion/common.py index e762a993b..c689a816d 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -33,8 +33,12 @@ SqlTable = common_internal.SqlTable SqlType = common_internal.SqlType SqlView = common_internal.SqlView +TableType = common_internal.TableType +TableSource = common_internal.TableSource +Constraints = common_internal.Constraints __all__ = [ + "Constraints", "DFSchema", "DataType", "DataTypeMap", @@ -47,6 +51,8 @@ "SqlTable", "SqlType", "SqlView", + "TableSource", + "TableType", ] diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 3750eeb3f..9e58873d0 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -54,14 +54,29 @@ Case = expr_internal.Case Cast = expr_internal.Cast Column = expr_internal.Column +CopyTo = expr_internal.CopyTo +CreateCatalog = expr_internal.CreateCatalog +CreateCatalogSchema = expr_internal.CreateCatalogSchema +CreateExternalTable = expr_internal.CreateExternalTable +CreateFunction = expr_internal.CreateFunction +CreateFunctionBody = expr_internal.CreateFunctionBody +CreateIndex = expr_internal.CreateIndex CreateMemoryTable = expr_internal.CreateMemoryTable CreateView = expr_internal.CreateView +Deallocate = expr_internal.Deallocate +DescribeTable = expr_internal.DescribeTable Distinct = expr_internal.Distinct +DmlStatement = expr_internal.DmlStatement +DropCatalogSchema = expr_internal.DropCatalogSchema +DropFunction = 
expr_internal.DropFunction DropTable = expr_internal.DropTable +DropView = expr_internal.DropView EmptyRelation = expr_internal.EmptyRelation +Execute = expr_internal.Execute Exists = expr_internal.Exists Explain = expr_internal.Explain Extension = expr_internal.Extension +FileType = expr_internal.FileType Filter = expr_internal.Filter GroupingSet = expr_internal.GroupingSet Join = expr_internal.Join @@ -83,21 +98,31 @@ Literal = expr_internal.Literal Negative = expr_internal.Negative Not = expr_internal.Not +OperateFunctionArg = expr_internal.OperateFunctionArg Partitioning = expr_internal.Partitioning Placeholder = expr_internal.Placeholder +Prepare = expr_internal.Prepare Projection = expr_internal.Projection +RecursiveQuery = expr_internal.RecursiveQuery Repartition = expr_internal.Repartition ScalarSubquery = expr_internal.ScalarSubquery ScalarVariable = expr_internal.ScalarVariable +SetVariable = expr_internal.SetVariable SimilarTo = expr_internal.SimilarTo Sort = expr_internal.Sort Subquery = expr_internal.Subquery SubqueryAlias = expr_internal.SubqueryAlias TableScan = expr_internal.TableScan +TransactionAccessMode = expr_internal.TransactionAccessMode +TransactionConclusion = expr_internal.TransactionConclusion +TransactionEnd = expr_internal.TransactionEnd +TransactionIsolationLevel = expr_internal.TransactionIsolationLevel +TransactionStart = expr_internal.TransactionStart TryCast = expr_internal.TryCast Union = expr_internal.Union Unnest = expr_internal.Unnest UnnestExpr = expr_internal.UnnestExpr +Values = expr_internal.Values WindowExpr = expr_internal.WindowExpr __all__ = [ @@ -111,15 +136,30 @@ "CaseBuilder", "Cast", "Column", + "CopyTo", + "CreateCatalog", + "CreateCatalogSchema", + "CreateExternalTable", + "CreateFunction", + "CreateFunctionBody", + "CreateIndex", "CreateMemoryTable", "CreateView", + "Deallocate", + "DescribeTable", "Distinct", + "DmlStatement", + "DropCatalogSchema", + "DropFunction", "DropTable", + "DropView", "EmptyRelation", + 
"Execute", "Exists", "Explain", "Expr", "Extension", + "FileType", "Filter", "GroupingSet", "ILike", @@ -142,22 +182,32 @@ "Literal", "Negative", "Not", + "OperateFunctionArg", "Partitioning", "Placeholder", + "Prepare", "Projection", + "RecursiveQuery", "Repartition", "ScalarSubquery", "ScalarVariable", + "SetVariable", "SimilarTo", "Sort", "SortExpr", "Subquery", "SubqueryAlias", "TableScan", + "TransactionAccessMode", + "TransactionConclusion", + "TransactionEnd", + "TransactionIsolationLevel", + "TransactionStart", "TryCast", "Union", "Unnest", "UnnestExpr", + "Values", "Window", "WindowExpr", "WindowFrame", @@ -686,8 +736,8 @@ def log10(self) -> Expr: def initcap(self) -> Expr: """Set the initial letter of each word to capital. - Converts the first letter of each word in ``string`` - to uppercase and the remaining characters to lowercase. + Converts the first letter of each word in ``string`` to uppercase and the + remaining characters to lowercase. """ from . import functions as F diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 3651b60d6..58a202724 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -23,12 +23,21 @@ AggregateFunction, BinaryExpr, Column, + CopyTo, + CreateIndex, + DescribeTable, + DmlStatement, + DropCatalogSchema, Filter, Limit, Literal, Projection, + RecursiveQuery, Sort, TableScan, + TransactionEnd, + TransactionStart, + Values, ) @@ -249,6 +258,83 @@ def test_fill_null(df): assert result.column(2) == pa.array([1234, 1234, 8]) +def test_copy_to(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + df = ctx.sql("COPY foo TO bar STORED AS CSV") + plan = df.logical_plan() + plan = plan.to_variant() + assert isinstance(plan, CopyTo) + + +def test_create_index(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + plan = ctx.sql("create index idx on foo (a)").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, CreateIndex) 
+ + +def test_describe_table(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + plan = ctx.sql("describe foo").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, DescribeTable) + + +def test_dml_statement(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + plan = ctx.sql("insert into foo values (1, 2)").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, DmlStatement) + + +def drop_catalog_schema(): + ctx = SessionContext() + plan = ctx.sql("drop schema cat").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, DropCatalogSchema) + + +def test_recursive_query(): + ctx = SessionContext() + plan = ctx.sql( + """ + WITH RECURSIVE cte AS ( + SELECT 1 as n + UNION ALL + SELECT n + 1 FROM cte WHERE n < 5 + ) + SELECT * FROM cte; + """ + ).logical_plan() + plan = plan.inputs()[0].inputs()[0].to_variant() + assert isinstance(plan, RecursiveQuery) + + +def test_values(): + ctx = SessionContext() + plan = ctx.sql("values (1, 'foo'), (2, 'bar')").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, Values) + + +def test_transaction_start(): + ctx = SessionContext() + plan = ctx.sql("START TRANSACTION").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, TransactionStart) + + +def test_transaction_end(): + ctx = SessionContext() + plan = ctx.sql("COMMIT").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, TransactionEnd) + + def test_col_getattr(): ctx = SessionContext() data = { diff --git a/src/common.rs b/src/common.rs index 453bf67a4..88d2fdd5f 100644 --- a/src/common.rs +++ b/src/common.rs @@ -36,5 +36,8 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/common/schema.rs b/src/common/schema.rs index 66ce925ae..5a54fe333 100644 --- a/src/common/schema.rs 
+++ b/src/common/schema.rs @@ -15,14 +15,22 @@ // specific language governing permissions and limitations // under the License. +use std::fmt::{self, Display, Formatter}; +use std::sync::Arc; use std::{any::Any, borrow::Cow}; +use arrow::datatypes::Schema; +use arrow::pyarrow::PyArrowType; use datafusion::arrow::datatypes::SchemaRef; +use datafusion::common::Constraints; +use datafusion::datasource::TableType; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableSource}; use pyo3::prelude::*; use datafusion::logical_expr::utils::split_conjunction; +use crate::sql::logical::PyLogicalPlan; + use super::{data_type::DataTypeMap, function::SqlFunction}; #[pyclass(name = "SqlSchema", module = "datafusion.common", subclass)] @@ -218,3 +226,84 @@ impl SqlStatistics { self.row_count } } + +#[pyclass(name = "Constraints", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyConstraints { + pub constraints: Constraints, +} + +impl From for Constraints { + fn from(constraints: PyConstraints) -> Self { + constraints.constraints + } +} + +impl From for PyConstraints { + fn from(constraints: Constraints) -> Self { + PyConstraints { constraints } + } +} + +impl Display for PyConstraints { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "Constraints: {:?}", self.constraints) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "TableType", module = "datafusion.common")] +pub enum PyTableType { + Base, + View, + Temporary, +} + +impl From for datafusion::logical_expr::TableType { + fn from(table_type: PyTableType) -> Self { + match table_type { + PyTableType::Base => datafusion::logical_expr::TableType::Base, + PyTableType::View => datafusion::logical_expr::TableType::View, + PyTableType::Temporary => datafusion::logical_expr::TableType::Temporary, + } + } +} + +impl From for PyTableType { + fn from(table_type: TableType) -> Self { + match table_type { + 
datafusion::logical_expr::TableType::Base => PyTableType::Base, + datafusion::logical_expr::TableType::View => PyTableType::View, + datafusion::logical_expr::TableType::Temporary => PyTableType::Temporary, + } + } +} + +#[pyclass(name = "TableSource", module = "datafusion.common", subclass)] +#[derive(Clone)] +pub struct PyTableSource { + pub table_source: Arc, +} + +#[pymethods] +impl PyTableSource { + pub fn schema(&self) -> PyArrowType { + (*self.table_source.schema()).clone().into() + } + + pub fn constraints(&self) -> Option { + self.table_source.constraints().map(|c| PyConstraints { + constraints: c.clone(), + }) + } + + pub fn table_type(&self) -> PyTableType { + self.table_source.table_type().into() + } + + pub fn get_logical_plan(&self) -> Option { + self.table_source + .get_logical_plan() + .map(|plan| PyLogicalPlan::new(plan.into_owned())) + } +} diff --git a/src/expr.rs b/src/expr.rs index 7d4aa8798..404e575f8 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -67,10 +67,21 @@ pub mod case; pub mod cast; pub mod column; pub mod conditional_expr; +pub mod copy_to; +pub mod create_catalog; +pub mod create_catalog_schema; +pub mod create_external_table; +pub mod create_function; +pub mod create_index; pub mod create_memory_table; pub mod create_view; +pub mod describe_table; pub mod distinct; +pub mod dml; +pub mod drop_catalog_schema; +pub mod drop_function; pub mod drop_table; +pub mod drop_view; pub mod empty_relation; pub mod exists; pub mod explain; @@ -86,18 +97,21 @@ pub mod literal; pub mod logical_node; pub mod placeholder; pub mod projection; +pub mod recursive_query; pub mod repartition; pub mod scalar_subquery; pub mod scalar_variable; pub mod signature; pub mod sort; pub mod sort_expr; +pub mod statement; pub mod subquery; pub mod subquery_alias; pub mod table_scan; pub mod union; pub mod unnest; pub mod unnest_expr; +pub mod values; pub mod window; use sort_expr::{to_sort_expressions, PySortExpr}; @@ -802,5 +816,32 @@ pub(crate) fn init_module(m: 
&Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) } diff --git a/src/expr/copy_to.rs b/src/expr/copy_to.rs new file mode 100644 index 000000000..ebfcb8ebc --- /dev/null +++ b/src/expr/copy_to.rs @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + collections::HashMap, + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::{common::file_options::file_type::FileType, logical_expr::dml::CopyTo}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CopyTo", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCopyTo { + copy: CopyTo, +} + +impl From for CopyTo { + fn from(copy: PyCopyTo) -> Self { + copy.copy + } +} + +impl From for PyCopyTo { + fn from(copy: CopyTo) -> PyCopyTo { + PyCopyTo { copy } + } +} + +impl Display for PyCopyTo { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CopyTo: {:?}", self.copy.output_url) + } +} + +impl LogicalNode for PyCopyTo { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.copy.input).clone())] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyCopyTo { + #[new] + pub fn new( + input: PyLogicalPlan, + output_url: String, + partition_by: Vec, + file_type: PyFileType, + options: HashMap, + ) -> Self { + PyCopyTo { + copy: CopyTo { + input: input.plan(), + output_url, + partition_by, + file_type: file_type.file_type, + options, + }, + } + } + + fn input(&self) -> PyLogicalPlan { + PyLogicalPlan::from((*self.copy.input).clone()) + } + + fn output_url(&self) -> String { + self.copy.output_url.clone() + } + + fn partition_by(&self) -> Vec { + self.copy.partition_by.clone() + } + + fn file_type(&self) -> PyFileType { + PyFileType { + file_type: self.copy.file_type.clone(), + } + } + + fn options(&self) -> HashMap { + self.copy.options.clone() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CopyTo({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CopyTo".to_string()) + } +} + +#[pyclass(name = "FileType", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyFileType { + file_type: Arc, +} + 
+impl Display for PyFileType { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "FileType: {}", self.file_type) + } +} + +#[pymethods] +impl PyFileType { + fn __repr__(&self) -> PyResult { + Ok(format!("FileType({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("FileType".to_string()) + } +} diff --git a/src/expr/create_catalog.rs b/src/expr/create_catalog.rs new file mode 100644 index 000000000..f4ea0f517 --- /dev/null +++ b/src/expr/create_catalog.rs @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateCatalog; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CreateCatalog", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateCatalog { + create: CreateCatalog, +} + +impl From for CreateCatalog { + fn from(create: PyCreateCatalog) -> Self { + create.create + } +} + +impl From for PyCreateCatalog { + fn from(create: CreateCatalog) -> PyCreateCatalog { + PyCreateCatalog { create } + } +} + +impl Display for PyCreateCatalog { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateCatalog: {:?}", self.create.catalog_name) + } +} + +#[pymethods] +impl PyCreateCatalog { + #[new] + pub fn new( + catalog_name: String, + if_not_exists: bool, + schema: PyDFSchema, + ) -> PyResult { + Ok(PyCreateCatalog { + create: CreateCatalog { + catalog_name, + if_not_exists, + schema: Arc::new(schema.into()), + }, + }) + } + + pub fn catalog_name(&self) -> String { + self.create.catalog_name.clone() + } + + pub fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateCatalog({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateCatalog".to_string()) + } +} + +impl LogicalNode for PyCreateCatalog { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_catalog_schema.rs b/src/expr/create_catalog_schema.rs new file mode 100644 index 000000000..85f447e1e --- /dev/null +++ b/src/expr/create_catalog_schema.rs @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateCatalogSchema; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CreateCatalogSchema", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateCatalogSchema { + create: CreateCatalogSchema, +} + +impl From for CreateCatalogSchema { + fn from(create: PyCreateCatalogSchema) -> Self { + create.create + } +} + +impl From for PyCreateCatalogSchema { + fn from(create: CreateCatalogSchema) -> PyCreateCatalogSchema { + PyCreateCatalogSchema { create } + } +} + +impl Display for PyCreateCatalogSchema { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateCatalogSchema: {:?}", self.create.schema_name) + } +} + +#[pymethods] +impl PyCreateCatalogSchema { + #[new] + pub fn new( + schema_name: String, + if_not_exists: bool, + schema: PyDFSchema, + ) -> PyResult { + Ok(PyCreateCatalogSchema { + create: CreateCatalogSchema { + schema_name, + if_not_exists, + schema: Arc::new(schema.into()), + }, + }) + } + + pub fn schema_name(&self) -> String { + self.create.schema_name.clone() + } + + pub fn if_not_exists(&self) -> 
bool { + self.create.if_not_exists + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateCatalogSchema({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateCatalogSchema".to_string()) + } +} + +impl LogicalNode for PyCreateCatalogSchema { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_external_table.rs b/src/expr/create_external_table.rs new file mode 100644 index 000000000..01ce7d0ca --- /dev/null +++ b/src/expr/create_external_table.rs @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::{common::schema::PyConstraints, expr::PyExpr, sql::logical::PyLogicalPlan}; +use std::{ + collections::HashMap, + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateExternalTable; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::common::df_schema::PyDFSchema; + +use super::{logical_node::LogicalNode, sort_expr::PySortExpr}; + +#[pyclass(name = "CreateExternalTable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateExternalTable { + create: CreateExternalTable, +} + +impl From for CreateExternalTable { + fn from(create: PyCreateExternalTable) -> Self { + create.create + } +} + +impl From for PyCreateExternalTable { + fn from(create: CreateExternalTable) -> PyCreateExternalTable { + PyCreateExternalTable { create } + } +} + +impl Display for PyCreateExternalTable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "CreateExternalTable: {:?}{}", + self.create.name, self.create.constraints + ) + } +} + +#[pymethods] +impl PyCreateExternalTable { + #[allow(clippy::too_many_arguments)] + #[new] + #[pyo3(signature = (schema, name, location, file_type, table_partition_cols, if_not_exists, temporary, order_exprs, unbounded, options, constraints, column_defaults, definition=None))] + pub fn new( + schema: PyDFSchema, + name: String, + location: String, + file_type: String, + table_partition_cols: Vec, + if_not_exists: bool, + temporary: bool, + order_exprs: Vec>, + unbounded: bool, + options: HashMap, + constraints: PyConstraints, + column_defaults: HashMap, + definition: Option, + ) -> Self { + let create = CreateExternalTable { + schema: Arc::new(schema.into()), + name: name.into(), + location, + file_type, + table_partition_cols, + if_not_exists, + temporary, + definition, + order_exprs: order_exprs + .into_iter() + .map(|vec| vec.into_iter().map(|s| s.into()).collect::>()) + .collect::>(), + unbounded, + options, + constraints: constraints.constraints, + 
column_defaults: column_defaults + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + }; + PyCreateExternalTable { create } + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + pub fn name(&self) -> PyResult { + Ok(self.create.name.to_string()) + } + + pub fn location(&self) -> String { + self.create.location.clone() + } + + pub fn file_type(&self) -> String { + self.create.file_type.clone() + } + + pub fn table_partition_cols(&self) -> Vec { + self.create.table_partition_cols.clone() + } + + pub fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + pub fn temporary(&self) -> bool { + self.create.temporary + } + + pub fn definition(&self) -> Option { + self.create.definition.clone() + } + + pub fn order_exprs(&self) -> Vec> { + self.create + .order_exprs + .iter() + .map(|vec| vec.iter().map(|s| s.clone().into()).collect()) + .collect() + } + + pub fn unbounded(&self) -> bool { + self.create.unbounded + } + + pub fn options(&self) -> HashMap { + self.create.options.clone() + } + + pub fn constraints(&self) -> PyConstraints { + PyConstraints { + constraints: self.create.constraints.clone(), + } + } + + pub fn column_defaults(&self) -> HashMap { + self.create + .column_defaults + .iter() + .map(|(k, v)| (k.clone(), v.clone().into())) + .collect() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateExternalTable({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateExternalTable".to_string()) + } +} + +impl LogicalNode for PyCreateExternalTable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_function.rs b/src/expr/create_function.rs new file mode 100644 index 000000000..6f3c3f0ff --- /dev/null +++ b/src/expr/create_function.rs @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::{ + CreateFunction, CreateFunctionBody, OperateFunctionArg, Volatility, +}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use super::logical_node::LogicalNode; +use super::PyExpr; +use crate::common::{data_type::PyDataType, df_schema::PyDFSchema}; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "CreateFunction", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateFunction { + create: CreateFunction, +} + +impl From for CreateFunction { + fn from(create: PyCreateFunction) -> Self { + create.create + } +} + +impl From for PyCreateFunction { + fn from(create: CreateFunction) -> PyCreateFunction { + PyCreateFunction { create } + } +} + +impl Display for PyCreateFunction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateFunction: name {:?}", self.create.name) + } +} + +#[pyclass(name = "OperateFunctionArg", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyOperateFunctionArg { + arg: OperateFunctionArg, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "Volatility", module = "datafusion.expr")] +pub enum PyVolatility { + Immutable, + Stable, + Volatile, +} 
+ +#[pyclass(name = "CreateFunctionBody", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateFunctionBody { + body: CreateFunctionBody, +} + +#[pymethods] +impl PyCreateFunctionBody { + pub fn language(&self) -> Option { + self.body + .language + .as_ref() + .map(|language| language.to_string()) + } + + pub fn behavior(&self) -> Option { + self.body.behavior.as_ref().map(|behavior| match behavior { + Volatility::Immutable => PyVolatility::Immutable, + Volatility::Stable => PyVolatility::Stable, + Volatility::Volatile => PyVolatility::Volatile, + }) + } + + pub fn function_body(&self) -> Option { + self.body + .function_body + .as_ref() + .map(|function_body| function_body.clone().into()) + } +} + +#[pymethods] +impl PyCreateFunction { + #[new] + #[pyo3(signature = (or_replace, temporary, name, params, schema, return_type=None, args=None))] + pub fn new( + or_replace: bool, + temporary: bool, + name: String, + params: PyCreateFunctionBody, + schema: PyDFSchema, + return_type: Option, + args: Option>, + ) -> Self { + PyCreateFunction { + create: CreateFunction { + or_replace, + temporary, + name, + args: args.map(|args| args.into_iter().map(|arg| arg.arg).collect()), + return_type: return_type.map(|return_type| return_type.data_type), + params: params.body, + schema: Arc::new(schema.into()), + }, + } + } + + pub fn or_replace(&self) -> bool { + self.create.or_replace + } + + pub fn temporary(&self) -> bool { + self.create.temporary + } + + pub fn name(&self) -> String { + self.create.name.clone() + } + + pub fn params(&self) -> PyCreateFunctionBody { + PyCreateFunctionBody { + body: self.create.params.clone(), + } + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + pub fn return_type(&self) -> Option { + self.create + .return_type + .as_ref() + .map(|return_type| return_type.clone().into()) + } + + pub fn args(&self) -> Option> { + self.create.args.as_ref().map(|args| { + args.iter() + .map(|arg| 
PyOperateFunctionArg { arg: arg.clone() }) + .collect() + }) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateFunction({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateFunction".to_string()) + } +} + +impl LogicalNode for PyCreateFunction { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_index.rs b/src/expr/create_index.rs new file mode 100644 index 000000000..13dadbc3f --- /dev/null +++ b/src/expr/create_index.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateIndex; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::{logical_node::LogicalNode, sort_expr::PySortExpr}; + +#[pyclass(name = "CreateIndex", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateIndex { + create: CreateIndex, +} + +impl From for CreateIndex { + fn from(create: PyCreateIndex) -> Self { + create.create + } +} + +impl From for PyCreateIndex { + fn from(create: CreateIndex) -> PyCreateIndex { + PyCreateIndex { create } + } +} + +impl Display for PyCreateIndex { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateIndex: {:?}", self.create.name) + } +} + +#[pymethods] +impl PyCreateIndex { + #[new] + #[pyo3(signature = (table, columns, unique, if_not_exists, schema, name=None, using=None))] + pub fn new( + table: String, + columns: Vec, + unique: bool, + if_not_exists: bool, + schema: PyDFSchema, + name: Option, + using: Option, + ) -> PyResult { + Ok(PyCreateIndex { + create: CreateIndex { + name, + table: table.into(), + using, + columns: columns.iter().map(|c| c.clone().into()).collect(), + unique, + if_not_exists, + schema: Arc::new(schema.into()), + }, + }) + } + + pub fn name(&self) -> Option { + self.create.name.clone() + } + + pub fn table(&self) -> PyResult { + Ok(self.create.table.to_string()) + } + + pub fn using(&self) -> Option { + self.create.using.clone() + } + + pub fn columns(&self) -> Vec { + self.create + .columns + .iter() + .map(|c| c.clone().into()) + .collect() + } + + pub fn unique(&self) -> bool { + self.create.unique + } + + pub fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateIndex({})", self)) + } + + fn __name__(&self) -> PyResult { + 
Ok("CreateIndex".to_string()) + } +} + +impl LogicalNode for PyCreateIndex { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/describe_table.rs b/src/expr/describe_table.rs new file mode 100644 index 000000000..5658a13f2 --- /dev/null +++ b/src/expr/describe_table.rs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use arrow::{datatypes::Schema, pyarrow::PyArrowType}; +use datafusion::logical_expr::DescribeTable; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "DescribeTable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDescribeTable { + describe: DescribeTable, +} + +impl Display for PyDescribeTable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "DescribeTable") + } +} + +#[pymethods] +impl PyDescribeTable { + #[new] + fn new(schema: PyArrowType, output_schema: PyDFSchema) -> Self { + Self { + describe: DescribeTable { + schema: Arc::new(schema.0), + output_schema: Arc::new(output_schema.into()), + }, + } + } + + pub fn schema(&self) -> PyArrowType { + (*self.describe.schema).clone().into() + } + + pub fn output_schema(&self) -> PyDFSchema { + (*self.describe.output_schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DescribeTable({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("DescribeTable".to_string()) + } +} + +impl From for DescribeTable { + fn from(describe: PyDescribeTable) -> Self { + describe.describe + } +} + +impl From for PyDescribeTable { + fn from(describe: DescribeTable) -> PyDescribeTable { + PyDescribeTable { describe } + } +} + +impl LogicalNode for PyDescribeTable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/dml.rs b/src/expr/dml.rs new file mode 100644 index 000000000..251e336cc --- /dev/null +++ b/src/expr/dml.rs @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::logical_expr::dml::InsertOp; +use datafusion::logical_expr::{DmlStatement, WriteOp}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::common::schema::PyTableSource; +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "DmlStatement", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDmlStatement { + dml: DmlStatement, +} + +impl From for DmlStatement { + fn from(dml: PyDmlStatement) -> Self { + dml.dml + } +} + +impl From for PyDmlStatement { + fn from(dml: DmlStatement) -> PyDmlStatement { + PyDmlStatement { dml } + } +} + +impl LogicalNode for PyDmlStatement { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.dml.input).clone())] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyDmlStatement { + pub fn table_name(&self) -> PyResult { + Ok(self.dml.table_name.to_string()) + } + + pub fn target(&self) -> PyResult { + Ok(PyTableSource { + table_source: self.dml.target.clone(), + }) + } + + pub fn op(&self) -> PyWriteOp { + self.dml.op.clone().into() + } + + pub fn input(&self) -> PyLogicalPlan { + PyLogicalPlan { + plan: self.dml.input.clone(), + } + } + + pub fn output_schema(&self) -> PyDFSchema { + (*self.dml.output_schema).clone().into() + } + + fn 
__repr__(&self) -> PyResult { + Ok("DmlStatement".to_string()) + } + + fn __name__(&self) -> PyResult { + Ok("DmlStatement".to_string()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "WriteOp", module = "datafusion.expr")] +pub enum PyWriteOp { + Append, + Overwrite, + Replace, + + Update, + Delete, + Ctas, +} + +impl From for PyWriteOp { + fn from(write_op: WriteOp) -> Self { + match write_op { + WriteOp::Insert(InsertOp::Append) => PyWriteOp::Append, + WriteOp::Insert(InsertOp::Overwrite) => PyWriteOp::Overwrite, + WriteOp::Insert(InsertOp::Replace) => PyWriteOp::Replace, + + WriteOp::Update => PyWriteOp::Update, + WriteOp::Delete => PyWriteOp::Delete, + WriteOp::Ctas => PyWriteOp::Ctas, + } + } +} + +impl From for WriteOp { + fn from(py: PyWriteOp) -> Self { + match py { + PyWriteOp::Append => WriteOp::Insert(InsertOp::Append), + PyWriteOp::Overwrite => WriteOp::Insert(InsertOp::Overwrite), + PyWriteOp::Replace => WriteOp::Insert(InsertOp::Replace), + + PyWriteOp::Update => WriteOp::Update, + PyWriteOp::Delete => WriteOp::Delete, + PyWriteOp::Ctas => WriteOp::Ctas, + } + } +} + +#[pymethods] +impl PyWriteOp { + fn name(&self) -> String { + let write_op: WriteOp = self.clone().into(); + write_op.name().to_string() + } +} diff --git a/src/expr/drop_catalog_schema.rs b/src/expr/drop_catalog_schema.rs new file mode 100644 index 000000000..b7420a99c --- /dev/null +++ b/src/expr/drop_catalog_schema.rs @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::{common::SchemaReference, logical_expr::DropCatalogSchema, sql::TableReference}; +use pyo3::{exceptions::PyValueError, prelude::*, IntoPyObjectExt}; + +use crate::common::df_schema::PyDFSchema; + +use super::logical_node::LogicalNode; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "DropCatalogSchema", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDropCatalogSchema { + drop: DropCatalogSchema, +} + +impl From for DropCatalogSchema { + fn from(drop: PyDropCatalogSchema) -> Self { + drop.drop + } +} + +impl From for PyDropCatalogSchema { + fn from(drop: DropCatalogSchema) -> PyDropCatalogSchema { + PyDropCatalogSchema { drop } + } +} + +impl Display for PyDropCatalogSchema { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "DropCatalogSchema") + } +} + +fn parse_schema_reference(name: String) -> PyResult { + match name.into() { + TableReference::Bare { table } => Ok(SchemaReference::Bare { schema: table }), + TableReference::Partial { schema, table } => Ok(SchemaReference::Full { + schema: table, + catalog: schema, + }), + TableReference::Full { + catalog: _, + schema: _, + table: _, + } => Err(PyErr::new::( + "Invalid schema specifier (has 3 parts)".to_string(), + )), + } +} + +#[pymethods] +impl PyDropCatalogSchema { + #[new] + fn new(name: String, schema: PyDFSchema, if_exists: bool, cascade: bool) -> PyResult { + let name = parse_schema_reference(name)?; + Ok(PyDropCatalogSchema { + drop: DropCatalogSchema { + 
name, + schema: Arc::new(schema.into()), + if_exists, + cascade, + }, + }) + } + + fn name(&self) -> PyResult { + Ok(self.drop.name.to_string()) + } + + fn schema(&self) -> PyDFSchema { + (*self.drop.schema).clone().into() + } + + fn if_exists(&self) -> PyResult { + Ok(self.drop.if_exists) + } + + fn cascade(&self) -> PyResult { + Ok(self.drop.cascade) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DropCatalogSchema({})", self)) + } +} + +impl LogicalNode for PyDropCatalogSchema { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/drop_function.rs b/src/expr/drop_function.rs new file mode 100644 index 000000000..9fbd78fdc --- /dev/null +++ b/src/expr/drop_function.rs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::DropFunction; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use super::logical_node::LogicalNode; +use crate::common::df_schema::PyDFSchema; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "DropFunction", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDropFunction { + drop: DropFunction, +} + +impl From for DropFunction { + fn from(drop: PyDropFunction) -> Self { + drop.drop + } +} + +impl From for PyDropFunction { + fn from(drop: DropFunction) -> PyDropFunction { + PyDropFunction { drop } + } +} + +impl Display for PyDropFunction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "DropFunction") + } +} + +#[pymethods] +impl PyDropFunction { + #[new] + fn new(name: String, schema: PyDFSchema, if_exists: bool) -> PyResult { + Ok(PyDropFunction { + drop: DropFunction { + name, + schema: Arc::new(schema.into()), + if_exists, + }, + }) + } + fn name(&self) -> PyResult { + Ok(self.drop.name.clone()) + } + + fn schema(&self) -> PyDFSchema { + (*self.drop.schema).clone().into() + } + + fn if_exists(&self) -> PyResult { + Ok(self.drop.if_exists) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DropFunction({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("DropFunction".to_string()) + } +} + +impl LogicalNode for PyDropFunction { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/drop_view.rs b/src/expr/drop_view.rs new file mode 100644 index 000000000..1d1ab1e59 --- /dev/null +++ b/src/expr/drop_view.rs @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::DropView; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::common::df_schema::PyDFSchema; + +use super::logical_node::LogicalNode; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "DropView", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDropView { + drop: DropView, +} + +impl From for DropView { + fn from(drop: PyDropView) -> Self { + drop.drop + } +} + +impl From for PyDropView { + fn from(drop: DropView) -> PyDropView { + PyDropView { drop } + } +} + +impl Display for PyDropView { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "DropView: {name:?} if not exist:={if_exists}", + name = self.drop.name, + if_exists = self.drop.if_exists + ) + } +} + +#[pymethods] +impl PyDropView { + #[new] + fn new(name: String, schema: PyDFSchema, if_exists: bool) -> PyResult { + Ok(PyDropView { + drop: DropView { + name: name.into(), + schema: Arc::new(schema.into()), + if_exists, + }, + }) + } + + fn name(&self) -> PyResult { + Ok(self.drop.name.to_string()) + } + + fn schema(&self) -> PyDFSchema { + (*self.drop.schema).clone().into() + } + + fn if_exists(&self) -> PyResult { + Ok(self.drop.if_exists) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DropView({})", self)) + } + + fn __name__(&self) -> PyResult { + 
Ok("DropView".to_string()) + } +} + +impl LogicalNode for PyDropView { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/recursive_query.rs b/src/expr/recursive_query.rs new file mode 100644 index 000000000..65181f7d3 --- /dev/null +++ b/src/expr/recursive_query.rs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::fmt::{self, Display, Formatter}; + +use datafusion::logical_expr::RecursiveQuery; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "RecursiveQuery", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyRecursiveQuery { + query: RecursiveQuery, +} + +impl From for RecursiveQuery { + fn from(query: PyRecursiveQuery) -> Self { + query.query + } +} + +impl From for PyRecursiveQuery { + fn from(query: RecursiveQuery) -> PyRecursiveQuery { + PyRecursiveQuery { query } + } +} + +impl Display for PyRecursiveQuery { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "RecursiveQuery {name:?} is_distinct:={is_distinct}", + name = self.query.name, + is_distinct = self.query.is_distinct + ) + } +} + +#[pymethods] +impl PyRecursiveQuery { + #[new] + fn new( + name: String, + static_term: PyLogicalPlan, + recursive_term: PyLogicalPlan, + is_distinct: bool, + ) -> Self { + Self { + query: RecursiveQuery { + name, + static_term: static_term.plan(), + recursive_term: recursive_term.plan(), + is_distinct, + }, + } + } + + fn name(&self) -> PyResult { + Ok(self.query.name.clone()) + } + + fn static_term(&self) -> PyLogicalPlan { + PyLogicalPlan::from((*self.query.static_term).clone()) + } + + fn recursive_term(&self) -> PyLogicalPlan { + PyLogicalPlan::from((*self.query.recursive_term).clone()) + } + + fn is_distinct(&self) -> PyResult { + Ok(self.query.is_distinct) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("RecursiveQuery({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("RecursiveQuery".to_string()) + } +} + +impl LogicalNode for PyRecursiveQuery { + fn inputs(&self) -> Vec { + vec![ + PyLogicalPlan::from((*self.query.static_term).clone()), + PyLogicalPlan::from((*self.query.recursive_term).clone()), + ] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff 
--git a/src/expr/statement.rs b/src/expr/statement.rs new file mode 100644 index 000000000..83774cda1 --- /dev/null +++ b/src/expr/statement.rs @@ -0,0 +1,454 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::logical_expr::{ + Deallocate, Execute, Prepare, SetVariable, TransactionAccessMode, TransactionConclusion, + TransactionEnd, TransactionIsolationLevel, TransactionStart, +}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::data_type::PyDataType, sql::logical::PyLogicalPlan}; + +use super::{logical_node::LogicalNode, PyExpr}; + +#[pyclass(name = "TransactionStart", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyTransactionStart { + transaction_start: TransactionStart, +} + +impl From for PyTransactionStart { + fn from(transaction_start: TransactionStart) -> PyTransactionStart { + PyTransactionStart { transaction_start } + } +} + +impl TryFrom for TransactionStart { + type Error = PyErr; + + fn try_from(py: PyTransactionStart) -> Result { + Ok(py.transaction_start) + } +} + +impl LogicalNode for PyTransactionStart { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + 
} +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "TransactionAccessMode", module = "datafusion.expr")] +pub enum PyTransactionAccessMode { + ReadOnly, + ReadWrite, +} + +impl From for PyTransactionAccessMode { + fn from(access_mode: TransactionAccessMode) -> PyTransactionAccessMode { + match access_mode { + TransactionAccessMode::ReadOnly => PyTransactionAccessMode::ReadOnly, + TransactionAccessMode::ReadWrite => PyTransactionAccessMode::ReadWrite, + } + } +} + +impl TryFrom for TransactionAccessMode { + type Error = PyErr; + + fn try_from(py: PyTransactionAccessMode) -> Result { + match py { + PyTransactionAccessMode::ReadOnly => Ok(TransactionAccessMode::ReadOnly), + PyTransactionAccessMode::ReadWrite => Ok(TransactionAccessMode::ReadWrite), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass( + eq, + eq_int, + name = "TransactionIsolationLevel", + module = "datafusion.expr" +)] +pub enum PyTransactionIsolationLevel { + ReadUncommitted, + ReadCommitted, + RepeatableRead, + Serializable, + Snapshot, +} + +impl From for PyTransactionIsolationLevel { + fn from(isolation_level: TransactionIsolationLevel) -> PyTransactionIsolationLevel { + match isolation_level { + TransactionIsolationLevel::ReadUncommitted => { + PyTransactionIsolationLevel::ReadUncommitted + } + TransactionIsolationLevel::ReadCommitted => PyTransactionIsolationLevel::ReadCommitted, + TransactionIsolationLevel::RepeatableRead => { + PyTransactionIsolationLevel::RepeatableRead + } + TransactionIsolationLevel::Serializable => PyTransactionIsolationLevel::Serializable, + TransactionIsolationLevel::Snapshot => PyTransactionIsolationLevel::Snapshot, + } + } +} + +impl TryFrom for TransactionIsolationLevel { + type Error = PyErr; + + fn try_from(value: PyTransactionIsolationLevel) -> Result { + match value { + PyTransactionIsolationLevel::ReadUncommitted => { + Ok(TransactionIsolationLevel::ReadUncommitted) + } + 
PyTransactionIsolationLevel::ReadCommitted => { + Ok(TransactionIsolationLevel::ReadCommitted) + } + PyTransactionIsolationLevel::RepeatableRead => { + Ok(TransactionIsolationLevel::RepeatableRead) + } + PyTransactionIsolationLevel::Serializable => { + Ok(TransactionIsolationLevel::Serializable) + } + PyTransactionIsolationLevel::Snapshot => Ok(TransactionIsolationLevel::Snapshot), + } + } +} + +#[pymethods] +impl PyTransactionStart { + #[new] + pub fn new( + access_mode: PyTransactionAccessMode, + isolation_level: PyTransactionIsolationLevel, + ) -> PyResult { + let access_mode = access_mode.try_into()?; + let isolation_level = isolation_level.try_into()?; + Ok(PyTransactionStart { + transaction_start: TransactionStart { + access_mode, + isolation_level, + }, + }) + } + + pub fn access_mode(&self) -> PyResult { + Ok(self.transaction_start.access_mode.clone().into()) + } + + pub fn isolation_level(&self) -> PyResult { + Ok(self.transaction_start.isolation_level.clone().into()) + } +} + +#[pyclass(name = "TransactionEnd", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyTransactionEnd { + transaction_end: TransactionEnd, +} + +impl From for PyTransactionEnd { + fn from(transaction_end: TransactionEnd) -> PyTransactionEnd { + PyTransactionEnd { transaction_end } + } +} + +impl TryFrom for TransactionEnd { + type Error = PyErr; + + fn try_from(py: PyTransactionEnd) -> Result { + Ok(py.transaction_end) + } +} + +impl LogicalNode for PyTransactionEnd { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "TransactionConclusion", module = "datafusion.expr")] +pub enum PyTransactionConclusion { + Commit, + Rollback, +} + +impl From for PyTransactionConclusion { + fn from(value: TransactionConclusion) -> Self { + match value { + TransactionConclusion::Commit => 
PyTransactionConclusion::Commit, + TransactionConclusion::Rollback => PyTransactionConclusion::Rollback, + } + } +} + +impl TryFrom for TransactionConclusion { + type Error = PyErr; + + fn try_from(value: PyTransactionConclusion) -> Result { + match value { + PyTransactionConclusion::Commit => Ok(TransactionConclusion::Commit), + PyTransactionConclusion::Rollback => Ok(TransactionConclusion::Rollback), + } + } +} +#[pymethods] +impl PyTransactionEnd { + #[new] + pub fn new(conclusion: PyTransactionConclusion, chain: bool) -> PyResult { + let conclusion = conclusion.try_into()?; + Ok(PyTransactionEnd { + transaction_end: TransactionEnd { conclusion, chain }, + }) + } + + pub fn conclusion(&self) -> PyResult { + Ok(self.transaction_end.conclusion.clone().into()) + } + + pub fn chain(&self) -> bool { + self.transaction_end.chain + } +} + +#[pyclass(name = "SetVariable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PySetVariable { + set_variable: SetVariable, +} + +impl From for PySetVariable { + fn from(set_variable: SetVariable) -> PySetVariable { + PySetVariable { set_variable } + } +} + +impl TryFrom for SetVariable { + type Error = PyErr; + + fn try_from(py: PySetVariable) -> Result { + Ok(py.set_variable) + } +} + +impl LogicalNode for PySetVariable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PySetVariable { + #[new] + pub fn new(variable: String, value: String) -> Self { + PySetVariable { + set_variable: SetVariable { variable, value }, + } + } + + pub fn variable(&self) -> String { + self.set_variable.variable.clone() + } + + pub fn value(&self) -> String { + self.set_variable.value.clone() + } +} + +#[pyclass(name = "Prepare", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyPrepare { + prepare: Prepare, +} + +impl From for PyPrepare { + fn from(prepare: Prepare) -> PyPrepare { + PyPrepare 
{ prepare } + } +} + +impl TryFrom for Prepare { + type Error = PyErr; + + fn try_from(py: PyPrepare) -> Result { + Ok(py.prepare) + } +} + +impl LogicalNode for PyPrepare { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.prepare.input).clone())] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyPrepare { + #[new] + pub fn new(name: String, data_types: Vec, input: PyLogicalPlan) -> Self { + let input = input.plan().clone(); + let data_types = data_types + .into_iter() + .map(|data_type| data_type.into()) + .collect(); + PyPrepare { + prepare: Prepare { + name, + data_types, + input, + }, + } + } + + pub fn name(&self) -> String { + self.prepare.name.clone() + } + + pub fn data_types(&self) -> Vec { + self.prepare + .data_types + .clone() + .into_iter() + .map(|t| t.into()) + .collect() + } + + pub fn input(&self) -> PyLogicalPlan { + PyLogicalPlan { + plan: self.prepare.input.clone(), + } + } +} + +#[pyclass(name = "Execute", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyExecute { + execute: Execute, +} + +impl From for PyExecute { + fn from(execute: Execute) -> PyExecute { + PyExecute { execute } + } +} + +impl TryFrom for Execute { + type Error = PyErr; + + fn try_from(py: PyExecute) -> Result { + Ok(py.execute) + } +} + +impl LogicalNode for PyExecute { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyExecute { + #[new] + pub fn new(name: String, parameters: Vec) -> Self { + let parameters = parameters + .into_iter() + .map(|parameter| parameter.into()) + .collect(); + PyExecute { + execute: Execute { name, parameters }, + } + } + + pub fn name(&self) -> String { + self.execute.name.clone() + } + + pub fn parameters(&self) -> Vec { + self.execute + .parameters + .clone() + .into_iter() + .map(|t| t.into()) + .collect() + 
} +} + +#[pyclass(name = "Deallocate", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDeallocate { + deallocate: Deallocate, +} + +impl From for PyDeallocate { + fn from(deallocate: Deallocate) -> PyDeallocate { + PyDeallocate { deallocate } + } +} + +impl TryFrom for Deallocate { + type Error = PyErr; + + fn try_from(py: PyDeallocate) -> Result { + Ok(py.deallocate) + } +} + +impl LogicalNode for PyDeallocate { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyDeallocate { + #[new] + pub fn new(name: String) -> Self { + PyDeallocate { + deallocate: Deallocate { name }, + } + } + + pub fn name(&self) -> String { + self.deallocate.name.clone() + } +} diff --git a/src/expr/values.rs b/src/expr/values.rs new file mode 100644 index 000000000..fb2692230 --- /dev/null +++ b/src/expr/values.rs @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use datafusion::logical_expr::Values; +use pyo3::{prelude::*, IntoPyObjectExt}; +use pyo3::{pyclass, PyErr, PyResult, Python}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::{logical_node::LogicalNode, PyExpr}; + +#[pyclass(name = "Values", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyValues { + values: Values, +} + +impl From for PyValues { + fn from(values: Values) -> PyValues { + PyValues { values } + } +} + +impl TryFrom for Values { + type Error = PyErr; + + fn try_from(py: PyValues) -> Result { + Ok(py.values) + } +} + +impl LogicalNode for PyValues { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyValues { + #[new] + pub fn new(schema: PyDFSchema, values: Vec>) -> PyResult { + let values = values + .into_iter() + .map(|row| row.into_iter().map(|expr| expr.into()).collect()) + .collect(); + Ok(PyValues { + values: Values { + schema: Arc::new(schema.into()), + values, + }, + }) + } + + pub fn schema(&self) -> PyResult { + Ok((*self.values.schema).clone().into()) + } + + pub fn values(&self) -> Vec> { + self.values + .values + .clone() + .into_iter() + .map(|row| row.into_iter().map(|expr| expr.into()).collect()) + .collect() + } +} diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 96561c434..198d68bdc 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,10 +17,25 @@ use std::sync::Arc; +use crate::context::PySessionContext; use crate::errors::PyDataFusionResult; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; +use crate::expr::copy_to::PyCopyTo; +use crate::expr::create_catalog::PyCreateCatalog; +use crate::expr::create_catalog_schema::PyCreateCatalogSchema; +use crate::expr::create_external_table::PyCreateExternalTable; +use crate::expr::create_function::PyCreateFunction; +use 
crate::expr::create_index::PyCreateIndex; +use crate::expr::create_memory_table::PyCreateMemoryTable; +use crate::expr::create_view::PyCreateView; +use crate::expr::describe_table::PyDescribeTable; use crate::expr::distinct::PyDistinct; +use crate::expr::dml::PyDmlStatement; +use crate::expr::drop_catalog_schema::PyDropCatalogSchema; +use crate::expr::drop_function::PyDropFunction; +use crate::expr::drop_table::PyDropTable; +use crate::expr::drop_view::PyDropView; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; use crate::expr::extension::PyExtension; @@ -28,14 +43,20 @@ use crate::expr::filter::PyFilter; use crate::expr::join::PyJoin; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; +use crate::expr::recursive_query::PyRecursiveQuery; +use crate::expr::repartition::PyRepartition; use crate::expr::sort::PySort; +use crate::expr::statement::{ + PyDeallocate, PyExecute, PyPrepare, PySetVariable, PyTransactionEnd, PyTransactionStart, +}; use crate::expr::subquery::PySubquery; use crate::expr::subquery_alias::PySubqueryAlias; use crate::expr::table_scan::PyTableScan; +use crate::expr::union::PyUnion; use crate::expr::unnest::PyUnnest; +use crate::expr::values::PyValues; use crate::expr::window::PyWindowExpr; -use crate::{context::PySessionContext, errors::py_unsupported_variant_err}; -use datafusion::logical_expr::LogicalPlan; +use datafusion::logical_expr::{DdlStatement, LogicalPlan, Statement}; use datafusion_proto::logical_plan::{AsLogicalPlan, DefaultLogicalExtensionCodec}; use prost::Message; use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyBytes}; @@ -82,18 +103,54 @@ impl PyLogicalPlan { LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), LogicalPlan::Unnest(plan) => PyUnnest::from(plan.clone()).to_variant(py), LogicalPlan::Window(plan) => PyWindowExpr::from(plan.clone()).to_variant(py), - LogicalPlan::Repartition(_) - | LogicalPlan::Union(_) - | 
LogicalPlan::Statement(_) - | LogicalPlan::Values(_) - | LogicalPlan::Dml(_) - | LogicalPlan::Ddl(_) - | LogicalPlan::Copy(_) - | LogicalPlan::DescribeTable(_) - | LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( - "Conversion of variant not implemented: {:?}", - self.plan - ))), + LogicalPlan::Repartition(plan) => PyRepartition::from(plan.clone()).to_variant(py), + LogicalPlan::Union(plan) => PyUnion::from(plan.clone()).to_variant(py), + LogicalPlan::Statement(plan) => match plan { + Statement::TransactionStart(plan) => { + PyTransactionStart::from(plan.clone()).to_variant(py) + } + Statement::TransactionEnd(plan) => { + PyTransactionEnd::from(plan.clone()).to_variant(py) + } + Statement::SetVariable(plan) => PySetVariable::from(plan.clone()).to_variant(py), + Statement::Prepare(plan) => PyPrepare::from(plan.clone()).to_variant(py), + Statement::Execute(plan) => PyExecute::from(plan.clone()).to_variant(py), + Statement::Deallocate(plan) => PyDeallocate::from(plan.clone()).to_variant(py), + }, + LogicalPlan::Values(plan) => PyValues::from(plan.clone()).to_variant(py), + LogicalPlan::Dml(plan) => PyDmlStatement::from(plan.clone()).to_variant(py), + LogicalPlan::Ddl(plan) => match plan { + DdlStatement::CreateExternalTable(plan) => { + PyCreateExternalTable::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateMemoryTable(plan) => { + PyCreateMemoryTable::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateView(plan) => PyCreateView::from(plan.clone()).to_variant(py), + DdlStatement::CreateCatalogSchema(plan) => { + PyCreateCatalogSchema::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateCatalog(plan) => { + PyCreateCatalog::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateIndex(plan) => PyCreateIndex::from(plan.clone()).to_variant(py), + DdlStatement::DropTable(plan) => PyDropTable::from(plan.clone()).to_variant(py), + DdlStatement::DropView(plan) => PyDropView::from(plan.clone()).to_variant(py), + 
DdlStatement::DropCatalogSchema(plan) => { + PyDropCatalogSchema::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateFunction(plan) => { + PyCreateFunction::from(plan.clone()).to_variant(py) + } + DdlStatement::DropFunction(plan) => { + PyDropFunction::from(plan.clone()).to_variant(py) + } + }, + LogicalPlan::Copy(plan) => PyCopyTo::from(plan.clone()).to_variant(py), + LogicalPlan::DescribeTable(plan) => PyDescribeTable::from(plan.clone()).to_variant(py), + LogicalPlan::RecursiveQuery(plan) => { + PyRecursiveQuery::from(plan.clone()).to_variant(py) + } } } From 7d8bcd8d20623beb76a397eb4fddfb18781589eb Mon Sep 17 00:00:00 2001 From: kosiew Date: Mon, 5 May 2025 21:50:52 +0800 Subject: [PATCH 077/248] Partial fix for 1078: Enhance DataFrame Formatter Configuration with Memory and Display Controls (#1119) * feat: add configurable max table bytes and min table rows for DataFrame display * Revert "feat: add configurable max table bytes and min table rows for DataFrame display" This reverts commit f9b78fa3180c5d6c20eaa3b6d0af7426d7084093. * feat: add FormatterConfig for configurable DataFrame display options * refactor: simplify attribute extraction in get_formatter_config function * refactor: remove hardcoded constants and use FormatterConfig for display options * refactor: simplify record batch collection by using FormatterConfig for display options * feat: add max_memory_bytes, min_rows_display, and repr_rows parameters to DataFrameHtmlFormatter * feat: add tests for HTML formatter row display settings and memory limit * refactor: extract Python formatter retrieval into a separate function * Revert "feat: add tests for HTML formatter row display settings and memory limit" This reverts commit e089d7b282e53e587116b11d92760e6d292ec871. * feat: add tests for HTML formatter row and memory limit configurations * Revert "feat: add tests for HTML formatter row and memory limit configurations" This reverts commit 4090fd2f7378855b045d6bfd1368d088cc9ada75. 
* feat: add tests for new parameters and validation in DataFrameHtmlFormatter * Reorganize tests * refactor: rename and restructure formatter functions for clarity and maintainability * feat: implement PythonFormatter struct and refactor formatter retrieval for improved clarity * refactor: improve comments and restructure FormatterConfig usage in PyDataFrame * Add DataFrame usage guide with HTML rendering customization options (#1108) * docs: enhance user guide with detailed DataFrame operations and examples * move /docs/source/api/dataframe.rst into user-guide * docs: remove DataFrame API documentation * docs: fix formatting inconsistencies in DataFrame user guide * Two minor corrections to documentation rendering --------- Co-authored-by: Tim Saucer * Update documentation * refactor: streamline HTML rendering documentation * refactor: extract validation logic into separate functions for clarity * Implement feature X to enhance user experience and optimize performance * feat: add validation method for FormatterConfig to ensure positive integer values * add comment - ensure minimum rows are collected even if memory or row limits are hit * Update html_formatter documentation * update tests * remove unused type hints from imports in html_formatter.py * remove redundant tests for DataFrameHtmlFormatter and clean up assertions * refactor get_attr function to support generic default values * build_formatter_config_from_python return PyResult * fix ruff errors * trigger ci * fix: remove redundant newline in test_custom_style_provider_html_formatter * add more tests * trigger ci * Fix ruff errors * fix clippy error * feat: add validation for parameters in configure_formatter * test: add tests for invalid parameters in configure_formatter * Fix ruff errors --------- Co-authored-by: Tim Saucer --- docs/source/user-guide/dataframe.rst | 52 +++++++- python/datafusion/html_formatter.py | 104 ++++++++++++--- python/tests/test_dataframe.py | 183 ++++++++++++++++++++++++--- 
src/dataframe.rs | 142 +++++++++++++++++---- 4 files changed, 413 insertions(+), 68 deletions(-) diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst index a78fd8073..11e3d7e72 100644 --- a/docs/source/user-guide/dataframe.rst +++ b/docs/source/user-guide/dataframe.rst @@ -75,13 +75,17 @@ You can customize how DataFrames are rendered in HTML by configuring the formatt # Change the default styling configure_formatter( - max_rows=50, # Maximum number of rows to display - max_width=None, # Maximum width in pixels (None for auto) - theme="light", # Theme: "light" or "dark" - precision=2, # Floating point precision - thousands_separator=",", # Separator for thousands - date_format="%Y-%m-%d", # Date format - truncate_width=20 # Max width for string columns before truncating + max_cell_length=25, # Maximum characters in a cell before truncation + max_width=1000, # Maximum width in pixels + max_height=300, # Maximum height in pixels + max_memory_bytes=2097152, # Maximum memory for rendering (2MB) + min_rows_display=20, # Minimum number of rows to display + repr_rows=10, # Number of rows to display in __repr__ + enable_cell_expansion=True,# Allow expanding truncated cells + custom_css=None, # Additional custom CSS + show_truncation_message=True, # Show message when data is truncated + style_provider=None, # Custom styling provider + use_shared_styles=True # Share styles across tables ) The formatter settings affect all DataFrames displayed after configuration. @@ -113,6 +117,25 @@ For advanced styling needs, you can create a custom style provider: # Apply the custom style provider configure_formatter(style_provider=MyStyleProvider()) +Performance Optimization with Shared Styles +------------------------------------------- +The ``use_shared_styles`` parameter (enabled by default) optimizes performance when displaying +multiple DataFrames in notebook environments: + + .. 
code-block:: python + from datafusion.html_formatter import StyleProvider, configure_formatter + # Default: Use shared styles (recommended for notebooks) + configure_formatter(use_shared_styles=True) + + # Disable shared styles (each DataFrame includes its own styles) + configure_formatter(use_shared_styles=False) + +When ``use_shared_styles=True``: +- CSS styles and JavaScript are included only once per notebook session +- This reduces HTML output size and prevents style duplication +- Improves rendering performance with many DataFrames +- Applies consistent styling across all DataFrames + Creating a Custom Formatter --------------------------- @@ -177,3 +200,18 @@ You can also use a context manager to temporarily change formatting settings: # Back to default formatting df.show() + +Memory and Display Controls +--------------------------- + +You can control how much data is displayed and how much memory is used for rendering: + + .. code-block:: python + + configure_formatter( + max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display + min_rows_display=50, # Always show at least 50 rows + repr_rows=20 # Show 20 rows in __repr__ output + ) + +These parameters help balance comprehensive data display against performance considerations. \ No newline at end of file diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py index a50e14fd5..12a7e4553 100644 --- a/python/datafusion/html_formatter.py +++ b/python/datafusion/html_formatter.py @@ -27,6 +27,36 @@ ) +def _validate_positive_int(value: Any, param_name: str) -> None: + """Validate that a parameter is a positive integer. 
+ + Args: + value: The value to validate + param_name: Name of the parameter (used in error message) + + Raises: + ValueError: If the value is not a positive integer + """ + if not isinstance(value, int) or value <= 0: + msg = f"{param_name} must be a positive integer" + raise ValueError(msg) + + +def _validate_bool(value: Any, param_name: str) -> None: + """Validate that a parameter is a boolean. + + Args: + value: The value to validate + param_name: Name of the parameter (used in error message) + + Raises: + TypeError: If the value is not a boolean + """ + if not isinstance(value, bool): + msg = f"{param_name} must be a boolean" + raise TypeError(msg) + + @runtime_checkable class CellFormatter(Protocol): """Protocol for cell value formatters.""" @@ -91,6 +121,9 @@ class DataFrameHtmlFormatter: max_cell_length: Maximum characters to display in a cell before truncation max_width: Maximum width of the HTML table in pixels max_height: Maximum height of the HTML table in pixels + max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) + min_rows_display: Minimum number of rows to display + repr_rows: Default number of rows to display in repr output enable_cell_expansion: Whether to add expand/collapse buttons for long cell values custom_css: Additional CSS to include in the HTML output @@ -108,6 +141,9 @@ def __init__( max_cell_length: int = 25, max_width: int = 1000, max_height: int = 300, + max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB + min_rows_display: int = 20, + repr_rows: int = 10, enable_cell_expansion: bool = True, custom_css: Optional[str] = None, show_truncation_message: bool = True, @@ -124,6 +160,12 @@ def __init__( Maximum width of the displayed table in pixels. max_height : int, default 300 Maximum height of the displayed table in pixels. + max_memory_bytes : int, default 2097152 (2MB) + Maximum memory in bytes for rendered data. + min_rows_display : int, default 20 + Minimum number of rows to display. 
+ repr_rows : int, default 10 + Default number of rows to display in repr output. enable_cell_expansion : bool, default True Whether to allow cells to expand when clicked. custom_css : str, optional @@ -139,7 +181,8 @@ def __init__( Raises: ------ ValueError - If max_cell_length, max_width, or max_height is not a positive integer. + If max_cell_length, max_width, max_height, max_memory_bytes, + min_rows_display, or repr_rows is not a positive integer. TypeError If enable_cell_expansion, show_truncation_message, or use_shared_styles is not a boolean, @@ -148,27 +191,17 @@ def __init__( protocol. """ # Validate numeric parameters - - if not isinstance(max_cell_length, int) or max_cell_length <= 0: - msg = "max_cell_length must be a positive integer" - raise ValueError(msg) - if not isinstance(max_width, int) or max_width <= 0: - msg = "max_width must be a positive integer" - raise ValueError(msg) - if not isinstance(max_height, int) or max_height <= 0: - msg = "max_height must be a positive integer" - raise ValueError(msg) + _validate_positive_int(max_cell_length, "max_cell_length") + _validate_positive_int(max_width, "max_width") + _validate_positive_int(max_height, "max_height") + _validate_positive_int(max_memory_bytes, "max_memory_bytes") + _validate_positive_int(min_rows_display, "min_rows_display") + _validate_positive_int(repr_rows, "repr_rows") # Validate boolean parameters - if not isinstance(enable_cell_expansion, bool): - msg = "enable_cell_expansion must be a boolean" - raise TypeError(msg) - if not isinstance(show_truncation_message, bool): - msg = "show_truncation_message must be a boolean" - raise TypeError(msg) - if not isinstance(use_shared_styles, bool): - msg = "use_shared_styles must be a boolean" - raise TypeError(msg) + _validate_bool(enable_cell_expansion, "enable_cell_expansion") + _validate_bool(show_truncation_message, "show_truncation_message") + _validate_bool(use_shared_styles, "use_shared_styles") # Validate custom_css if custom_css is 
not None and not isinstance(custom_css, str): @@ -183,6 +216,9 @@ def __init__( self.max_cell_length = max_cell_length self.max_width = max_width self.max_height = max_height + self.max_memory_bytes = max_memory_bytes + self.min_rows_display = min_rows_display + self.repr_rows = repr_rows self.enable_cell_expansion = enable_cell_expansion self.custom_css = custom_css self.show_truncation_message = show_truncation_message @@ -597,6 +633,9 @@ def configure_formatter(**kwargs: Any) -> None: **kwargs: Formatter configuration parameters like max_cell_length, max_width, max_height, enable_cell_expansion, etc. + Raises: + ValueError: If any invalid parameters are provided + Example: >>> from datafusion.html_formatter import configure_formatter >>> configure_formatter( @@ -606,6 +645,31 @@ def configure_formatter(**kwargs: Any) -> None: ... use_shared_styles=True ... ) """ + # Valid parameters accepted by DataFrameHtmlFormatter + valid_params = { + "max_cell_length", + "max_width", + "max_height", + "max_memory_bytes", + "min_rows_display", + "repr_rows", + "enable_cell_expansion", + "custom_css", + "show_truncation_message", + "style_provider", + "use_shared_styles", + } + + # Check for invalid parameters + invalid_params = set(kwargs) - valid_params + if invalid_params: + msg = ( + f"Invalid formatter parameters: {', '.join(invalid_params)}. 
" + f"Valid parameters are: {', '.join(valid_params)}" + ) + raise ValueError(msg) + + # Create and set formatter with validated parameters set_formatter(DataFrameHtmlFormatter(**kwargs)) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 464b884db..e01308c86 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -41,6 +41,8 @@ ) from pyarrow.csv import write_csv +MB = 1024 * 1024 + @pytest.fixture def ctx(): @@ -117,6 +119,31 @@ def clean_formatter_state(): reset_formatter() +# custom style for testing with html formatter +class CustomStyleProvider: + def get_cell_style(self) -> str: + return ( + "background-color: #f5f5f5; color: #333; padding: 8px; border: " + "1px solid #ddd;" + ) + + def get_header_style(self) -> str: + return ( + "background-color: #4285f4; color: white; font-weight: bold; " + "padding: 10px; border: 1px solid #3367d6;" + ) + + +def count_table_rows(html_content: str) -> int: + """Count the number of table rows in HTML content. 
+ Args: + html_content: HTML string to analyze + Returns: + Number of table rows found (number of tags) + """ + return len(re.findall(r" str: - return ( - "background-color: #f5f5f5; color: #333; padding: 8px; border: " - "1px solid #ddd;" - ) - - def get_header_style(self) -> str: - return ( - "background-color: #4285f4; color: white; font-weight: bold; " - "padding: 10px; border: 1px solid #3367d6;" - ) - # Configure with custom style provider configure_formatter(style_provider=CustomStyleProvider()) @@ -917,6 +930,141 @@ def get_header_style(self) -> str: assert "color: #5af" in html_output # Even numbers +def test_html_formatter_memory(df, clean_formatter_state): + """Test the memory and row control parameters in DataFrameHtmlFormatter.""" + configure_formatter(max_memory_bytes=10, min_rows_display=1) + html_output = df._repr_html_() + + # Count the number of table rows in the output + tr_count = count_table_rows(html_output) + # With a tiny memory limit of 10 bytes, the formatter should display + # the minimum number of rows (1) plus a message about truncation + assert tr_count == 2 # 1 for header row, 1 for data row + assert "data truncated" in html_output.lower() + + configure_formatter(max_memory_bytes=10 * MB, min_rows_display=1) + html_output = df._repr_html_() + # With larger memory limit and min_rows=2, should display all rows + tr_count = count_table_rows(html_output) + # Table should have header row (1) + 3 data rows = 4 rows + assert tr_count == 4 + # No truncation message should appear + assert "data truncated" not in html_output.lower() + + +def test_html_formatter_repr_rows(df, clean_formatter_state): + configure_formatter(min_rows_display=2, repr_rows=2) + html_output = df._repr_html_() + + tr_count = count_table_rows(html_output) + # Tabe should have header row (1) + 2 data rows = 3 rows + assert tr_count == 3 + + configure_formatter(min_rows_display=2, repr_rows=3) + html_output = df._repr_html_() + + tr_count = count_table_rows(html_output) + 
# Tabe should have header row (1) + 3 data rows = 4 rows + assert tr_count == 4 + + +def test_html_formatter_validation(): + # Test validation for invalid parameters + + with pytest.raises(ValueError, match="max_cell_length must be a positive integer"): + DataFrameHtmlFormatter(max_cell_length=0) + + with pytest.raises(ValueError, match="max_width must be a positive integer"): + DataFrameHtmlFormatter(max_width=0) + + with pytest.raises(ValueError, match="max_height must be a positive integer"): + DataFrameHtmlFormatter(max_height=0) + + with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): + DataFrameHtmlFormatter(max_memory_bytes=0) + + with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): + DataFrameHtmlFormatter(max_memory_bytes=-100) + + with pytest.raises(ValueError, match="min_rows_display must be a positive integer"): + DataFrameHtmlFormatter(min_rows_display=0) + + with pytest.raises(ValueError, match="min_rows_display must be a positive integer"): + DataFrameHtmlFormatter(min_rows_display=-5) + + with pytest.raises(ValueError, match="repr_rows must be a positive integer"): + DataFrameHtmlFormatter(repr_rows=0) + + with pytest.raises(ValueError, match="repr_rows must be a positive integer"): + DataFrameHtmlFormatter(repr_rows=-10) + + +def test_configure_formatter(df, clean_formatter_state): + """Test using custom style providers with the HTML formatter and configured + parameters.""" + + # these are non-default values + max_cell_length = 10 + max_width = 500 + max_height = 30 + max_memory_bytes = 3 * MB + min_rows_display = 2 + repr_rows = 2 + enable_cell_expansion = False + show_truncation_message = False + use_shared_styles = False + + reset_formatter() + formatter_default = get_formatter() + + assert formatter_default.max_cell_length != max_cell_length + assert formatter_default.max_width != max_width + assert formatter_default.max_height != max_height + assert 
formatter_default.max_memory_bytes != max_memory_bytes + assert formatter_default.min_rows_display != min_rows_display + assert formatter_default.repr_rows != repr_rows + assert formatter_default.enable_cell_expansion != enable_cell_expansion + assert formatter_default.show_truncation_message != show_truncation_message + assert formatter_default.use_shared_styles != use_shared_styles + + # Configure with custom style provider and additional parameters + configure_formatter( + max_cell_length=max_cell_length, + max_width=max_width, + max_height=max_height, + max_memory_bytes=max_memory_bytes, + min_rows_display=min_rows_display, + repr_rows=repr_rows, + enable_cell_expansion=enable_cell_expansion, + show_truncation_message=show_truncation_message, + use_shared_styles=use_shared_styles, + ) + formatter_custom = get_formatter() + assert formatter_custom.max_cell_length == max_cell_length + assert formatter_custom.max_width == max_width + assert formatter_custom.max_height == max_height + assert formatter_custom.max_memory_bytes == max_memory_bytes + assert formatter_custom.min_rows_display == min_rows_display + assert formatter_custom.repr_rows == repr_rows + assert formatter_custom.enable_cell_expansion == enable_cell_expansion + assert formatter_custom.show_truncation_message == show_truncation_message + assert formatter_custom.use_shared_styles == use_shared_styles + + +def test_configure_formatter_invalid_params(clean_formatter_state): + """Test that configure_formatter rejects invalid parameters.""" + with pytest.raises(ValueError, match="Invalid formatter parameters"): + configure_formatter(invalid_param=123) + + # Test with multiple parameters, one valid and one invalid + with pytest.raises(ValueError, match="Invalid formatter parameters"): + configure_formatter(max_width=500, not_a_real_param="test") + + # Test with multiple invalid parameters + with pytest.raises(ValueError, match="Invalid formatter parameters"): + configure_formatter(fake_param1="test", 
fake_param2=456) + + def test_get_dataframe(tmp_path): ctx = SessionContext() @@ -1505,9 +1653,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: assert result["new_col"] == [3 for _i in range(3)] -def test_dataframe_repr_html_structure(df) -> None: +def test_dataframe_repr_html_structure(df, clean_formatter_state) -> None: """Test that DataFrame._repr_html_ produces expected HTML output structure.""" - import re output = df._repr_html_() @@ -1537,7 +1684,7 @@ def test_dataframe_repr_html_structure(df) -> None: assert len(body_matches) == 1, "Expected pattern of values not found in HTML output" -def test_dataframe_repr_html_values(df): +def test_dataframe_repr_html_values(df, clean_formatter_state): """Test that DataFrame._repr_html_ contains the expected data values.""" html = df._repr_html_() assert html is not None diff --git a/src/dataframe.rs b/src/dataframe.rs index 787f63520..211e31bd1 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -71,8 +71,103 @@ impl PyTableProvider { PyTable::new(table_provider) } } -const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB -const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20; + +/// Configuration for DataFrame display formatting +#[derive(Debug, Clone)] +pub struct FormatterConfig { + /// Maximum memory in bytes to use for display (default: 2MB) + pub max_bytes: usize, + /// Minimum number of rows to display (default: 20) + pub min_rows: usize, + /// Number of rows to include in __repr__ output (default: 10) + pub repr_rows: usize, +} + +impl Default for FormatterConfig { + fn default() -> Self { + Self { + max_bytes: 2 * 1024 * 1024, // 2MB + min_rows: 20, + repr_rows: 10, + } + } +} + +impl FormatterConfig { + /// Validates that all configuration values are positive integers. + /// + /// # Returns + /// + /// `Ok(())` if all values are valid, or an `Err` with a descriptive error message. 
+ pub fn validate(&self) -> Result<(), String> { + if self.max_bytes == 0 { + return Err("max_bytes must be a positive integer".to_string()); + } + + if self.min_rows == 0 { + return Err("min_rows must be a positive integer".to_string()); + } + + if self.repr_rows == 0 { + return Err("repr_rows must be a positive integer".to_string()); + } + + Ok(()) + } +} + +/// Holds the Python formatter and its configuration +struct PythonFormatter<'py> { + /// The Python formatter object + formatter: Bound<'py, PyAny>, + /// The formatter configuration + config: FormatterConfig, +} + +/// Get the Python formatter and its configuration +fn get_python_formatter_with_config(py: Python) -> PyResult { + let formatter = import_python_formatter(py)?; + let config = build_formatter_config_from_python(&formatter)?; + Ok(PythonFormatter { formatter, config }) +} + +/// Get the Python formatter from the datafusion.html_formatter module +fn import_python_formatter(py: Python) -> PyResult> { + let formatter_module = py.import("datafusion.html_formatter")?; + let get_formatter = formatter_module.getattr("get_formatter")?; + get_formatter.call0() +} + +// Helper function to extract attributes with fallback to default +fn get_attr<'a, T>(py_object: &'a Bound<'a, PyAny>, attr_name: &str, default_value: T) -> T +where + T: for<'py> pyo3::FromPyObject<'py> + Clone, +{ + py_object + .getattr(attr_name) + .and_then(|v| v.extract::()) + .unwrap_or_else(|_| default_value.clone()) +} + +/// Helper function to create a FormatterConfig from a Python formatter object +fn build_formatter_config_from_python(formatter: &Bound<'_, PyAny>) -> PyResult { + let default_config = FormatterConfig::default(); + let max_bytes = get_attr(formatter, "max_memory_bytes", default_config.max_bytes); + let min_rows = get_attr(formatter, "min_rows_display", default_config.min_rows); + let repr_rows = get_attr(formatter, "repr_rows", default_config.repr_rows); + + let config = FormatterConfig { + max_bytes, + min_rows, + 
repr_rows, + }; + + // Return the validated config, converting String error to PyErr + config + .validate() + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e))?; + Ok(config) +} /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. @@ -114,9 +209,14 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { + // Get the Python formatter config + let PythonFormatter { + formatter: _, + config, + } = get_python_formatter_with_config(py)?; let (batches, has_more) = wait_for_future( py, - collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10), + collect_record_batches_to_display(self.df.as_ref().clone(), config), )?; if batches.is_empty() { // This should not be reached, but do it for safety since we index into the vector below @@ -135,13 +235,11 @@ impl PyDataFrame { } fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + // Get the Python formatter and config + let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; let (batches, has_more) = wait_for_future( py, - collect_record_batches_to_display( - self.df.as_ref().clone(), - MIN_TABLE_ROWS_TO_DISPLAY, - usize::MAX, - ), + collect_record_batches_to_display(self.df.as_ref().clone(), config), )?; if batches.is_empty() { // This should not be reached, but do it for safety since we index into the vector below @@ -158,12 +256,6 @@ impl PyDataFrame { let py_schema = self.schema().into_pyobject(py)?; - // Get the Python formatter module and call format_html - let formatter_module = py.import("datafusion.html_formatter")?; - let get_formatter = formatter_module.getattr("get_formatter")?; - let formatter = get_formatter.call0()?; - - // Call format_html method on the formatter let kwargs = pyo3::types::PyDict::new(py); let py_batches_list = PyList::new(py, py_batches.as_slice())?; kwargs.set_item("batches", py_batches_list)?; @@ 
-796,9 +888,14 @@ fn record_batch_into_schema( /// rows, set min_rows == max_rows. async fn collect_record_batches_to_display( df: DataFrame, - min_rows: usize, - max_rows: usize, + config: FormatterConfig, ) -> Result<(Vec, bool), DataFusionError> { + let FormatterConfig { + max_bytes, + min_rows, + repr_rows, + } = config; + let partitioned_stream = df.execute_stream_partitioned().await?; let mut stream = futures::stream::iter(partitioned_stream).flatten(); let mut size_estimate_so_far = 0; @@ -806,9 +903,8 @@ async fn collect_record_batches_to_display( let mut record_batches = Vec::default(); let mut has_more = false; - while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows) - || rows_so_far < min_rows - { + // ensure minimum rows even if memory/row limits are hit + while (size_estimate_so_far < max_bytes && rows_so_far < repr_rows) || rows_so_far < min_rows { let mut rb = match stream.next().await { None => { break; @@ -821,8 +917,8 @@ async fn collect_record_batches_to_display( if rows_in_rb > 0 { size_estimate_so_far += rb.get_array_memory_size(); - if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY { - let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32; + if size_estimate_so_far > max_bytes { + let ratio = max_bytes as f32 / size_estimate_so_far as f32; let total_rows = rows_in_rb + rows_so_far; let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize; @@ -838,8 +934,8 @@ async fn collect_record_batches_to_display( } } - if rows_in_rb + rows_so_far > max_rows { - rb = rb.slice(0, max_rows - rows_so_far); + if rows_in_rb + rows_so_far > repr_rows { + rb = rb.slice(0, repr_rows - rows_so_far); has_more = true; } From f3c98ec7a2eb325041530b1ae8d6de41aa558037 Mon Sep 17 00:00:00 2001 From: kosiew Date: Fri, 16 May 2025 14:34:19 +0800 Subject: [PATCH 078/248] Add fill_null method to DataFrame API for handling missing values (#1019) * feat: add fill_null method to DataFrame for handling null 
values * test: add coalesce function tests for handling default values * Resolve test cases for fill_null * feat: add fill_nan method to DataFrame for handling NaN values * move imports out of functions * docs: add documentation for fill_null and fill_nan methods in DataFrame * Add more tests * fix ruff errors * amend def fill_null to invoke PyDataFrame's fill_null - Implemented `fill_null` method in `dataframe.rs` to allow filling null values with a specified value for specific columns or all columns. - Added a helper function `python_value_to_scalar_value` to convert Python values to DataFusion ScalarValues, supporting various types including integers, floats, booleans, strings, and timestamps. - Updated the `count` method in `PyDataFrame` to maintain functionality. * refactor: remove fill_nan method documentation from functions.rst * refactor: remove unused import of Enum from dataframe.py * refactor: improve error handling and type extraction in python_value_to_scalar_value function * refactor: enhance datetime and date conversion logic in python_value_to_scalar_value function * refactor: streamline type extraction in python_value_to_scalar_value function * fix try_convert_to_string * refactor: improve type handling in python_value_to_scalar_value function * refactor: move py_obj_to_scalar_value function to utils module * refactor: update fill_null to use py_obj_to_scalar_value from utils * Remove python_object_to_scalar_value code * refactor: enhance py_obj_to_scalar_value to utilize PyArrow for complex type conversion * refactor: update py_obj_to_scalar_value to handle errors and use extract_bound for PyArrow scalar conversion * refactor: modify py_obj_to_scalar_value to return ScalarValue directly and streamline error handling * refactor: update py_obj_to_scalar_value to return a Result for better error handling * test: add tests for fill_null functionality in DataFrame with null values * test: enhance null DataFrame tests to include date32 and date64 
columns * refactor: simplify py_obj_to_scalar_value by removing direct extraction of basic types * refactor: remove unnecessary documentation from py_obj_to_scalar_value function * Fix ruff errors * test: update datetime handling in coalesce tests to include timezone information * Fix ruff errors * trigger ci --- .../common-operations/functions.rst | 21 ++ python/datafusion/dataframe.py | 26 +- python/tests/test_dataframe.py | 266 ++++++++++++++++++ python/tests/test_functions.py | 61 ++++ src/config.rs | 21 +- src/dataframe.rs | 23 +- src/utils.rs | 18 ++ 7 files changed, 414 insertions(+), 22 deletions(-) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index 12097be8f..d458d3eb0 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -129,3 +129,24 @@ The function :py:func:`~datafusion.functions.in_list` allows to check a column f .limit(20) .to_pandas() ) + + +Handling Missing Values +======================= + +DataFusion provides methods to handle missing values in DataFrames: + +fill_null +--------- + +The ``fill_null()`` method replaces NULL values in specified columns with a provided value: + +.. code-block:: python + + # Fill all NULL values with 0 where possible + df = df.fill_null(0) + + # Fill NULL values only in specific string columns + df = df.fill_null("missing", subset=["name", "category"]) + +The fill value will be cast to match each column's type. If casting fails for a column, that column remains unchanged. 
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 26fe8f453..a1df7e080 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -37,6 +37,8 @@ except ImportError: from typing_extensions import deprecated # Python 3.12 +from datafusion._internal import DataFrame as DataFrameInternal +from datafusion.expr import Expr, SortExpr, sort_or_default from datafusion.plan import ExecutionPlan, LogicalPlan from datafusion.record_batch import RecordBatchStream @@ -53,8 +55,6 @@ from enum import Enum -from datafusion.expr import Expr, SortExpr, sort_or_default - # excerpt from deltalake # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 @@ -869,3 +869,25 @@ def within_limit(df: DataFrame, limit: int) -> DataFrame: DataFrame: After applying func to the original dataframe. """ return func(self, *args) + + def fill_null(self, value: Any, subset: list[str] | None = None) -> DataFrame: + """Fill null values in specified columns with a value. + + Args: + value: Value to replace nulls with. Will be cast to match column type. + subset: Optional list of column names to fill. If None, fills all columns. + + Returns: + DataFrame with null values replaced where type casting is possible + + Examples: + >>> df = df.fill_null(0) # Fill all nulls with 0 where possible + >>> # Fill nulls in specific string columns + >>> df = df.fill_null("missing", subset=["name", "category"]) + + Notes: + - Only fills nulls in columns where the value can be cast to the column type + - For columns where casting fails, the original column is kept unchanged + - For columns not in subset, the original column is kept unchanged + """ + return DataFrame(self.df.fill_null(value, subset)) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e01308c86..dd5f962b2 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -14,6 +14,7 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. +import datetime import os import re from typing import Any @@ -119,6 +120,38 @@ def clean_formatter_state(): reset_formatter() +@pytest.fixture +def null_df(): + """Create a DataFrame with null values of different types.""" + ctx = SessionContext() + + # Create a RecordBatch with nulls across different types + batch = pa.RecordBatch.from_arrays( + [ + pa.array([1, None, 3, None], type=pa.int64()), + pa.array([4.5, 6.7, None, None], type=pa.float64()), + pa.array(["a", None, "c", None], type=pa.string()), + pa.array([True, None, False, None], type=pa.bool_()), + pa.array( + [10957, None, 18993, None], type=pa.date32() + ), # 2000-01-01, null, 2022-01-01, null + pa.array( + [946684800000, None, 1640995200000, None], type=pa.date64() + ), # 2000-01-01, null, 2022-01-01, null + ], + names=[ + "int_col", + "float_col", + "str_col", + "bool_col", + "date32_col", + "date64_col", + ], + ) + + return ctx.create_dataframe([[batch]]) + + # custom style for testing with html formatter class CustomStyleProvider: def get_cell_style(self) -> str: @@ -1794,3 +1827,236 @@ def test_html_formatter_manual_format_html(clean_formatter_state): assert "") + return html + + def _build_table_container_start(self) -> list[str]: + """Build the opening tags for the table container.""" + html = [] + html.append( + f'
' + ) + html.append('') + return html + + def _build_table_header(self, schema: Any) -> list[str]: + """Build the HTML table header with column names.""" + html = [] + html.append("") + html.append("") + for field in schema: + if self._custom_header_builder: + html.append(self._custom_header_builder(field)) + else: + html.append( + f"" + ) + html.append("") + html.append("") + return html + + def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: + """Build the HTML table body with data rows.""" + html = [] + html.append("") + + row_count = 0 + for batch in batches: + for row_idx in range(batch.num_rows): + row_count += 1 + html.append("") + + for col_idx, column in enumerate(batch.columns): + # Get the raw value from the column + raw_value = self._get_cell_value(column, row_idx) + + # Always check for type formatters first to format the value + formatted_value = self._format_cell_value(raw_value) + + # Then apply either custom cell builder or standard cell formatting + if self._custom_cell_builder: + # Pass both the raw value and formatted value to let the + # builder decide + cell_html = self._custom_cell_builder( + raw_value, row_count, col_idx, table_uuid + ) + html.append(cell_html) + else: + # Standard cell formatting with formatted value + if ( + len(str(raw_value)) > self.max_cell_length + and self.enable_cell_expansion + ): + cell_html = self._build_expandable_cell( + formatted_value, row_count, col_idx, table_uuid + ) + else: + cell_html = self._build_regular_cell(formatted_value) + html.append(cell_html) + + html.append("") + + html.append("") + return html + + def _get_cell_value(self, column: Any, row_idx: int) -> Any: + """Extract a cell value from a column. 
+ + Args: + column: Arrow array + row_idx: Row index + + Returns: + The raw cell value + """ + try: + value = column[row_idx] + + if hasattr(value, "as_py"): + return value.as_py() + except (AttributeError, TypeError): + pass + else: + return value + + def _format_cell_value(self, value: Any) -> str: + """Format a cell value for display. + + Uses registered type formatters if available. + + Args: + value: The cell value to format + + Returns: + Formatted cell value as string + """ + # Check for custom type formatters + for type_cls, formatter in self._type_formatters.items(): + if isinstance(value, type_cls): + return formatter(value) + + # If no formatter matched, return string representation + return str(value) + + def _build_expandable_cell( + self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str + ) -> str: + """Build an expandable cell for long content.""" + short_value = str(formatted_value)[: self.max_cell_length] + return ( + f"" + ) + + def _build_regular_cell(self, formatted_value: str) -> str: + """Build a regular table cell.""" + return ( + f"" + ) + + def _build_html_footer(self, has_more: bool) -> list[str]: + """Build the HTML footer with JavaScript and messages.""" + html = [] + + # Add JavaScript for interactivity only if cell expansion is enabled + # and we're not using the shared styles approach + if self.enable_cell_expansion and not self.use_shared_styles: + html.append(self._get_javascript()) + + # Add truncation message if needed + if has_more and self.show_truncation_message: + html.append("
Data truncated due to size.
") + + return html + + def _get_default_css(self) -> str: + """Get default CSS styles for the HTML table.""" + return """ + .expandable-container { + display: inline-block; + max-width: 200px; + } + .expandable { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + display: block; + } + .full-text { + display: none; + white-space: normal; + } + .expand-btn { + cursor: pointer; + color: blue; + text-decoration: underline; + border: none; + background: none; + font-size: inherit; + display: block; + margin-top: 5px; + } + """ + + def _get_javascript(self) -> str: + """Get JavaScript code for interactive elements.""" + return """ + + """ + + +class FormatterManager: + """Manager class for the global DataFrame HTML formatter instance.""" + + _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() + + @classmethod + def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. + + Args: + formatter: The formatter instance to use globally + """ + cls._default_formatter = formatter + _refresh_formatter_reference() + + @classmethod + def get_formatter(cls) -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + Returns: + The global HTML formatter instance + """ + return cls._default_formatter + + +def get_formatter() -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + This function is used by the DataFrame._repr_html_ implementation to access + the shared formatter instance. It can also be used directly when custom + HTML rendering is needed. + + Returns: + The global HTML formatter instance + + Example: + >>> from datafusion.html_formatter import get_formatter + >>> formatter = get_formatter() + >>> formatter.max_cell_length = 50 # Increase cell length + """ + return FormatterManager.get_formatter() + + +def set_formatter(formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. 
+ + Args: + formatter: The formatter instance to use globally + + Example: + >>> from datafusion.html_formatter import get_formatter, set_formatter + >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) + >>> set_formatter(custom_formatter) + """ + FormatterManager.set_formatter(formatter) + + +def configure_formatter(**kwargs: Any) -> None: + """Configure the global DataFrame HTML formatter. + + This function creates a new formatter with the provided configuration + and sets it as the global formatter for all DataFrames. + + Args: + **kwargs: Formatter configuration parameters like max_cell_length, + max_width, max_height, enable_cell_expansion, etc. + + Raises: + ValueError: If any invalid parameters are provided + + Example: + >>> from datafusion.html_formatter import configure_formatter + >>> configure_formatter( + ... max_cell_length=50, + ... max_height=500, + ... enable_cell_expansion=True, + ... use_shared_styles=True + ... ) + """ + # Valid parameters accepted by DataFrameHtmlFormatter + valid_params = { + "max_cell_length", + "max_width", + "max_height", + "max_memory_bytes", + "min_rows_display", + "repr_rows", + "enable_cell_expansion", + "custom_css", + "show_truncation_message", + "style_provider", + "use_shared_styles", + } + + # Check for invalid parameters + invalid_params = set(kwargs) - valid_params + if invalid_params: + msg = ( + f"Invalid formatter parameters: {', '.join(invalid_params)}. " + f"Valid parameters are: {', '.join(valid_params)}" + ) + raise ValueError(msg) + + # Create and set formatter with validated parameters + set_formatter(DataFrameHtmlFormatter(**kwargs)) + + +def reset_formatter() -> None: + """Reset the global DataFrame HTML formatter to default settings. + + This function creates a new formatter with default configuration + and sets it as the global formatter for all DataFrames. 
+ + Example: + >>> from datafusion.html_formatter import reset_formatter + >>> reset_formatter() # Reset formatter to default settings + """ + formatter = DataFrameHtmlFormatter() + # Reset the styles_loaded flag to ensure styles will be reloaded + DataFrameHtmlFormatter._styles_loaded = False + set_formatter(formatter) + + +def reset_styles_loaded_state() -> None: + """Reset the styles loaded state to force reloading of styles. + + This can be useful when switching between notebook sessions or + when styles need to be refreshed. + + Example: + >>> from datafusion.html_formatter import reset_styles_loaded_state + >>> reset_styles_loaded_state() # Force styles to reload in next render + """ + DataFrameHtmlFormatter._styles_loaded = False + + +def _refresh_formatter_reference() -> None: + """Refresh formatter reference in any modules using it. + + This helps ensure that changes to the formatter are reflected in existing + DataFrames that might be caching the formatter reference. + """ + # This is a no-op but signals modules to refresh their reference diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py index 12a7e4553..65eb1f042 100644 --- a/python/datafusion/html_formatter.py +++ b/python/datafusion/html_formatter.py @@ -14,698 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""HTML formatting utilities for DataFusion DataFrames.""" -from __future__ import annotations +"""Deprecated module for dataframe formatting.""" -from typing import ( - Any, - Callable, - Optional, - Protocol, - runtime_checkable, -) - - -def _validate_positive_int(value: Any, param_name: str) -> None: - """Validate that a parameter is a positive integer. 
- - Args: - value: The value to validate - param_name: Name of the parameter (used in error message) - - Raises: - ValueError: If the value is not a positive integer - """ - if not isinstance(value, int) or value <= 0: - msg = f"{param_name} must be a positive integer" - raise ValueError(msg) - - -def _validate_bool(value: Any, param_name: str) -> None: - """Validate that a parameter is a boolean. - - Args: - value: The value to validate - param_name: Name of the parameter (used in error message) - - Raises: - TypeError: If the value is not a boolean - """ - if not isinstance(value, bool): - msg = f"{param_name} must be a boolean" - raise TypeError(msg) - - -@runtime_checkable -class CellFormatter(Protocol): - """Protocol for cell value formatters.""" - - def __call__(self, value: Any) -> str: - """Format a cell value to string representation.""" - ... - - -@runtime_checkable -class StyleProvider(Protocol): - """Protocol for HTML style providers.""" - - def get_cell_style(self) -> str: - """Get the CSS style for table cells.""" - ... - - def get_header_style(self) -> str: - """Get the CSS style for header cells.""" - ... - - -class DefaultStyleProvider: - """Default implementation of StyleProvider.""" - - def get_cell_style(self) -> str: - """Get the CSS style for table cells. - - Returns: - CSS style string - """ - return ( - "border: 1px solid black; padding: 8px; text-align: left; " - "white-space: nowrap;" - ) - - def get_header_style(self) -> str: - """Get the CSS style for header cells. - - Returns: - CSS style string - """ - return ( - "border: 1px solid black; padding: 8px; text-align: left; " - "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; " - "max-width: fit-content;" - ) - - -class DataFrameHtmlFormatter: - """Configurable HTML formatter for DataFusion DataFrames. - - This class handles the HTML rendering of DataFrames for display in - Jupyter notebooks and other rich display contexts. 
- - This class supports extension through composition. Key extension points: - - Provide a custom StyleProvider for styling cells and headers - - Register custom formatters for specific types - - Provide custom cell builders for specialized cell rendering - - Args: - max_cell_length: Maximum characters to display in a cell before truncation - max_width: Maximum width of the HTML table in pixels - max_height: Maximum height of the HTML table in pixels - max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) - min_rows_display: Minimum number of rows to display - repr_rows: Default number of rows to display in repr output - enable_cell_expansion: Whether to add expand/collapse buttons for long cell - values - custom_css: Additional CSS to include in the HTML output - show_truncation_message: Whether to display a message when data is truncated - style_provider: Custom provider for cell and header styles - use_shared_styles: Whether to load styles and scripts only once per notebook - session - """ - - # Class variable to track if styles have been loaded in the notebook - _styles_loaded = False - - def __init__( - self, - max_cell_length: int = 25, - max_width: int = 1000, - max_height: int = 300, - max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB - min_rows_display: int = 20, - repr_rows: int = 10, - enable_cell_expansion: bool = True, - custom_css: Optional[str] = None, - show_truncation_message: bool = True, - style_provider: Optional[StyleProvider] = None, - use_shared_styles: bool = True, - ) -> None: - """Initialize the HTML formatter. - - Parameters - ---------- - max_cell_length : int, default 25 - Maximum length of cell content before truncation. - max_width : int, default 1000 - Maximum width of the displayed table in pixels. - max_height : int, default 300 - Maximum height of the displayed table in pixels. - max_memory_bytes : int, default 2097152 (2MB) - Maximum memory in bytes for rendered data. 
- min_rows_display : int, default 20 - Minimum number of rows to display. - repr_rows : int, default 10 - Default number of rows to display in repr output. - enable_cell_expansion : bool, default True - Whether to allow cells to expand when clicked. - custom_css : str, optional - Custom CSS to apply to the HTML table. - show_truncation_message : bool, default True - Whether to show a message indicating that content has been truncated. - style_provider : StyleProvider, optional - Provider of CSS styles for the HTML table. If None, DefaultStyleProvider - is used. - use_shared_styles : bool, default True - Whether to use shared styles across multiple tables. - - Raises: - ------ - ValueError - If max_cell_length, max_width, max_height, max_memory_bytes, - min_rows_display, or repr_rows is not a positive integer. - TypeError - If enable_cell_expansion, show_truncation_message, or use_shared_styles is - not a boolean, - or if custom_css is provided but is not a string, - or if style_provider is provided but does not implement the StyleProvider - protocol. 
- """ - # Validate numeric parameters - _validate_positive_int(max_cell_length, "max_cell_length") - _validate_positive_int(max_width, "max_width") - _validate_positive_int(max_height, "max_height") - _validate_positive_int(max_memory_bytes, "max_memory_bytes") - _validate_positive_int(min_rows_display, "min_rows_display") - _validate_positive_int(repr_rows, "repr_rows") - - # Validate boolean parameters - _validate_bool(enable_cell_expansion, "enable_cell_expansion") - _validate_bool(show_truncation_message, "show_truncation_message") - _validate_bool(use_shared_styles, "use_shared_styles") - - # Validate custom_css - if custom_css is not None and not isinstance(custom_css, str): - msg = "custom_css must be None or a string" - raise TypeError(msg) - - # Validate style_provider - if style_provider is not None and not isinstance(style_provider, StyleProvider): - msg = "style_provider must implement the StyleProvider protocol" - raise TypeError(msg) - - self.max_cell_length = max_cell_length - self.max_width = max_width - self.max_height = max_height - self.max_memory_bytes = max_memory_bytes - self.min_rows_display = min_rows_display - self.repr_rows = repr_rows - self.enable_cell_expansion = enable_cell_expansion - self.custom_css = custom_css - self.show_truncation_message = show_truncation_message - self.style_provider = style_provider or DefaultStyleProvider() - self.use_shared_styles = use_shared_styles - # Registry for custom type formatters - self._type_formatters: dict[type, CellFormatter] = {} - # Custom cell builders - self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None - self._custom_header_builder: Optional[Callable[[Any], str]] = None - - def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: - """Register a custom formatter for a specific data type. 
- - Args: - type_class: The type to register a formatter for - formatter: Function that takes a value of the given type and returns - a formatted string - """ - self._type_formatters[type_class] = formatter - - def set_custom_cell_builder( - self, builder: Callable[[Any, int, int, str], str] - ) -> None: - """Set a custom cell builder function. - - Args: - builder: Function that takes (value, row, col, table_id) and returns HTML - """ - self._custom_cell_builder = builder - - def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: - """Set a custom header builder function. - - Args: - builder: Function that takes a field and returns HTML - """ - self._custom_header_builder = builder - - @classmethod - def is_styles_loaded(cls) -> bool: - """Check if HTML styles have been loaded in the current session. - - This method is primarily intended for debugging UI rendering issues - related to style loading. - - Returns: - True if styles have been loaded, False otherwise - - Example: - >>> from datafusion.html_formatter import DataFrameHtmlFormatter - >>> DataFrameHtmlFormatter.is_styles_loaded() - False - """ - return cls._styles_loaded - - def format_html( - self, - batches: list, - schema: Any, - has_more: bool = False, - table_uuid: str | None = None, - ) -> str: - """Format record batches as HTML. - - This method is used by DataFrame's _repr_html_ implementation and can be - called directly when custom HTML rendering is needed. 
- - Args: - batches: List of Arrow RecordBatch objects - schema: Arrow Schema object - has_more: Whether there are more batches not shown - table_uuid: Unique ID for the table, used for JavaScript interactions - - Returns: - HTML string representation of the data - - Raises: - TypeError: If schema is invalid and no batches are provided - """ - if not batches: - return "No data to display" - - # Validate schema - if schema is None or not hasattr(schema, "__iter__"): - msg = "Schema must be provided" - raise TypeError(msg) - - # Generate a unique ID if none provided - table_uuid = table_uuid or f"df-{id(batches)}" - - # Build HTML components - html = [] - - # Only include styles and scripts if: - # 1. Not using shared styles, OR - # 2. Using shared styles but they haven't been loaded yet - include_styles = ( - not self.use_shared_styles or not DataFrameHtmlFormatter._styles_loaded - ) - - if include_styles: - html.extend(self._build_html_header()) - # If we're using shared styles, mark them as loaded - if self.use_shared_styles: - DataFrameHtmlFormatter._styles_loaded = True - - html.extend(self._build_table_container_start()) - - # Add table header and body - html.extend(self._build_table_header(schema)) - html.extend(self._build_table_body(batches, table_uuid)) - - html.append("
" + f"{field.name}
" + f"
" + "" + "" + f"{formatted_value}" + f"" + f"
" + f"
{formatted_value}
") - html.append("
") - - # Add footer (JavaScript and messages) - if include_styles and self.enable_cell_expansion: - html.append(self._get_javascript()) - - # Always add truncation message if needed (independent of styles) - if has_more and self.show_truncation_message: - html.append("
Data truncated due to size.
") - - return "\n".join(html) - - def _build_html_header(self) -> list[str]: - """Build the HTML header with CSS styles.""" - html = [] - html.append("") - return html +import warnings - def _build_table_container_start(self) -> list[str]: - """Build the opening tags for the table container.""" - html = [] - html.append( - f'
' - ) - html.append('') - return html +from datafusion.dataframe_formatter import * # noqa: F403 - def _build_table_header(self, schema: Any) -> list[str]: - """Build the HTML table header with column names.""" - html = [] - html.append("") - html.append("") - for field in schema: - if self._custom_header_builder: - html.append(self._custom_header_builder(field)) - else: - html.append( - f"" - ) - html.append("") - html.append("") - return html - - def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: - """Build the HTML table body with data rows.""" - html = [] - html.append("") - - row_count = 0 - for batch in batches: - for row_idx in range(batch.num_rows): - row_count += 1 - html.append("") - - for col_idx, column in enumerate(batch.columns): - # Get the raw value from the column - raw_value = self._get_cell_value(column, row_idx) - - # Always check for type formatters first to format the value - formatted_value = self._format_cell_value(raw_value) - - # Then apply either custom cell builder or standard cell formatting - if self._custom_cell_builder: - # Pass both the raw value and formatted value to let the - # builder decide - cell_html = self._custom_cell_builder( - raw_value, row_count, col_idx, table_uuid - ) - html.append(cell_html) - else: - # Standard cell formatting with formatted value - if ( - len(str(raw_value)) > self.max_cell_length - and self.enable_cell_expansion - ): - cell_html = self._build_expandable_cell( - formatted_value, row_count, col_idx, table_uuid - ) - else: - cell_html = self._build_regular_cell(formatted_value) - html.append(cell_html) - - html.append("") - - html.append("") - return html - - def _get_cell_value(self, column: Any, row_idx: int) -> Any: - """Extract a cell value from a column. 
- - Args: - column: Arrow array - row_idx: Row index - - Returns: - The raw cell value - """ - try: - value = column[row_idx] - - if hasattr(value, "as_py"): - return value.as_py() - except (AttributeError, TypeError): - pass - else: - return value - - def _format_cell_value(self, value: Any) -> str: - """Format a cell value for display. - - Uses registered type formatters if available. - - Args: - value: The cell value to format - - Returns: - Formatted cell value as string - """ - # Check for custom type formatters - for type_cls, formatter in self._type_formatters.items(): - if isinstance(value, type_cls): - return formatter(value) - - # If no formatter matched, return string representation - return str(value) - - def _build_expandable_cell( - self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str - ) -> str: - """Build an expandable cell for long content.""" - short_value = str(formatted_value)[: self.max_cell_length] - return ( - f"" - ) - - def _build_regular_cell(self, formatted_value: str) -> str: - """Build a regular table cell.""" - return ( - f"" - ) - - def _build_html_footer(self, has_more: bool) -> list[str]: - """Build the HTML footer with JavaScript and messages.""" - html = [] - - # Add JavaScript for interactivity only if cell expansion is enabled - # and we're not using the shared styles approach - if self.enable_cell_expansion and not self.use_shared_styles: - html.append(self._get_javascript()) - - # Add truncation message if needed - if has_more and self.show_truncation_message: - html.append("
Data truncated due to size.
") - - return html - - def _get_default_css(self) -> str: - """Get default CSS styles for the HTML table.""" - return """ - .expandable-container { - display: inline-block; - max-width: 200px; - } - .expandable { - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; - display: block; - } - .full-text { - display: none; - white-space: normal; - } - .expand-btn { - cursor: pointer; - color: blue; - text-decoration: underline; - border: none; - background: none; - font-size: inherit; - display: block; - margin-top: 5px; - } - """ - - def _get_javascript(self) -> str: - """Get JavaScript code for interactive elements.""" - return """ - - """ - - -class FormatterManager: - """Manager class for the global DataFrame HTML formatter instance.""" - - _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() - - @classmethod - def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: - """Set the global DataFrame HTML formatter. - - Args: - formatter: The formatter instance to use globally - """ - cls._default_formatter = formatter - _refresh_formatter_reference() - - @classmethod - def get_formatter(cls) -> DataFrameHtmlFormatter: - """Get the current global DataFrame HTML formatter. - - Returns: - The global HTML formatter instance - """ - return cls._default_formatter - - -def get_formatter() -> DataFrameHtmlFormatter: - """Get the current global DataFrame HTML formatter. - - This function is used by the DataFrame._repr_html_ implementation to access - the shared formatter instance. It can also be used directly when custom - HTML rendering is needed. - - Returns: - The global HTML formatter instance - - Example: - >>> from datafusion.html_formatter import get_formatter - >>> formatter = get_formatter() - >>> formatter.max_cell_length = 50 # Increase cell length - """ - return FormatterManager.get_formatter() - - -def set_formatter(formatter: DataFrameHtmlFormatter) -> None: - """Set the global DataFrame HTML formatter. 
- - Args: - formatter: The formatter instance to use globally - - Example: - >>> from datafusion.html_formatter import get_formatter, set_formatter - >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) - >>> set_formatter(custom_formatter) - """ - FormatterManager.set_formatter(formatter) - - -def configure_formatter(**kwargs: Any) -> None: - """Configure the global DataFrame HTML formatter. - - This function creates a new formatter with the provided configuration - and sets it as the global formatter for all DataFrames. - - Args: - **kwargs: Formatter configuration parameters like max_cell_length, - max_width, max_height, enable_cell_expansion, etc. - - Raises: - ValueError: If any invalid parameters are provided - - Example: - >>> from datafusion.html_formatter import configure_formatter - >>> configure_formatter( - ... max_cell_length=50, - ... max_height=500, - ... enable_cell_expansion=True, - ... use_shared_styles=True - ... ) - """ - # Valid parameters accepted by DataFrameHtmlFormatter - valid_params = { - "max_cell_length", - "max_width", - "max_height", - "max_memory_bytes", - "min_rows_display", - "repr_rows", - "enable_cell_expansion", - "custom_css", - "show_truncation_message", - "style_provider", - "use_shared_styles", - } - - # Check for invalid parameters - invalid_params = set(kwargs) - valid_params - if invalid_params: - msg = ( - f"Invalid formatter parameters: {', '.join(invalid_params)}. " - f"Valid parameters are: {', '.join(valid_params)}" - ) - raise ValueError(msg) - - # Create and set formatter with validated parameters - set_formatter(DataFrameHtmlFormatter(**kwargs)) - - -def reset_formatter() -> None: - """Reset the global DataFrame HTML formatter to default settings. - - This function creates a new formatter with default configuration - and sets it as the global formatter for all DataFrames. 
- - Example: - >>> from datafusion.html_formatter import reset_formatter - >>> reset_formatter() # Reset formatter to default settings - """ - formatter = DataFrameHtmlFormatter() - # Reset the styles_loaded flag to ensure styles will be reloaded - DataFrameHtmlFormatter._styles_loaded = False - set_formatter(formatter) - - -def reset_styles_loaded_state() -> None: - """Reset the styles loaded state to force reloading of styles. - - This can be useful when switching between notebook sessions or - when styles need to be refreshed. - - Example: - >>> from datafusion.html_formatter import reset_styles_loaded_state - >>> reset_styles_loaded_state() # Force styles to reload in next render - """ - DataFrameHtmlFormatter._styles_loaded = False - - -def _refresh_formatter_reference() -> None: - """Refresh formatter reference in any modules using it. - - This helps ensure that changes to the formatter are reflected in existing - DataFrames that might be caching the formatter reference. - """ - # This is a no-op but signals modules to refresh their reference +warnings.warn( + "The module 'html_formatter' is deprecated and will be removed in the next release." 
+ "Please use 'dataframe_formatter' instead.", + DeprecationWarning, + stacklevel=3, +) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index deaa30b3d..c9ae38d8e 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -37,14 +37,14 @@ from datafusion import ( functions as f, ) -from datafusion.expr import Window -from datafusion.html_formatter import ( +from datafusion.dataframe_formatter import ( DataFrameHtmlFormatter, configure_formatter, get_formatter, reset_formatter, reset_styles_loaded_state, ) +from datafusion.expr import Window from pyarrow.csv import write_csv MB = 1024 * 1024 diff --git a/src/dataframe.rs b/src/dataframe.rs index 3d68db279..c2ad4771e 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -24,6 +24,7 @@ use arrow::compute::can_cast_types; use arrow::error::ArrowError; use arrow::ffi::FFI_ArrowSchema; use arrow::ffi_stream::FFI_ArrowArrayStream; +use arrow::pyarrow::FromPyArrow; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::arrow::util::pretty; @@ -150,9 +151,9 @@ fn get_python_formatter_with_config(py: Python) -> PyResult { Ok(PythonFormatter { formatter, config }) } -/// Get the Python formatter from the datafusion.html_formatter module +/// Get the Python formatter from the datafusion.dataframe_formatter module fn import_python_formatter(py: Python) -> PyResult> { - let formatter_module = py.import("datafusion.html_formatter")?; + let formatter_module = py.import("datafusion.dataframe_formatter")?; let get_formatter = formatter_module.getattr("get_formatter")?; get_formatter.call0() } @@ -295,6 +296,46 @@ impl PyDataFrame { pub fn new(df: DataFrame) -> Self { Self { df: Arc::new(df) } } + + fn prepare_repr_string(&self, py: Python, as_html: bool) -> PyDataFusionResult { + // Get the Python formatter and config + let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; + let (batches, 
has_more) = wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), config), + )??; + if batches.is_empty() { + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); + } + + let table_uuid = uuid::Uuid::new_v4().to_string(); + + // Convert record batches to PyObject list + let py_batches = batches + .into_iter() + .map(|rb| rb.to_pyarrow(py)) + .collect::>>()?; + + let py_schema = self.schema().into_pyobject(py)?; + + let kwargs = pyo3::types::PyDict::new(py); + let py_batches_list = PyList::new(py, py_batches.as_slice())?; + kwargs.set_item("batches", py_batches_list)?; + kwargs.set_item("schema", py_schema)?; + kwargs.set_item("has_more", has_more)?; + kwargs.set_item("table_uuid", table_uuid)?; + + let method_name = match as_html { + true => "format_html", + false => "format_str", + }; + + let html_result = formatter.call_method(method_name, (), Some(&kwargs))?; + let html_str: String = html_result.extract()?; + + Ok(html_str) + } } #[pymethods] @@ -321,18 +362,27 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { - // Get the Python formatter config - let PythonFormatter { - formatter: _, - config, - } = get_python_formatter_with_config(py)?; - let (batches, has_more) = wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??; + self.prepare_repr_string(py, false) + } + + #[staticmethod] + #[expect(unused_variables)] + fn default_str_repr<'py>( + batches: Vec>, + schema: &Bound<'py, PyAny>, + has_more: bool, + table_uuid: &str, + ) -> PyResult { + let batches = batches + .into_iter() + .map(|batch| RecordBatch::from_pyarrow_bound(&batch)) + .collect::>>()? 
+ .into_iter() + .filter(|batch| batch.num_rows() > 0) + .collect::>(); + if batches.is_empty() { - // This should not be reached, but do it for safety since we index into the vector below - return Ok("No data to display".to_string()); + return Ok("No data to display".to_owned()); } let batches_as_displ = @@ -347,38 +397,7 @@ impl PyDataFrame { } fn _repr_html_(&self, py: Python) -> PyDataFusionResult { - // Get the Python formatter and config - let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; - let (batches, has_more) = wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??; - if batches.is_empty() { - // This should not be reached, but do it for safety since we index into the vector below - return Ok("No data to display".to_string()); - } - - let table_uuid = uuid::Uuid::new_v4().to_string(); - - // Convert record batches to PyObject list - let py_batches = batches - .into_iter() - .map(|rb| rb.to_pyarrow(py)) - .collect::>>()?; - - let py_schema = self.schema().into_pyobject(py)?; - - let kwargs = pyo3::types::PyDict::new(py); - let py_batches_list = PyList::new(py, py_batches.as_slice())?; - kwargs.set_item("batches", py_batches_list)?; - kwargs.set_item("schema", py_schema)?; - kwargs.set_item("has_more", has_more)?; - kwargs.set_item("table_uuid", table_uuid)?; - - let html_result = formatter.call_method("format_html", (), Some(&kwargs))?; - let html_str: String = html_result.extract()?; - - Ok(html_str) + self.prepare_repr_string(py, true) } /// Calculate summary statistics for a DataFrame From 954563429384078a9e85c56ad553c7e3be7ac52a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 25 Jun 2025 11:29:35 -0400 Subject: [PATCH 094/248] feat: collect once during display() in jupyter notebooks (#1167) * Only collect one time during display() in jupyter notebooks * Check for juypter notebook environment specifically * Remove approach of checking environment which could not 
differentiate between jupyter console and notebook * Instead of trying to detect notebook vs console, collect one time when we have any kind if ipython environment. --- src/dataframe.rs | 36 ++++++++++++++++++++++++++---------- src/utils.rs | 11 +++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index c2ad4771e..ab4749e35 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -51,7 +51,7 @@ use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; use crate::utils::{ - get_tokio_runtime, py_obj_to_scalar_value, validate_pycapsule, wait_for_future, + get_tokio_runtime, is_ipython_env, py_obj_to_scalar_value, validate_pycapsule, wait_for_future, }; use crate::{ errors::PyDataFusionResult, @@ -289,21 +289,33 @@ impl PyParquetColumnOptions { #[derive(Clone)] pub struct PyDataFrame { df: Arc, + + // In IPython environment cache batches between __repr__ and _repr_html_ calls. 
+ batches: Option<(Vec, bool)>, } impl PyDataFrame { /// creates a new PyDataFrame pub fn new(df: DataFrame) -> Self { - Self { df: Arc::new(df) } + Self { + df: Arc::new(df), + batches: None, + } } - fn prepare_repr_string(&self, py: Python, as_html: bool) -> PyDataFusionResult { + fn prepare_repr_string(&mut self, py: Python, as_html: bool) -> PyDataFusionResult { // Get the Python formatter and config let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; - let (batches, has_more) = wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??; + + let should_cache = *is_ipython_env(py) && self.batches.is_none(); + let (batches, has_more) = match self.batches.take() { + Some(b) => b, + None => wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), config), + )??, + }; + if batches.is_empty() { // This should not be reached, but do it for safety since we index into the vector below return Ok("No data to display".to_string()); @@ -313,7 +325,7 @@ impl PyDataFrame { // Convert record batches to PyObject list let py_batches = batches - .into_iter() + .iter() .map(|rb| rb.to_pyarrow(py)) .collect::>>()?; @@ -334,6 +346,10 @@ impl PyDataFrame { let html_result = formatter.call_method(method_name, (), Some(&kwargs))?; let html_str: String = html_result.extract()?; + if should_cache { + self.batches = Some((batches, has_more)); + } + Ok(html_str) } } @@ -361,7 +377,7 @@ impl PyDataFrame { } } - fn __repr__(&self, py: Python) -> PyDataFusionResult { + fn __repr__(&mut self, py: Python) -> PyDataFusionResult { self.prepare_repr_string(py, false) } @@ -396,7 +412,7 @@ impl PyDataFrame { Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}")) } - fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + fn _repr_html_(&mut self, py: Python) -> PyDataFusionResult { self.prepare_repr_string(py, true) } diff --git a/src/utils.rs b/src/utils.rs index 90d654385..f4e121fd5 
100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -39,6 +39,17 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap())) } +#[inline] +pub(crate) fn is_ipython_env(py: Python) -> &'static bool { + static IS_IPYTHON_ENV: OnceLock = OnceLock::new(); + IS_IPYTHON_ENV.get_or_init(|| { + py.import("IPython") + .and_then(|ipython| ipython.call_method0("get_ipython")) + .map(|ipython| !ipython.is_none()) + .unwrap_or(false) + }) +} + /// Utility to get the Global Datafussion CTX #[inline] pub(crate) fn get_global_ctx() -> &'static SessionContext { From 9362f53150e5423581757ed56883b3ca2c95b8a2 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 2 Jul 2025 08:08:53 -0400 Subject: [PATCH 095/248] feat: python based catalog and schema provider (#1156) * Exposing FFI to python * Exposing FFI to python * Workin progress on python catalog * Flushing out schema and catalog providers * Adding implementation of python based catalog and schema providers * Small updates after rebase * Add default in memory options for adding schema and catalogs * Add support for creating in memory catalog and schema * Update from database to schema in unit tests * xfailed label no longer applies to these unit tests * Defining abstract methods for catalog and schema providers * Working through issues between custom catalog and build in schema * Check types on schema provider to return * Add docstring * Add documentation about how to use catalog and schema providers * Re-add module to all after rebase * Minor bugfix * Clippy updates from the new rust version --------- Co-authored-by: renato2099 --- Cargo.lock | 19 + Cargo.toml | 2 + docs/source/user-guide/data-sources.rst | 56 ++ examples/datafusion-ffi-example/Cargo.lock | 1 + examples/datafusion-ffi-example/Cargo.toml | 1 + .../python/tests/_test_catalog_provider.py | 60 +++ .../src/catalog_provider.rs | 179 +++++++ examples/datafusion-ffi-example/src/lib.rs | 3 + 
python/datafusion/__init__.py | 1 + python/datafusion/catalog.py | 195 ++++++- python/datafusion/context.py | 24 +- python/datafusion/dataframe.py | 9 +- python/tests/test_catalog.py | 173 ++++++- python/tests/test_context.py | 40 +- python/tests/test_sql.py | 30 +- python/tests/test_substrait.py | 4 +- src/catalog.rs | 490 ++++++++++++++++-- src/common/data_type.rs | 120 ++--- src/context.rs | 61 ++- src/expr.rs | 15 +- src/expr/aggregate.rs | 2 +- src/expr/aggregate_expr.rs | 2 +- src/expr/alias.rs | 2 +- src/expr/analyze.rs | 2 +- src/expr/between.rs | 2 +- src/expr/column.rs | 2 +- src/expr/copy_to.rs | 4 +- src/expr/create_catalog.rs | 2 +- src/expr/create_catalog_schema.rs | 2 +- src/expr/create_external_table.rs | 2 +- src/expr/create_function.rs | 2 +- src/expr/create_index.rs | 2 +- src/expr/create_memory_table.rs | 2 +- src/expr/create_view.rs | 2 +- src/expr/describe_table.rs | 2 +- src/expr/distinct.rs | 5 +- src/expr/drop_catalog_schema.rs | 2 +- src/expr/drop_function.rs | 2 +- src/expr/drop_table.rs | 2 +- src/expr/drop_view.rs | 2 +- src/expr/empty_relation.rs | 2 +- src/expr/filter.rs | 2 +- src/expr/join.rs | 2 +- src/expr/like.rs | 6 +- src/expr/limit.rs | 2 +- src/expr/projection.rs | 2 +- src/expr/recursive_query.rs | 2 +- src/expr/repartition.rs | 2 +- src/expr/sort.rs | 2 +- src/expr/sort_expr.rs | 2 +- src/expr/subquery.rs | 2 +- src/expr/subquery_alias.rs | 2 +- src/expr/table_scan.rs | 2 +- src/expr/union.rs | 2 +- src/expr/unnest.rs | 2 +- src/expr/unnest_expr.rs | 2 +- src/expr/window.rs | 11 +- src/functions.rs | 2 +- src/lib.rs | 10 +- src/physical_plan.rs | 3 +- src/sql/logical.rs | 3 +- src/utils.rs | 5 +- 62 files changed, 1340 insertions(+), 258 deletions(-) create mode 100644 examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py create mode 100644 examples/datafusion-ffi-example/src/catalog_provider.rs diff --git a/Cargo.lock b/Cargo.lock index 112167cb4..a3e9336cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-165,6 +165,12 @@ dependencies = [ "zstd", ] +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + [[package]] name = "arrayref" version = "0.3.9" @@ -1503,6 +1509,7 @@ dependencies = [ "datafusion-proto", "datafusion-substrait", "futures", + "log", "mimalloc", "object_store", "prost", @@ -1510,6 +1517,7 @@ dependencies = [ "pyo3", "pyo3-async-runtimes", "pyo3-build-config", + "pyo3-log", "tokio", "url", "uuid", @@ -2953,6 +2961,17 @@ dependencies = [ "pyo3-build-config", ] +[[package]] +name = "pyo3-log" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45192e5e4a4d2505587e27806c7b710c231c40c56f3bfc19535d0bb25df52264" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + [[package]] name = "pyo3-macros" version = "0.24.2" diff --git a/Cargo.toml b/Cargo.toml index 4135e64e2..1f7895a50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.45", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]} +pyo3-log = "0.12.4" arrow = { version = "55.1.0", features = ["pyarrow"] } datafusion = { version = "48.0.0", features = ["avro", "unicode_expressions"] } datafusion-substrait = { version = "48.0.0", optional = true } @@ -49,6 +50,7 @@ async-trait = "0.1.88" futures = "0.3" object_store = { version = "0.12.1", features = ["aws", "gcp", "azure", "http"] } url = "2" +log = "0.4.27" [build-dependencies] prost-types = "0.13.1" # keep in line with `datafusion-substrait` diff --git a/docs/source/user-guide/data-sources.rst b/docs/source/user-guide/data-sources.rst index ba5967c97..9c95d58e0 100644 --- a/docs/source/user-guide/data-sources.rst +++ 
b/docs/source/user-guide/data-sources.rst @@ -185,3 +185,59 @@ the interface as describe in the :ref:`Custom Table Provider `_ is provided in the DataFusion repository. + +Catalog +======= + +A common technique for organizing tables is using a three level hierarchical approach. DataFusion +supports this form of organizing using the :py:class:`~datafusion.catalog.Catalog`, +:py:class:`~datafusion.catalog.Schema`, and :py:class:`~datafusion.catalog.Table`. By default, +a :py:class:`~datafusion.context.SessionContext` comes with a single Catalog and a single Schema +with the names ``datafusion`` and ``default``, respectively. + +The default implementation uses an in-memory approach to the catalog and schema. We have support +for adding additional in-memory catalogs and schemas. This can be done like in the following +example: + +.. code-block:: python + + from datafusion.catalog import Catalog, Schema + + my_catalog = Catalog.memory_catalog() + my_schema = Schema.memory_schema() + + my_catalog.register_schema("my_schema_name", my_schema) + + ctx.register_catalog("my_catalog_name", my_catalog) + +You could then register tables in ``my_schema`` and access them either through the DataFrame +API or via sql commands such as ``"SELECT * from my_catalog_name.my_schema_name.my_table"``. + +User Defined Catalog and Schema +------------------------------- + +If the in-memory catalogs are insufficient for your uses, there are two approaches you can take +to implementing a custom catalog and/or schema. In the below discussion, we describe how to +implement these for a Catalog, but the approach to implementing for a Schema is nearly +identical. + +DataFusion supports Catalogs written in either Rust or Python. If you write a Catalog in Rust, +you will need to export it as a Python library via PyO3. There is a complete example of a +catalog implemented this way in the +`examples folder `_ +of our repository. 
Writing catalog providers in Rust can typically lead to significant +performance improvements over the Python based approach. + +To implement a Catalog in Python, you will need to inherit from the abstract base class +:py:class:`~datafusion.catalog.CatalogProvider`. There are examples in the +`unit tests `_ of +implementing a basic Catalog in Python where we simply keep a dictionary of the +registered Schemas. + +One important note for developers is that when we have a Catalog defined in Python, we have +two different ways of accessing this Catalog. First, we register the catalog with a Rust +wrapper. This allows for any Rust-based code to call the Python functions as necessary. +Second, if the user accesses the Catalog via the Python API, we identify this and return back +the original Python object that implements the Catalog. This is an important distinction +for developers because we do *not* return a Python wrapper around the Rust wrapper of the +original Python object. diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock index 075ebd5a1..e5a1ca8d1 100644 --- a/examples/datafusion-ffi-example/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -1448,6 +1448,7 @@ dependencies = [ "arrow", "arrow-array", "arrow-schema", + "async-trait", "datafusion", "datafusion-ffi", "pyo3", diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml index 0e17567b9..319163554 100644 --- a/examples/datafusion-ffi-example/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -27,6 +27,7 @@ pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] arrow = { version = "55.0.0" } arrow-array = { version = "55.0.0" } arrow-schema = { version = "55.0.0" } +async-trait = "0.1.88" [build-dependencies] pyo3-build-config = "0.23" diff --git a/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py
b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py new file mode 100644 index 000000000..72aadf64c --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext +from datafusion_ffi_example import MyCatalogProvider + + +def test_catalog_provider(): + ctx = SessionContext() + + my_catalog_name = "my_catalog" + expected_schema_name = "my_schema" + expected_table_name = "my_table" + expected_table_columns = ["units", "price"] + + catalog_provider = MyCatalogProvider() + ctx.register_catalog_provider(my_catalog_name, catalog_provider) + my_catalog = ctx.catalog(my_catalog_name) + + my_catalog_schemas = my_catalog.names() + assert expected_schema_name in my_catalog_schemas + my_database = my_catalog.database(expected_schema_name) + assert expected_table_name in my_database.names() + my_table = my_database.table(expected_table_name) + assert expected_table_columns == my_table.schema.names + + result = ctx.table( + f"{my_catalog_name}.{expected_schema_name}.{expected_table_name}" + ).collect() + assert len(result) == 2 + + col0_result = [r.column(0) for r in result] + col1_result = [r.column(1) for r in result] + expected_col0 = [ + pa.array([10, 20, 30], type=pa.int32()), + pa.array([5, 7], type=pa.int32()), + ] + expected_col1 = [ + pa.array([1, 2, 5], type=pa.float64()), + pa.array([1.5, 2.5], type=pa.float64()), + ] + assert col0_result == expected_col0 + assert col1_result == expected_col1 diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs new file mode 100644 index 000000000..54e61cf3e --- /dev/null +++ b/examples/datafusion-ffi-example/src/catalog_provider.rs @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::{any::Any, fmt::Debug, sync::Arc}; + +use arrow::datatypes::Schema; +use async_trait::async_trait; +use datafusion::{ + catalog::{ + CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, TableProvider, + }, + common::exec_err, + datasource::MemTable, + error::{DataFusionError, Result}, +}; +use datafusion_ffi::catalog_provider::FFI_CatalogProvider; +use pyo3::types::PyCapsule; + +pub fn my_table() -> Arc { + use arrow::datatypes::{DataType, Field}; + use datafusion::common::record_batch; + + let schema = Arc::new(Schema::new(vec![ + Field::new("units", DataType::Int32, true), + Field::new("price", DataType::Float64, true), + ])); + + let partitions = vec![ + record_batch!( + ("units", Int32, vec![10, 20, 30]), + ("price", Float64, vec![1.0, 2.0, 5.0]) + ) + .unwrap(), + record_batch!( + ("units", Int32, vec![5, 7]), + ("price", Float64, vec![1.5, 2.5]) + ) + .unwrap(), + ]; + + Arc::new(MemTable::try_new(schema, vec![partitions]).unwrap()) +} + +#[derive(Debug)] +pub struct FixedSchemaProvider { + inner: MemorySchemaProvider, +} + +impl Default for FixedSchemaProvider { + fn default() -> Self { + let inner = MemorySchemaProvider::new(); + + let table = my_table(); + + let _ = inner.register_table("my_table".to_string(), table).unwrap(); + + Self { inner } + } +} + +#[async_trait] +impl 
SchemaProvider for FixedSchemaProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn table_names(&self) -> Vec { + self.inner.table_names() + } + + async fn table(&self, name: &str) -> Result>, DataFusionError> { + self.inner.table(name).await + } + + fn register_table( + &self, + name: String, + table: Arc, + ) -> Result>> { + self.inner.register_table(name, table) + } + + fn deregister_table(&self, name: &str) -> Result>> { + self.inner.deregister_table(name) + } + + fn table_exist(&self, name: &str) -> bool { + self.inner.table_exist(name) + } +} + +/// This catalog provider is intended only for unit tests. It prepopulates with one +/// schema and only allows for schemas named after four types of fruit. +#[pyclass( + name = "MyCatalogProvider", + module = "datafusion_ffi_example", + subclass +)] +#[derive(Debug)] +pub(crate) struct MyCatalogProvider { + inner: MemoryCatalogProvider, +} + +impl Default for MyCatalogProvider { + fn default() -> Self { + let inner = MemoryCatalogProvider::new(); + + let schema_name: &str = "my_schema"; + let _ = inner.register_schema(schema_name, Arc::new(FixedSchemaProvider::default())); + + Self { inner } + } +} + +impl CatalogProvider for MyCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + self.inner.schema_names() + } + + fn schema(&self, name: &str) -> Option> { + self.inner.schema(name) + } + + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> Result>> { + self.inner.register_schema(name, schema) + } + + fn deregister_schema( + &self, + name: &str, + cascade: bool, + ) -> Result>> { + self.inner.deregister_schema(name, cascade) + } +} + +#[pymethods] +impl MyCatalogProvider { + #[new] + pub fn new() -> Self { + Self { + inner: Default::default(), + } + } + + pub fn __datafusion_catalog_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_catalog_provider".into(); + let catalog_provider = + 
FFI_CatalogProvider::new(Arc::new(MyCatalogProvider::default()), None); + + PyCapsule::new(py, catalog_provider, Some(name)) + } +} diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs index ae08c3b65..3a4cf2247 100644 --- a/examples/datafusion-ffi-example/src/lib.rs +++ b/examples/datafusion-ffi-example/src/lib.rs @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. +use crate::catalog_provider::MyCatalogProvider; use crate::table_function::MyTableFunction; use crate::table_provider::MyTableProvider; use pyo3::prelude::*; +pub(crate) mod catalog_provider; pub(crate) mod table_function; pub(crate) mod table_provider; @@ -26,5 +28,6 @@ pub(crate) mod table_provider; fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index fd7f4fc06..e9d2dba75 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -92,6 +92,7 @@ "TableFunction", "WindowFrame", "WindowUDF", + "catalog", "col", "column", "common", diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 67ab3ead2..536b3a790 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -19,18 +19,33 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Protocol import datafusion._internal as df_internal if TYPE_CHECKING: import pyarrow as pa +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 + + +__all__ = [ + "Catalog", + "CatalogProvider", + "Schema", + "SchemaProvider", + "Table", +] + class Catalog: """DataFusion data catalog.""" - def __init__(self, catalog: df_internal.Catalog) -> None: + def __init__(self, catalog: 
df_internal.catalog.RawCatalog) -> None: """This constructor is not typically called by the end user.""" self.catalog = catalog @@ -38,39 +53,95 @@ def __repr__(self) -> str: """Print a string representation of the catalog.""" return self.catalog.__repr__() - def names(self) -> list[str]: - """Returns the list of databases in this catalog.""" - return self.catalog.names() + def names(self) -> set[str]: + """This is an alias for `schema_names`.""" + return self.schema_names() + + def schema_names(self) -> set[str]: + """Returns the list of schemas in this catalog.""" + return self.catalog.schema_names() + + @staticmethod + def memory_catalog() -> Catalog: + """Create an in-memory catalog provider.""" + catalog = df_internal.catalog.RawCatalog.memory_catalog() + return Catalog(catalog) - def database(self, name: str = "public") -> Database: + def schema(self, name: str = "public") -> Schema: """Returns the database with the given ``name`` from this catalog.""" - return Database(self.catalog.database(name)) + schema = self.catalog.schema(name) + + return ( + Schema(schema) + if isinstance(schema, df_internal.catalog.RawSchema) + else schema + ) + + @deprecated("Use `schema` instead.") + def database(self, name: str = "public") -> Schema: + """Returns the database with the given ``name`` from this catalog.""" + return self.schema(name) + + def register_schema(self, name, schema) -> Schema | None: + """Register a schema with this catalog.""" + if isinstance(schema, Schema): + return self.catalog.register_schema(name, schema._raw_schema) + return self.catalog.register_schema(name, schema) + + def deregister_schema(self, name: str, cascade: bool = True) -> Schema | None: + """Deregister a schema from this catalog.""" + return self.catalog.deregister_schema(name, cascade) -class Database: - """DataFusion Database.""" +class Schema: + """DataFusion Schema.""" - def __init__(self, db: df_internal.Database) -> None: + def __init__(self, schema: df_internal.catalog.RawSchema) 
-> None: """This constructor is not typically called by the end user.""" - self.db = db + self._raw_schema = schema def __repr__(self) -> str: - """Print a string representation of the database.""" - return self.db.__repr__() + """Print a string representation of the schema.""" + return self._raw_schema.__repr__() + + @staticmethod + def memory_schema() -> Schema: + """Create an in-memory schema provider.""" + schema = df_internal.catalog.RawSchema.memory_schema() + return Schema(schema) def names(self) -> set[str]: - """Returns the list of all tables in this database.""" - return self.db.names() + """This is an alias for `table_names`.""" + return self.table_names() + + def table_names(self) -> set[str]: + """Returns the list of all tables in this schema.""" + return self._raw_schema.table_names def table(self, name: str) -> Table: - """Return the table with the given ``name`` from this database.""" - return Table(self.db.table(name)) + """Return the table with the given ``name`` from this schema.""" + return Table(self._raw_schema.table(name)) + + def register_table(self, name, table) -> None: + """Register a table provider in this schema.""" + if isinstance(table, Table): + return self._raw_schema.register_table(name, table.table) + return self._raw_schema.register_table(name, table) + + def deregister_table(self, name: str) -> None: + """Deregister a table provider from this schema.""" + return self._raw_schema.deregister_table(name) + + +@deprecated("Use `Schema` instead.") +class Database(Schema): + """See `Schema`.""" class Table: """DataFusion table.""" - def __init__(self, table: df_internal.Table) -> None: + def __init__(self, table: df_internal.catalog.RawTable) -> None: """This constructor is not typically called by the end user.""" self.table = table @@ -78,6 +149,11 @@ def __repr__(self) -> str: """Print a string representation of the table.""" return self.table.__repr__() + @staticmethod + def from_dataset(dataset: pa.dataset.Dataset) -> Table: + 
"""Turn a pyarrow Dataset into a Table.""" + return Table(df_internal.catalog.RawTable.from_dataset(dataset)) + @property def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" @@ -87,3 +163,86 @@ def schema(self) -> pa.Schema: def kind(self) -> str: """Returns the kind of table.""" return self.table.kind + + +class CatalogProvider(ABC): + """Abstract class for defining a Python based Catalog Provider.""" + + @abstractmethod + def schema_names(self) -> set[str]: + """Set of the names of all schemas in this catalog.""" + ... + + @abstractmethod + def schema(self, name: str) -> Schema | None: + """Retrieve a specific schema from this catalog.""" + ... + + def register_schema( # noqa: B027 + self, name: str, schema: SchemaProviderExportable | SchemaProvider | Schema + ) -> None: + """Add a schema to this catalog. + + This method is optional. If your catalog provides a fixed list of schemas, you + do not need to implement this method. + """ + + def deregister_schema(self, name: str, cascade: bool) -> None: # noqa: B027 + """Remove a schema from this catalog. + + This method is optional. If your catalog provides a fixed list of schemas, you + do not need to implement this method. + + Args: + name: The name of the schema to remove. + cascade: If true, deregister the tables within the schema. + """ + + +class SchemaProvider(ABC): + """Abstract class for defining a Python based Schema Provider.""" + + def owner_name(self) -> str | None: + """Returns the owner of the schema. + + This is an optional method. The default return is None. + """ + return None + + @abstractmethod + def table_names(self) -> set[str]: + """Set of the names of all tables in this schema.""" + ... + + @abstractmethod + def table(self, name: str) -> Table | None: + """Retrieve a specific table from this schema.""" + ... + + def register_table(self, name: str, table: Table) -> None: # noqa: B027 + """Add a table from this schema. + + This method is optional. 
If your schema provides a fixed list of tables, you do + not need to implement this method. + """ + + def deregister_table(self, name, cascade: bool) -> None: # noqa: B027 + """Remove a table from this schema. + + This method is optional. If your schema provides a fixed list of tables, you do + not need to implement this method. + """ + + @abstractmethod + def table_exist(self, name: str) -> bool: + """Returns true if the table exists in this schema.""" + ... + + +class SchemaProviderExportable(Protocol): + """Type hint for object that has __datafusion_schema_provider__ PyCapsule. + + https://docs.rs/datafusion/latest/datafusion/catalog/trait.SchemaProvider.html + """ + + def __datafusion_schema_provider__(self) -> object: ... diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 5b99b0d26..bce51d644 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -29,7 +29,7 @@ except ImportError: from typing_extensions import deprecated # Python 3.12 -from datafusion.catalog import Catalog, Table +from datafusion.catalog import Catalog, CatalogProvider, Table from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream @@ -80,6 +80,15 @@ class TableProviderExportable(Protocol): def __datafusion_table_provider__(self) -> object: ... # noqa: D105 +class CatalogProviderExportable(Protocol): + """Type hint for object that has __datafusion_catalog_provider__ PyCapsule. + + https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProvider.html + """ + + def __datafusion_catalog_provider__(self) -> object: ... 
# noqa: D105 + + class SessionConfig: """Session configuration options.""" @@ -749,6 +758,19 @@ def deregister_table(self, name: str) -> None: """Remove a table from the session.""" self.ctx.deregister_table(name) + def catalog_names(self) -> set[str]: + """Returns the list of catalogs in this context.""" + return self.ctx.catalog_names() + + def register_catalog_provider( + self, name: str, provider: CatalogProviderExportable | CatalogProvider | Catalog + ) -> None: + """Register a catalog provider.""" + if isinstance(provider, Catalog): + self.ctx.register_catalog_provider(name, provider.catalog) + else: + self.ctx.register_catalog_provider(name, provider) + def register_table_provider( self, name: str, provider: TableProviderExportable ) -> None: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 991e6875a..61cb09438 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -760,19 +760,16 @@ def join_on( exprs = [expr.expr for expr in on_exprs] return DataFrame(self.df.join_on(right.df, exprs, how)) - def explain(self, verbose: bool = False, analyze: bool = False) -> DataFrame: - """Return a DataFrame with the explanation of its plan so far. + def explain(self, verbose: bool = False, analyze: bool = False) -> None: + """Print an explanation of the DataFrame's plan so far. If ``analyze`` is specified, runs the plan and reports metrics. Args: verbose: If ``True``, more details will be included. analyze: If ``Tru`e``, the plan will run and metrics reported. - - Returns: - DataFrame with the explanation of its plan. """ - return DataFrame(self.df.explain(verbose, analyze)) + self.df.explain(verbose, analyze) def logical_plan(self) -> LogicalPlan: """Return the unoptimized ``LogicalPlan``. 
diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py index 23b328458..1f9ecbfc3 100644 --- a/python/tests/test_catalog.py +++ b/python/tests/test_catalog.py @@ -14,9 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations +import datafusion as dfn import pyarrow as pa +import pyarrow.dataset as ds import pytest +from datafusion import SessionContext, Table # Note we take in `database` as a variable even though we don't use @@ -27,9 +31,9 @@ def test_basic(ctx, database): ctx.catalog("non-existent") default = ctx.catalog() - assert default.names() == ["public"] + assert default.names() == {"public"} - for db in [default.database("public"), default.database()]: + for db in [default.schema("public"), default.schema()]: assert db.names() == {"csv1", "csv", "csv2"} table = db.table("csv") @@ -41,3 +45,168 @@ def test_basic(ctx, database): pa.field("float", pa.float64(), nullable=True), ] ) + + +def create_dataset() -> Table: + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + dataset = ds.dataset([batch]) + return Table.from_dataset(dataset) + + +class CustomSchemaProvider(dfn.catalog.SchemaProvider): + def __init__(self): + self.tables = {"table1": create_dataset()} + + def table_names(self) -> set[str]: + return set(self.tables.keys()) + + def register_table(self, name: str, table: Table): + self.tables[name] = table + + def deregister_table(self, name, cascade: bool = True): + del self.tables[name] + + def table(self, name: str) -> Table | None: + return self.tables[name] + + def table_exist(self, name: str) -> bool: + return name in self.tables + + +class CustomCatalogProvider(dfn.catalog.CatalogProvider): + def __init__(self): + self.schemas = {"my_schema": CustomSchemaProvider()} + + def schema_names(self) -> set[str]: + return set(self.schemas.keys()) + + def 
schema(self, name: str): + return self.schemas[name] + + def register_schema(self, name: str, schema: dfn.catalog.Schema): + self.schemas[name] = schema + + def deregister_schema(self, name, cascade: bool): + del self.schemas[name] + + +def test_python_catalog_provider(ctx: SessionContext): + ctx.register_catalog_provider("my_catalog", CustomCatalogProvider()) + + # Check the default catalog provider + assert ctx.catalog("datafusion").names() == {"public"} + + my_catalog = ctx.catalog("my_catalog") + assert my_catalog.names() == {"my_schema"} + + my_catalog.register_schema("second_schema", CustomSchemaProvider()) + assert my_catalog.schema_names() == {"my_schema", "second_schema"} + + my_catalog.deregister_schema("my_schema") + assert my_catalog.schema_names() == {"second_schema"} + + +def test_in_memory_providers(ctx: SessionContext): + catalog = dfn.catalog.Catalog.memory_catalog() + ctx.register_catalog_provider("in_mem_catalog", catalog) + + assert ctx.catalog_names() == {"datafusion", "in_mem_catalog"} + + schema = dfn.catalog.Schema.memory_schema() + catalog.register_schema("in_mem_schema", schema) + + schema.register_table("my_table", create_dataset()) + + batches = ctx.sql("select * from in_mem_catalog.in_mem_schema.my_table").collect() + + assert len(batches) == 1 + assert batches[0].column(0) == pa.array([1, 2, 3]) + assert batches[0].column(1) == pa.array([4, 5, 6]) + + +def test_python_schema_provider(ctx: SessionContext): + catalog = ctx.catalog() + + catalog.deregister_schema("public") + + catalog.register_schema("test_schema1", CustomSchemaProvider()) + assert catalog.names() == {"test_schema1"} + + catalog.register_schema("test_schema2", CustomSchemaProvider()) + catalog.deregister_schema("test_schema1") + assert catalog.names() == {"test_schema2"} + + +def test_python_table_provider(ctx: SessionContext): + catalog = ctx.catalog() + + catalog.register_schema("custom_schema", CustomSchemaProvider()) + schema = catalog.schema("custom_schema") + + 
assert schema.table_names() == {"table1"} + + schema.deregister_table("table1") + schema.register_table("table2", create_dataset()) + assert schema.table_names() == {"table2"} + + # Use the default schema instead of our custom schema + + schema = catalog.schema() + + schema.register_table("table3", create_dataset()) + assert schema.table_names() == {"table3"} + + schema.deregister_table("table3") + schema.register_table("table4", create_dataset()) + assert schema.table_names() == {"table4"} + + +def test_in_end_to_end_python_providers(ctx: SessionContext): + """Test registering all python providers and running a query against them.""" + + all_catalog_names = [ + "datafusion", + "custom_catalog", + "in_mem_catalog", + ] + + all_schema_names = [ + "custom_schema", + "in_mem_schema", + ] + + ctx.register_catalog_provider(all_catalog_names[1], CustomCatalogProvider()) + ctx.register_catalog_provider( + all_catalog_names[2], dfn.catalog.Catalog.memory_catalog() + ) + + for catalog_name in all_catalog_names: + catalog = ctx.catalog(catalog_name) + + # Clean out previous schemas if they exist so we can start clean + for schema_name in catalog.schema_names(): + catalog.deregister_schema(schema_name, cascade=False) + + catalog.register_schema(all_schema_names[0], CustomSchemaProvider()) + catalog.register_schema(all_schema_names[1], dfn.catalog.Schema.memory_schema()) + + for schema_name in all_schema_names: + schema = catalog.schema(schema_name) + + for table_name in schema.table_names(): + schema.deregister_table(table_name) + + schema.register_table("test_table", create_dataset()) + + for catalog_name in all_catalog_names: + for schema_name in all_schema_names: + table_full_name = f"{catalog_name}.{schema_name}.test_table" + + batches = ctx.sql(f"select * from {table_full_name}").collect() + + assert len(batches) == 1 + assert batches[0].column(0) == pa.array([1, 2, 3]) + assert batches[0].column(1) == pa.array([4, 5, 6]) diff --git a/python/tests/test_context.py 
b/python/tests/test_context.py index 4a15ac9cf..6dbcc0d5e 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -57,7 +57,7 @@ def test_runtime_configs(tmp_path, path_to_str): ctx = SessionContext(config, runtime) assert ctx is not None - db = ctx.catalog("foo").database("bar") + db = ctx.catalog("foo").schema("bar") assert db is not None @@ -70,7 +70,7 @@ def test_temporary_files(tmp_path, path_to_str): ctx = SessionContext(config, runtime) assert ctx is not None - db = ctx.catalog("foo").database("bar") + db = ctx.catalog("foo").schema("bar") assert db is not None @@ -91,7 +91,7 @@ def test_create_context_with_all_valid_args(): ctx = SessionContext(config, runtime) # verify that at least some of the arguments worked - ctx.catalog("foo").database("bar") + ctx.catalog("foo").schema("bar") with pytest.raises(KeyError): ctx.catalog("datafusion") @@ -105,7 +105,7 @@ def test_register_record_batches(ctx): ctx.register_record_batches("t", [[batch]]) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} result = ctx.sql("SELECT a+b, a-b FROM t").collect() @@ -121,7 +121,7 @@ def test_create_dataframe_registers_unique_table_name(ctx): ) df = ctx.create_dataframe([[batch]]) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -141,7 +141,7 @@ def test_create_dataframe_registers_with_defined_table_name(ctx): ) df = ctx.create_dataframe([[batch]], name="tbl") - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -155,7 +155,7 @@ def test_from_arrow_table(ctx): # convert to DataFrame df = ctx.from_arrow(table) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -200,7 +200,7 @@ def test_from_arrow_table_with_name(ctx): # convert to DataFrame with optional name df 
= ctx.from_arrow(table, name="tbl") - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert tables[0] == "tbl" @@ -213,7 +213,7 @@ def test_from_arrow_table_empty(ctx): # convert to DataFrame df = ctx.from_arrow(table) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -228,7 +228,7 @@ def test_from_arrow_table_empty_no_schema(ctx): # convert to DataFrame df = ctx.from_arrow(table) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -246,7 +246,7 @@ def test_from_pylist(ctx): ] df = ctx.from_pylist(data) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -260,7 +260,7 @@ def test_from_pydict(ctx): data = {"a": [1, 2, 3], "b": [4, 5, 6]} df = ctx.from_pydict(data) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -276,7 +276,7 @@ def test_from_pandas(ctx): pandas_df = pd.DataFrame(data) df = ctx.from_pandas(pandas_df) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -292,7 +292,7 @@ def test_from_polars(ctx): polars_df = pd.DataFrame(data) df = ctx.from_polars(polars_df) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -303,7 +303,7 @@ def test_from_polars(ctx): def test_register_table(ctx, database): default = ctx.catalog() - public = default.database("public") + public = default.schema("public") assert public.names() == {"csv", "csv1", "csv2"} table = public.table("csv") @@ -313,7 +313,7 @@ def test_register_table(ctx, database): def test_read_table(ctx, database): default = ctx.catalog() - public = 
default.database("public") + public = default.schema("public") assert public.names() == {"csv", "csv1", "csv2"} table = public.table("csv") @@ -323,7 +323,7 @@ def test_read_table(ctx, database): def test_deregister_table(ctx, database): default = ctx.catalog() - public = default.database("public") + public = default.schema("public") assert public.names() == {"csv", "csv1", "csv2"} ctx.deregister_table("csv") @@ -339,7 +339,7 @@ def test_register_dataset(ctx): dataset = ds.dataset([batch]) ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} result = ctx.sql("SELECT a+b, a-b FROM t").collect() @@ -356,7 +356,7 @@ def test_dataset_filter(ctx, capfd): dataset = ds.dataset([batch]) ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} df = ctx.sql("SELECT a+b, a-b FROM t WHERE a BETWEEN 2 and 3 AND b > 5") # Make sure the filter was pushed down in Physical Plan @@ -455,7 +455,7 @@ def test_dataset_filter_nested_data(ctx): dataset = ds.dataset([batch]) ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} df = ctx.table("t") diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 41cee4ef3..c383edc60 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -75,7 +75,7 @@ def test_register_csv(ctx, tmp_path): ) ctx.register_csv("csv3", path, schema=alternative_schema) - assert ctx.catalog().database().names() == { + assert ctx.catalog().schema().names() == { "csv", "csv1", "csv2", @@ -150,7 +150,7 @@ def test_register_parquet(ctx, tmp_path): path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) ctx.register_parquet("t", path) ctx.register_parquet("t1", str(path)) - assert ctx.catalog().database().names() == {"t", "t1"} + assert ctx.catalog().schema().names() == {"t", "t1"} result = 
ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() result = pa.Table.from_batches(result) @@ -188,7 +188,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty parquet_pruning=True, file_extension=".parquet", ) - assert ctx.catalog().database().names() == {"datapp"} + assert ctx.catalog().schema().names() == {"datapp"} result = ctx.sql("SELECT grp, COUNT(*) AS cnt FROM datapp GROUP BY grp").collect() result = pa.Table.from_batches(result) @@ -204,7 +204,7 @@ def test_register_dataset(ctx, tmp_path, path_to_str): dataset = ds.dataset(path, format="parquet") ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() result = pa.Table.from_batches(result) @@ -251,7 +251,7 @@ def test_register_json(ctx, tmp_path): ) ctx.register_json("json3", path, schema=alternative_schema) - assert ctx.catalog().database().names() == { + assert ctx.catalog().schema().names() == { "json", "json1", "json2", @@ -308,7 +308,7 @@ def test_execute(ctx, tmp_path): path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) ctx.register_parquet("t", path) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} # count result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a IS NOT NULL").collect() @@ -451,18 +451,10 @@ def test_udf( id="datetime_ns", ), # Not writtable to parquet - pytest.param( - helpers.data_timedelta("s"), id="timedelta_s", marks=pytest.mark.xfail - ), - pytest.param( - helpers.data_timedelta("ms"), id="timedelta_ms", marks=pytest.mark.xfail - ), - pytest.param( - helpers.data_timedelta("us"), id="timedelta_us", marks=pytest.mark.xfail - ), - pytest.param( - helpers.data_timedelta("ns"), id="timedelta_ns", marks=pytest.mark.xfail - ), + pytest.param(helpers.data_timedelta("s"), id="timedelta_s"), + pytest.param(helpers.data_timedelta("ms"), 
id="timedelta_ms"), + pytest.param(helpers.data_timedelta("us"), id="timedelta_us"), + pytest.param(helpers.data_timedelta("ns"), id="timedelta_ns"), ], ) def test_simple_select(ctx, tmp_path, arr): @@ -524,7 +516,7 @@ def test_register_listing_table( schema=table.schema if pass_schema else None, file_sort_order=file_sort_order, ) - assert ctx.catalog().database().names() == {"my_table"} + assert ctx.catalog().schema().names() == {"my_table"} result = ctx.sql( "SELECT grp, COUNT(*) AS count FROM my_table GROUP BY grp" diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index f367a447d..43aa327d4 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -34,7 +34,7 @@ def test_substrait_serialization(ctx): ctx.register_record_batches("t", [[batch]]) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} # For now just make sure the method calls blow up substrait_plan = ss.Serde.serialize_to_plan("SELECT * FROM t", ctx) @@ -59,7 +59,7 @@ def test_substrait_file_serialization(ctx, tmp_path, path_to_str): ctx.register_record_batches("t", [[batch]]) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} path = tmp_path / "substrait_plan" path = str(path) if path_to_str else path diff --git a/src/catalog.rs b/src/catalog.rs index 83f8d08cb..17d4ec3b8 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -15,44 +15,54 @@ // specific language governing permissions and limitations // under the License. 
-use std::collections::HashSet; -use std::sync::Arc; - -use pyo3::exceptions::PyKeyError; -use pyo3::prelude::*; - -use crate::errors::{PyDataFusionError, PyDataFusionResult}; -use crate::utils::wait_for_future; +use crate::dataset::Dataset; +use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError, PyDataFusionResult}; +use crate::utils::{validate_pycapsule, wait_for_future}; +use async_trait::async_trait; +use datafusion::catalog::{MemoryCatalogProvider, MemorySchemaProvider}; +use datafusion::common::DataFusionError; use datafusion::{ arrow::pyarrow::ToPyArrow, catalog::{CatalogProvider, SchemaProvider}, datasource::{TableProvider, TableType}, }; +use datafusion_ffi::schema_provider::{FFI_SchemaProvider, ForeignSchemaProvider}; +use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; +use pyo3::exceptions::PyKeyError; +use pyo3::prelude::*; +use pyo3::types::PyCapsule; +use pyo3::IntoPyObjectExt; +use std::any::Any; +use std::collections::HashSet; +use std::sync::Arc; -#[pyclass(name = "Catalog", module = "datafusion", subclass)] +#[pyclass(name = "RawCatalog", module = "datafusion.catalog", subclass)] +#[derive(Clone)] pub struct PyCatalog { pub catalog: Arc, } -#[pyclass(name = "Database", module = "datafusion", subclass)] -pub struct PyDatabase { - pub database: Arc, +#[pyclass(name = "RawSchema", module = "datafusion.catalog", subclass)] +#[derive(Clone)] +pub struct PySchema { + pub schema: Arc, } -#[pyclass(name = "Table", module = "datafusion", subclass)] +#[pyclass(name = "RawTable", module = "datafusion.catalog", subclass)] +#[derive(Clone)] pub struct PyTable { pub table: Arc, } -impl PyCatalog { - pub fn new(catalog: Arc) -> Self { +impl From> for PyCatalog { + fn from(catalog: Arc) -> Self { Self { catalog } } } -impl PyDatabase { - pub fn new(database: Arc) -> Self { - Self { database } +impl From> for PySchema { + fn from(schema: Arc) -> Self { + Self { schema } } } @@ -68,36 +78,109 @@ impl PyTable { 
#[pymethods] impl PyCatalog { - fn names(&self) -> Vec { - self.catalog.schema_names() + #[new] + fn new(catalog: PyObject) -> Self { + let catalog_provider = + Arc::new(RustWrappedPyCatalogProvider::new(catalog)) as Arc; + catalog_provider.into() + } + + #[staticmethod] + fn memory_catalog() -> Self { + let catalog_provider = + Arc::new(MemoryCatalogProvider::default()) as Arc; + catalog_provider.into() + } + + fn schema_names(&self) -> HashSet { + self.catalog.schema_names().into_iter().collect() } #[pyo3(signature = (name="public"))] - fn database(&self, name: &str) -> PyResult { - match self.catalog.schema(name) { - Some(database) => Ok(PyDatabase::new(database)), - None => Err(PyKeyError::new_err(format!( - "Database with name {name} doesn't exist." - ))), - } + fn schema(&self, name: &str) -> PyResult { + let schema = self + .catalog + .schema(name) + .ok_or(PyKeyError::new_err(format!( + "Schema with name {name} doesn't exist." + )))?; + + Python::with_gil(|py| { + match schema + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => Ok(wrapped_schema.schema_provider.clone_ref(py)), + None => PySchema::from(schema).into_py_any(py), + } + }) + } + + fn register_schema(&self, name: &str, schema_provider: Bound<'_, PyAny>) -> PyResult<()> { + let provider = if schema_provider.hasattr("__datafusion_schema_provider__")? { + let capsule = schema_provider + .getattr("__datafusion_schema_provider__")? 
+ .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_schema_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignSchemaProvider = provider.into(); + Arc::new(provider) as Arc + } else { + match schema_provider.extract::() { + Ok(py_schema) => py_schema.schema, + Err(_) => Arc::new(RustWrappedPySchemaProvider::new(schema_provider.into())) + as Arc, + } + }; + + let _ = self + .catalog + .register_schema(name, provider) + .map_err(py_datafusion_err)?; + + Ok(()) + } + + fn deregister_schema(&self, name: &str, cascade: bool) -> PyResult<()> { + let _ = self + .catalog + .deregister_schema(name, cascade) + .map_err(py_datafusion_err)?; + + Ok(()) } fn __repr__(&self) -> PyResult { - Ok(format!( - "Catalog(schema_names=[{}])", - self.names().join(";") - )) + let mut names: Vec = self.schema_names().into_iter().collect(); + names.sort(); + Ok(format!("Catalog(schema_names=[{}])", names.join(", "))) } } #[pymethods] -impl PyDatabase { - fn names(&self) -> HashSet { - self.database.table_names().into_iter().collect() +impl PySchema { + #[new] + fn new(schema_provider: PyObject) -> Self { + let schema_provider = + Arc::new(RustWrappedPySchemaProvider::new(schema_provider)) as Arc; + schema_provider.into() + } + + #[staticmethod] + fn memory_schema() -> Self { + let schema_provider = Arc::new(MemorySchemaProvider::default()) as Arc; + schema_provider.into() + } + + #[getter] + fn table_names(&self) -> HashSet { + self.schema.table_names().into_iter().collect() } fn table(&self, name: &str, py: Python) -> PyDataFusionResult { - if let Some(table) = wait_for_future(py, self.database.table(name))?? { + if let Some(table) = wait_for_future(py, self.schema.table(name))?? 
{ Ok(PyTable::new(table)) } else { Err(PyDataFusionError::Common(format!( @@ -107,14 +190,49 @@ impl PyDatabase { } fn __repr__(&self) -> PyResult { - Ok(format!( - "Database(table_names=[{}])", - Vec::from_iter(self.names()).join(";") - )) + let mut names: Vec = self.table_names().into_iter().collect(); + names.sort(); + Ok(format!("Schema(table_names=[{}])", names.join(";"))) } - // register_table - // deregister_table + fn register_table(&self, name: &str, table_provider: Bound<'_, PyAny>) -> PyResult<()> { + let provider = if table_provider.hasattr("__datafusion_table_provider__")? { + let capsule = table_provider + .getattr("__datafusion_table_provider__")? + .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_table_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignTableProvider = provider.into(); + Arc::new(provider) as Arc + } else { + match table_provider.extract::() { + Ok(py_table) => py_table.table, + Err(_) => { + let py = table_provider.py(); + let provider = Dataset::new(&table_provider, py)?; + Arc::new(provider) as Arc + } + } + }; + + let _ = self + .schema + .register_table(name.to_string(), provider) + .map_err(py_datafusion_err)?; + + Ok(()) + } + + fn deregister_table(&self, name: &str) -> PyResult<()> { + let _ = self + .schema + .deregister_table(name) + .map_err(py_datafusion_err)?; + + Ok(()) + } } #[pymethods] @@ -125,6 +243,14 @@ impl PyTable { self.table.schema().to_pyarrow(py) } + #[staticmethod] + fn from_dataset(py: Python<'_>, dataset: &Bound<'_, PyAny>) -> PyResult { + let ds = Arc::new(Dataset::new(dataset, py).map_err(py_datafusion_err)?) + as Arc; + + Ok(Self::new(ds)) + } + /// Get the type of this table for metadata/catalog purposes. 
#[getter] fn kind(&self) -> &str { @@ -145,3 +271,285 @@ impl PyTable { // fn has_exact_statistics // fn supports_filter_pushdown } + +#[derive(Debug)] +pub(crate) struct RustWrappedPySchemaProvider { + schema_provider: PyObject, + owner_name: Option, +} + +impl RustWrappedPySchemaProvider { + pub fn new(schema_provider: PyObject) -> Self { + let owner_name = Python::with_gil(|py| { + schema_provider + .bind(py) + .getattr("owner_name") + .ok() + .map(|name| name.to_string()) + }); + + Self { + schema_provider, + owner_name, + } + } + + fn table_inner(&self, name: &str) -> PyResult>> { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + let py_table_method = provider.getattr("table")?; + + let py_table = py_table_method.call((name,), None)?; + if py_table.is_none() { + return Ok(None); + } + + if py_table.hasattr("__datafusion_table_provider__")? { + let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_table_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignTableProvider = provider.into(); + + Ok(Some(Arc::new(provider) as Arc)) + } else { + if let Ok(inner_table) = py_table.getattr("table") { + if let Ok(inner_table) = inner_table.extract::() { + return Ok(Some(inner_table.table)); + } + } + + match py_table.extract::() { + Ok(py_table) => Ok(Some(py_table.table)), + Err(_) => { + let ds = Dataset::new(&py_table, py).map_err(py_datafusion_err)?; + Ok(Some(Arc::new(ds) as Arc)) + } + } + } + }) + } +} + +#[async_trait] +impl SchemaProvider for RustWrappedPySchemaProvider { + fn owner_name(&self) -> Option<&str> { + self.owner_name.as_deref() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn table_names(&self) -> Vec { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + + provider + .getattr("table_names") + .and_then(|names| names.extract::>()) 
+ .unwrap_or_else(|err| { + log::error!("Unable to get table_names: {err}"); + Vec::default() + }) + }) + } + + async fn table( + &self, + name: &str, + ) -> datafusion::common::Result>, DataFusionError> { + self.table_inner(name).map_err(to_datafusion_err) + } + + fn register_table( + &self, + name: String, + table: Arc, + ) -> datafusion::common::Result>> { + let py_table = PyTable::new(table); + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + let _ = provider + .call_method1("register_table", (name, py_table)) + .map_err(to_datafusion_err)?; + // Since the definition of `register_table` says that an error + // will be returned if the table already exists, there is no + // case where we want to return a table provider as output. + Ok(None) + }) + } + + fn deregister_table( + &self, + name: &str, + ) -> datafusion::common::Result>> { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + let table = provider + .call_method1("deregister_table", (name,)) + .map_err(to_datafusion_err)?; + if table.is_none() { + return Ok(None); + } + + // If we can turn this table provider into a `Dataset`, return it. + // Otherwise, return None. 
+ let dataset = match Dataset::new(&table, py) { + Ok(dataset) => Some(Arc::new(dataset) as Arc), + Err(_) => None, + }; + + Ok(dataset) + }) + } + + fn table_exist(&self, name: &str) -> bool { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + provider + .call_method1("table_exist", (name,)) + .and_then(|pyobj| pyobj.extract()) + .unwrap_or(false) + }) + } +} + +#[derive(Debug)] +pub(crate) struct RustWrappedPyCatalogProvider { + pub(crate) catalog_provider: PyObject, +} + +impl RustWrappedPyCatalogProvider { + pub fn new(catalog_provider: PyObject) -> Self { + Self { catalog_provider } + } + + fn schema_inner(&self, name: &str) -> PyResult>> { + Python::with_gil(|py| { + let provider = self.catalog_provider.bind(py); + + let py_schema = provider.call_method1("schema", (name,))?; + if py_schema.is_none() { + return Ok(None); + } + + if py_schema.hasattr("__datafusion_schema_provider__")? { + let capsule = provider + .getattr("__datafusion_schema_provider__")? + .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_schema_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignSchemaProvider = provider.into(); + + Ok(Some(Arc::new(provider) as Arc)) + } else { + if let Ok(inner_schema) = py_schema.getattr("schema") { + if let Ok(inner_schema) = inner_schema.extract::() { + return Ok(Some(inner_schema.schema)); + } + } + match py_schema.extract::() { + Ok(inner_schema) => Ok(Some(inner_schema.schema)), + Err(_) => { + let py_schema = RustWrappedPySchemaProvider::new(py_schema.into()); + + Ok(Some(Arc::new(py_schema) as Arc)) + } + } + } + }) + } +} + +#[async_trait] +impl CatalogProvider for RustWrappedPyCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + Python::with_gil(|py| { + let provider = self.catalog_provider.bind(py); + provider + .getattr("schema_names") + .and_then(|names| 
names.extract::>()) + .unwrap_or_else(|err| { + log::error!("Unable to get schema_names: {err}"); + Vec::default() + }) + }) + } + + fn schema(&self, name: &str) -> Option> { + self.schema_inner(name).unwrap_or_else(|err| { + log::error!("CatalogProvider schema returned error: {err}"); + None + }) + } + + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> datafusion::common::Result>> { + // JRIGHT HERE + // let py_schema: PySchema = schema.into(); + Python::with_gil(|py| { + let py_schema = match schema + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => wrapped_schema.schema_provider.as_any(), + None => &PySchema::from(schema) + .into_py_any(py) + .map_err(to_datafusion_err)?, + }; + + let provider = self.catalog_provider.bind(py); + let schema = provider + .call_method1("register_schema", (name, py_schema)) + .map_err(to_datafusion_err)?; + if schema.is_none() { + return Ok(None); + } + + let schema = Arc::new(RustWrappedPySchemaProvider::new(schema.into())) + as Arc; + + Ok(Some(schema)) + }) + } + + fn deregister_schema( + &self, + name: &str, + cascade: bool, + ) -> datafusion::common::Result>> { + Python::with_gil(|py| { + let provider = self.catalog_provider.bind(py); + let schema = provider + .call_method1("deregister_schema", (name, cascade)) + .map_err(to_datafusion_err)?; + if schema.is_none() { + return Ok(None); + } + + let schema = Arc::new(RustWrappedPySchemaProvider::new(schema.into())) + as Arc; + + Ok(Some(schema)) + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + Ok(()) +} diff --git a/src/common/data_type.rs b/src/common/data_type.rs index f5f8a6b06..5cf9d6e9f 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -172,7 +172,7 @@ impl DataTypeMap { SqlType::DATE, )), DataType::Duration(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), 
DataType::Interval(interval_unit) => Ok(DataTypeMap::new( DataType::Interval(*interval_unit), @@ -189,7 +189,7 @@ impl DataTypeMap { SqlType::BINARY, )), DataType::FixedSizeBinary(_) => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", arrow_type)), + DataFusionError::NotImplemented(format!("{arrow_type:?}")), )), DataType::LargeBinary => Ok(DataTypeMap::new( DataType::LargeBinary, @@ -207,23 +207,22 @@ impl DataTypeMap { SqlType::VARCHAR, )), DataType::List(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - arrow_type + "{arrow_type:?}" )))), DataType::FixedSizeList(_, _) => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", arrow_type)), + DataFusionError::NotImplemented(format!("{arrow_type:?}")), )), DataType::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Struct(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Union(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Dictionary(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Decimal128(precision, scale) => Ok(DataTypeMap::new( DataType::Decimal128(*precision, *scale), @@ -236,23 +235,22 @@ impl DataTypeMap { SqlType::DECIMAL, )), DataType::Map(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::RunEndEncoded(_, _) => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", arrow_type)), + DataFusionError::NotImplemented(format!("{arrow_type:?}")), )), DataType::BinaryView => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + 
format!("{arrow_type:?}"), ))), DataType::Utf8View => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - arrow_type + "{arrow_type:?}" )))), DataType::ListView(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::LargeListView(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), } } @@ -379,8 +377,7 @@ impl DataTypeMap { "double" => Ok(DataType::Float64), "byte_array" => Ok(DataType::Utf8), _ => Err(PyValueError::new_err(format!( - "Unable to determine Arrow Data Type from Parquet String type: {:?}", - parquet_str_type + "Unable to determine Arrow Data Type from Parquet String type: {parquet_str_type:?}" ))), }; DataTypeMap::map_from_arrow_type(&arrow_dtype?) @@ -404,12 +401,10 @@ impl DataTypeMap { pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult { match sql_type { SqlType::ANY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::ARRAY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::BIGINT => Ok(DataTypeMap::new( DataType::Int64, @@ -432,11 +427,10 @@ impl DataTypeMap { SqlType::CHAR, )), SqlType::COLUMN_LIST => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::CURSOR => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::DATE => Ok(DataTypeMap::new( DataType::Date64, @@ -449,8 +443,7 @@ impl DataTypeMap { SqlType::DECIMAL, )), SqlType::DISTINCT => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::DOUBLE => Ok(DataTypeMap::new( DataType::Decimal256(1, 1), @@ -458,7 +451,7 @@ impl DataTypeMap { SqlType::DOUBLE, )), SqlType::DYNAMIC_STAR => 
Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::FLOAT => Ok(DataTypeMap::new( DataType::Decimal128(1, 1), @@ -466,8 +459,7 @@ impl DataTypeMap { SqlType::FLOAT, )), SqlType::GEOMETRY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::INTEGER => Ok(DataTypeMap::new( DataType::Int8, @@ -475,55 +467,52 @@ impl DataTypeMap { SqlType::INTEGER, )), SqlType::INTERVAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::INTERVAL_DAY => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_DAY_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_DAY_MINUTE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_DAY_SECOND => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_HOUR_MINUTE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_HOUR_SECOND => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_MINUTE => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_MINUTE_SECOND => Err(py_datafusion_err( - 
DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_MONTH => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_SECOND => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_YEAR => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_YEAR_MONTH => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::MAP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::MULTISET => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::NULL => Ok(DataTypeMap::new( DataType::Null, @@ -531,20 +520,16 @@ impl DataTypeMap { SqlType::NULL, )), SqlType::OTHER => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::REAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::ROW => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::SARG => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::SMALLINT => Ok(DataTypeMap::new( DataType::Int16, @@ -552,25 +537,22 @@ impl DataTypeMap { SqlType::SMALLINT, )), SqlType::STRUCTURED => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::SYMBOL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::TIME => 
Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::TIME_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::TIMESTAMP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::TINYINT => Ok(DataTypeMap::new( DataType::Int8, @@ -578,8 +560,7 @@ impl DataTypeMap { SqlType::TINYINT, )), SqlType::UNKNOWN => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::VARBINARY => Ok(DataTypeMap::new( DataType::LargeBinary, @@ -682,8 +663,7 @@ impl PyDataType { "datetime64" => Ok(DataType::Date64), "object" => Ok(DataType::Utf8), _ => Err(PyValueError::new_err(format!( - "Unable to determine Arrow Data Type from Arrow String type: {:?}", - arrow_str_type + "Unable to determine Arrow Data Type from Arrow String type: {arrow_str_type:?}" ))), }; Ok(PyDataType { diff --git a/src/context.rs b/src/context.rs index 6ce1f12bc..36133a33d 100644 --- a/src/context.rs +++ b/src/context.rs @@ -31,7 +31,7 @@ use uuid::Uuid; use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; -use crate::catalog::{PyCatalog, PyTable}; +use crate::catalog::{PyCatalog, PyTable, RustWrappedPyCatalogProvider}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; @@ -49,6 +49,7 @@ use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_f use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use 
datafusion::arrow::record_batch::RecordBatch; +use datafusion::catalog::CatalogProvider; use datafusion::common::TableReference; use datafusion::common::{exec_err, ScalarValue}; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; @@ -69,8 +70,10 @@ use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; +use datafusion_ffi::catalog_provider::{FFI_CatalogProvider, ForeignCatalogProvider}; use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; +use pyo3::IntoPyObjectExt; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -365,7 +368,7 @@ impl PySessionContext { } else { &upstream_host }; - let url_string = format!("{}{}", scheme, derived_host); + let url_string = format!("{scheme}{derived_host}"); let url = Url::parse(&url_string).unwrap(); self.ctx.runtime_env().register_object_store(&url, store); Ok(()) @@ -614,6 +617,34 @@ impl PySessionContext { Ok(()) } + pub fn register_catalog_provider( + &mut self, + name: &str, + provider: Bound<'_, PyAny>, + ) -> PyDataFusionResult<()> { + let provider = if provider.hasattr("__datafusion_catalog_provider__")? { + let capsule = provider + .getattr("__datafusion_catalog_provider__")? 
+ .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_catalog_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignCatalogProvider = provider.into(); + Arc::new(provider) as Arc + } else { + match provider.extract::() { + Ok(py_catalog) => py_catalog.catalog, + Err(_) => Arc::new(RustWrappedPyCatalogProvider::new(provider.into())) + as Arc, + } + }; + + let _ = self.ctx.register_catalog(name, provider); + + Ok(()) + } + /// Construct datafusion dataframe from Arrow Table pub fn register_table_provider( &mut self, @@ -845,14 +876,24 @@ impl PySessionContext { } #[pyo3(signature = (name="datafusion"))] - pub fn catalog(&self, name: &str) -> PyResult { - match self.ctx.catalog(name) { - Some(catalog) => Ok(PyCatalog::new(catalog)), - None => Err(PyKeyError::new_err(format!( - "Catalog with name {} doesn't exist.", - &name, - ))), - } + pub fn catalog(&self, name: &str) -> PyResult { + let catalog = self.ctx.catalog(name).ok_or(PyKeyError::new_err(format!( + "Catalog with name {name} doesn't exist." 
+ )))?; + + Python::with_gil(|py| { + match catalog + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => Ok(wrapped_schema.catalog_provider.clone_ref(py)), + None => PyCatalog::from(catalog).into_py_any(py), + } + }) + } + + pub fn catalog_names(&self) -> HashSet { + self.ctx.catalog_names().into_iter().collect() } pub fn tables(&self) -> HashSet { diff --git a/src/expr.rs b/src/expr.rs index 6b1d01d65..f1e002367 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -171,12 +171,10 @@ impl PyExpr { Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?), Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?), Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( - "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", - value + "Converting Expr::ScalarFunction to a Python object is not implemented: {value:?}" ))), Expr::WindowFunction(value) => Err(py_unsupported_variant_err(format!( - "Converting Expr::WindowFunction to a Python object is not implemented: {:?}", - value + "Converting Expr::WindowFunction to a Python object is not implemented: {value:?}" ))), Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?), Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?), @@ -188,8 +186,7 @@ impl PyExpr { } #[allow(deprecated)] Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( - "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", - qualifier, options + "Converting Expr::Wildcard to a Python object is not implemented : {qualifier:?} {options:?}" ))), Expr::GroupingSet(value) => { Ok(grouping_set::PyGroupingSet::from(value.clone()).into_bound_py_any(py)?) @@ -198,8 +195,7 @@ impl PyExpr { Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?) 
} Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!( - "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {:?} - {:?}", - data_type, column + "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {data_type:?} - {column:?}" ))), Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?), } @@ -755,8 +751,7 @@ impl PyExpr { Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), Expr::Literal(scalar_value, _) => DataTypeMap::map_from_scalar_value(scalar_value), _ => Err(py_type_err(format!( - "Non Expr::Literal encountered in types: {:?}", - expr + "Non Expr::Literal encountered in types: {expr:?}" ))), } } diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index a99d83d23..fd4393271 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -116,7 +116,7 @@ impl PyAggregate { } fn __repr__(&self) -> PyResult { - Ok(format!("Aggregate({})", self)) + Ok(format!("Aggregate({self})")) } } diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs index c09f116e3..7c5d3d31f 100644 --- a/src/expr/aggregate_expr.rs +++ b/src/expr/aggregate_expr.rs @@ -75,6 +75,6 @@ impl PyAggregateFunction { /// Get a String representation of this column fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/alias.rs b/src/expr/alias.rs index e8e03cfad..40746f200 100644 --- a/src/expr/alias.rs +++ b/src/expr/alias.rs @@ -64,6 +64,6 @@ impl PyAlias { /// Get a String representation of this column fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 62f93cd26..e8081e95b 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -69,7 +69,7 @@ impl PyAnalyze { } fn __repr__(&self) -> PyResult { - Ok(format!("Analyze({})", self)) + Ok(format!("Analyze({self})")) } } diff --git 
a/src/expr/between.rs b/src/expr/between.rs index a2cac1442..817f1baae 100644 --- a/src/expr/between.rs +++ b/src/expr/between.rs @@ -71,6 +71,6 @@ impl PyBetween { } fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/column.rs b/src/expr/column.rs index 365dbc0d2..50f316f1c 100644 --- a/src/expr/column.rs +++ b/src/expr/column.rs @@ -45,7 +45,7 @@ impl PyColumn { /// Get the column relation fn relation(&self) -> Option { - self.col.relation.as_ref().map(|r| format!("{}", r)) + self.col.relation.as_ref().map(|r| format!("{r}")) } /// Get the fully-qualified column name diff --git a/src/expr/copy_to.rs b/src/expr/copy_to.rs index ebfcb8ebc..473dabfed 100644 --- a/src/expr/copy_to.rs +++ b/src/expr/copy_to.rs @@ -106,7 +106,7 @@ impl PyCopyTo { } fn __repr__(&self) -> PyResult { - Ok(format!("CopyTo({})", self)) + Ok(format!("CopyTo({self})")) } fn __name__(&self) -> PyResult { @@ -129,7 +129,7 @@ impl Display for PyFileType { #[pymethods] impl PyFileType { fn __repr__(&self) -> PyResult { - Ok(format!("FileType({})", self)) + Ok(format!("FileType({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_catalog.rs b/src/expr/create_catalog.rs index f4ea0f517..d2d2ee8f6 100644 --- a/src/expr/create_catalog.rs +++ b/src/expr/create_catalog.rs @@ -81,7 +81,7 @@ impl PyCreateCatalog { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateCatalog({})", self)) + Ok(format!("CreateCatalog({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_catalog_schema.rs b/src/expr/create_catalog_schema.rs index 85f447e1e..e794962f5 100644 --- a/src/expr/create_catalog_schema.rs +++ b/src/expr/create_catalog_schema.rs @@ -81,7 +81,7 @@ impl PyCreateCatalogSchema { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateCatalogSchema({})", self)) + Ok(format!("CreateCatalogSchema({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_external_table.rs 
b/src/expr/create_external_table.rs index 01ce7d0ca..3e35af006 100644 --- a/src/expr/create_external_table.rs +++ b/src/expr/create_external_table.rs @@ -164,7 +164,7 @@ impl PyCreateExternalTable { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateExternalTable({})", self)) + Ok(format!("CreateExternalTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_function.rs b/src/expr/create_function.rs index 6f3c3f0ff..c02ceebb1 100644 --- a/src/expr/create_function.rs +++ b/src/expr/create_function.rs @@ -163,7 +163,7 @@ impl PyCreateFunction { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateFunction({})", self)) + Ok(format!("CreateFunction({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_index.rs b/src/expr/create_index.rs index 13dadbc3f..0f4b5011a 100644 --- a/src/expr/create_index.rs +++ b/src/expr/create_index.rs @@ -110,7 +110,7 @@ impl PyCreateIndex { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateIndex({})", self)) + Ok(format!("CreateIndex({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs index 8872b2d47..37f4d3420 100644 --- a/src/expr/create_memory_table.rs +++ b/src/expr/create_memory_table.rs @@ -78,7 +78,7 @@ impl PyCreateMemoryTable { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateMemoryTable({})", self)) + Ok(format!("CreateMemoryTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index 87bb76876..718e404d0 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -75,7 +75,7 @@ impl PyCreateView { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateView({})", self)) + Ok(format!("CreateView({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/describe_table.rs b/src/expr/describe_table.rs index 5658a13f2..6c48f3c77 100644 --- a/src/expr/describe_table.rs +++ b/src/expr/describe_table.rs @@ -61,7 +61,7 @@ 
impl PyDescribeTable { } fn __repr__(&self) -> PyResult { - Ok(format!("DescribeTable({})", self)) + Ok(format!("DescribeTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs index b62b776f8..889e7099d 100644 --- a/src/expr/distinct.rs +++ b/src/expr/distinct.rs @@ -48,8 +48,7 @@ impl Display for PyDistinct { Distinct::All(input) => write!( f, "Distinct ALL - \nInput: {:?}", - input, + \nInput: {input:?}", ), Distinct::On(distinct_on) => { write!( @@ -71,7 +70,7 @@ impl PyDistinct { } fn __repr__(&self) -> PyResult { - Ok(format!("Distinct({})", self)) + Ok(format!("Distinct({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/drop_catalog_schema.rs b/src/expr/drop_catalog_schema.rs index b7420a99c..b4a4c521c 100644 --- a/src/expr/drop_catalog_schema.rs +++ b/src/expr/drop_catalog_schema.rs @@ -101,7 +101,7 @@ impl PyDropCatalogSchema { } fn __repr__(&self) -> PyResult { - Ok(format!("DropCatalogSchema({})", self)) + Ok(format!("DropCatalogSchema({self})")) } } diff --git a/src/expr/drop_function.rs b/src/expr/drop_function.rs index 9fbd78fdc..fca9eb94b 100644 --- a/src/expr/drop_function.rs +++ b/src/expr/drop_function.rs @@ -76,7 +76,7 @@ impl PyDropFunction { } fn __repr__(&self) -> PyResult { - Ok(format!("DropFunction({})", self)) + Ok(format!("DropFunction({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/drop_table.rs b/src/expr/drop_table.rs index 96983c1cf..3f442539a 100644 --- a/src/expr/drop_table.rs +++ b/src/expr/drop_table.rs @@ -70,7 +70,7 @@ impl PyDropTable { } fn __repr__(&self) -> PyResult { - Ok(format!("DropTable({})", self)) + Ok(format!("DropTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/drop_view.rs b/src/expr/drop_view.rs index 1d1ab1e59..6196c8bb5 100644 --- a/src/expr/drop_view.rs +++ b/src/expr/drop_view.rs @@ -83,7 +83,7 @@ impl PyDropView { } fn __repr__(&self) -> PyResult { - Ok(format!("DropView({})", self)) + 
Ok(format!("DropView({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index a1534ac15..758213423 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -65,7 +65,7 @@ impl PyEmptyRelation { /// Get a String representation of this column fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } fn __name__(&self) -> PyResult { diff --git a/src/expr/filter.rs b/src/expr/filter.rs index 9bdb667cd..4fcb600cd 100644 --- a/src/expr/filter.rs +++ b/src/expr/filter.rs @@ -72,7 +72,7 @@ impl PyFilter { } fn __repr__(&self) -> String { - format!("Filter({})", self) + format!("Filter({self})") } } diff --git a/src/expr/join.rs b/src/expr/join.rs index 76ec532e7..b8d1d9da7 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -177,7 +177,7 @@ impl PyJoin { } fn __repr__(&self) -> PyResult { - Ok(format!("Join({})", self)) + Ok(format!("Join({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/like.rs b/src/expr/like.rs index 2e1f060bd..f180f5d4c 100644 --- a/src/expr/like.rs +++ b/src/expr/like.rs @@ -75,7 +75,7 @@ impl PyLike { } fn __repr__(&self) -> String { - format!("Like({})", self) + format!("Like({self})") } } @@ -133,7 +133,7 @@ impl PyILike { } fn __repr__(&self) -> String { - format!("Like({})", self) + format!("Like({self})") } } @@ -191,6 +191,6 @@ impl PySimilarTo { } fn __repr__(&self) -> String { - format!("Like({})", self) + format!("Like({self})") } } diff --git a/src/expr/limit.rs b/src/expr/limit.rs index c2a33ff89..92552814e 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -81,7 +81,7 @@ impl PyLimit { } fn __repr__(&self) -> PyResult { - Ok(format!("Limit({})", self)) + Ok(format!("Limit({self})")) } } diff --git a/src/expr/projection.rs b/src/expr/projection.rs index dc7e5e3c1..b5a9ef34a 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -85,7 +85,7 @@ impl PyProjection { } fn __repr__(&self) -> PyResult { - 
Ok(format!("Projection({})", self)) + Ok(format!("Projection({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/recursive_query.rs b/src/expr/recursive_query.rs index 65181f7d3..2517b7417 100644 --- a/src/expr/recursive_query.rs +++ b/src/expr/recursive_query.rs @@ -89,7 +89,7 @@ impl PyRecursiveQuery { } fn __repr__(&self) -> PyResult { - Ok(format!("RecursiveQuery({})", self)) + Ok(format!("RecursiveQuery({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/repartition.rs b/src/expr/repartition.rs index 3e782d6af..48b5e7041 100644 --- a/src/expr/repartition.rs +++ b/src/expr/repartition.rs @@ -108,7 +108,7 @@ impl PyRepartition { } fn __repr__(&self) -> PyResult { - Ok(format!("Repartition({})", self)) + Ok(format!("Repartition({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/sort.rs b/src/expr/sort.rs index ed4947591..79a8aee50 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -87,7 +87,7 @@ impl PySort { } fn __repr__(&self) -> PyResult { - Ok(format!("Sort({})", self)) + Ok(format!("Sort({self})")) } } diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs index 12f74e4d8..79e35d978 100644 --- a/src/expr/sort_expr.rs +++ b/src/expr/sort_expr.rs @@ -85,6 +85,6 @@ impl PySortExpr { } fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index 5ebfe6927..77f56f9a9 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -62,7 +62,7 @@ impl PySubquery { } fn __repr__(&self) -> PyResult { - Ok(format!("Subquery({})", self)) + Ok(format!("Subquery({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs index 267a4d485..3302e7f23 100644 --- a/src/expr/subquery_alias.rs +++ b/src/expr/subquery_alias.rs @@ -72,7 +72,7 @@ impl PySubqueryAlias { } fn __repr__(&self) -> PyResult { - Ok(format!("SubqueryAlias({})", self)) + Ok(format!("SubqueryAlias({self})")) } fn 
__name__(&self) -> PyResult { diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index 6a0d53f0f..329964687 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -136,7 +136,7 @@ impl PyTableScan { } fn __repr__(&self) -> PyResult { - Ok(format!("TableScan({})", self)) + Ok(format!("TableScan({self})")) } } diff --git a/src/expr/union.rs b/src/expr/union.rs index 5a08ccc13..e0b221398 100644 --- a/src/expr/union.rs +++ b/src/expr/union.rs @@ -66,7 +66,7 @@ impl PyUnion { } fn __repr__(&self) -> PyResult { - Ok(format!("Union({})", self)) + Ok(format!("Union({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/unnest.rs b/src/expr/unnest.rs index 8e70e0990..c8833347f 100644 --- a/src/expr/unnest.rs +++ b/src/expr/unnest.rs @@ -66,7 +66,7 @@ impl PyUnnest { } fn __repr__(&self) -> PyResult { - Ok(format!("Unnest({})", self)) + Ok(format!("Unnest({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/unnest_expr.rs b/src/expr/unnest_expr.rs index 2234d24b1..634186ed8 100644 --- a/src/expr/unnest_expr.rs +++ b/src/expr/unnest_expr.rs @@ -58,7 +58,7 @@ impl PyUnnestExpr { } fn __repr__(&self) -> PyResult { - Ok(format!("UnnestExpr({})", self)) + Ok(format!("UnnestExpr({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/window.rs b/src/expr/window.rs index 052d9eeb4..a408731c2 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -185,8 +185,7 @@ impl PyWindowFrame { "groups" => WindowFrameUnits::Groups, _ => { return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, + "{units:?}", )))); } }; @@ -197,8 +196,7 @@ impl PyWindowFrame { WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), WindowFrameUnits::Groups => { return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, + "{units:?}", )))); } }, @@ -210,8 +208,7 @@ impl PyWindowFrame { WindowFrameUnits::Range => 
WindowFrameBound::Following(ScalarValue::UInt64(None)), WindowFrameUnits::Groups => { return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, + "{units:?}", )))); } }, @@ -236,7 +233,7 @@ impl PyWindowFrame { /// Get a String representation of this window frame fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/functions.rs b/src/functions.rs index b2bafcb65..b40500b8b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -937,7 +937,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(left))?; m.add_wrapped(wrap_pyfunction!(length))?; m.add_wrapped(wrap_pyfunction!(ln))?; - m.add_wrapped(wrap_pyfunction!(log))?; + m.add_wrapped(wrap_pyfunction!(self::log))?; m.add_wrapped(wrap_pyfunction!(log10))?; m.add_wrapped(wrap_pyfunction!(log2))?; m.add_wrapped(wrap_pyfunction!(lower))?; diff --git a/src/lib.rs b/src/lib.rs index 1293eee3c..29d3f41da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,10 +77,10 @@ pub(crate) struct TokioRuntime(tokio::runtime::Runtime); /// datafusion directory. #[pymodule] fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { + // Initialize logging + pyo3_log::init(); + // Register the python classes - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -98,6 +98,10 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + let catalog = PyModule::new(py, "catalog")?; + catalog::init_module(&catalog)?; + m.add_submodule(&catalog)?; + // Register `common` as a submodule. 
Matching `datafusion-common` https://docs.rs/datafusion-common/latest/datafusion_common/ let common = PyModule::new(py, "common")?; common::init_module(&common)?; diff --git a/src/physical_plan.rs b/src/physical_plan.rs index f0be45c6a..49db643e1 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -78,8 +78,7 @@ impl PyExecutionPlan { let proto_plan = datafusion_proto::protobuf::PhysicalPlanNode::decode(bytes).map_err(|e| { PyRuntimeError::new_err(format!( - "Unable to decode logical node from serialized bytes: {}", - e + "Unable to decode logical node from serialized bytes: {e}" )) })?; diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 198d68bdc..97d320470 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -201,8 +201,7 @@ impl PyLogicalPlan { let proto_plan = datafusion_proto::protobuf::LogicalPlanNode::decode(bytes).map_err(|e| { PyRuntimeError::new_err(format!( - "Unable to decode logical node from serialized bytes: {}", - e + "Unable to decode logical node from serialized bytes: {e}" )) })?; diff --git a/src/utils.rs b/src/utils.rs index f4e121fd5..3b30de5de 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -109,8 +109,7 @@ pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyRe let capsule_name = capsule_name.unwrap().to_str()?; if capsule_name != name { return Err(PyValueError::new_err(format!( - "Expected name '{}' in PyCapsule, instead got '{}'", - name, capsule_name + "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'" ))); } @@ -127,7 +126,7 @@ pub(crate) fn py_obj_to_scalar_value(py: Python, obj: PyObject) -> PyResult Date: Wed, 2 Jul 2025 09:59:02 -0400 Subject: [PATCH 096/248] feat: add FFI support for user defined functions (#1145) * Intermediate work adding ffi scalar udf * Add scalar UDF and example * Add aggregate udf via ffi * Initial commit for window ffi integration * Remove unused import --- docs/source/contributor-guide/ffi.rst | 2 +- examples/datafusion-ffi-example/Cargo.lock | 217 
++++++++++-------- examples/datafusion-ffi-example/Cargo.toml | 8 +- .../python/tests/_test_aggregate_udf.py | 77 +++++++ .../python/tests/_test_scalar_udf.py | 70 ++++++ .../python/tests/_test_window_udf.py | 89 +++++++ .../src/aggregate_udf.rs | 81 +++++++ .../src/catalog_provider.rs | 1 - examples/datafusion-ffi-example/src/lib.rs | 9 + .../datafusion-ffi-example/src/scalar_udf.rs | 91 ++++++++ .../datafusion-ffi-example/src/window_udf.rs | 81 +++++++ python/datafusion/user_defined.py | 107 ++++++++- src/functions.rs | 2 +- src/udaf.rs | 31 ++- src/udf.rs | 25 +- src/udwf.rs | 27 ++- 16 files changed, 805 insertions(+), 113 deletions(-) create mode 100644 examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py create mode 100644 examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py create mode 100644 examples/datafusion-ffi-example/python/tests/_test_window_udf.py create mode 100644 examples/datafusion-ffi-example/src/aggregate_udf.rs create mode 100644 examples/datafusion-ffi-example/src/scalar_udf.rs create mode 100644 examples/datafusion-ffi-example/src/window_udf.rs diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst index c1f9806b3..a40af1234 100644 --- a/docs/source/contributor-guide/ffi.rst +++ b/docs/source/contributor-guide/ffi.rst @@ -176,7 +176,7 @@ By convention the ``datafusion-python`` library expects a Python object that has ``TableProvider`` PyCapsule to have this capsule accessible by calling a function named ``__datafusion_table_provider__``. You can see a complete working example of how to share a ``TableProvider`` from one python library to DataFusion Python in the -`repository examples folder `_. +`repository examples folder `_. This section has been written using ``TableProvider`` as an example. It is the first extension that has been written using this approach and the most thoroughly implemented. 
diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock index e5a1ca8d1..1b4ca6bee 100644 --- a/examples/datafusion-ffi-example/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -323,6 +323,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" dependencies = [ "bitflags", + "serde", + "serde_json", ] [[package]] @@ -748,9 +750,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" +checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" dependencies = [ "arrow", "arrow-ipc", @@ -775,7 +777,6 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", - "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -790,7 +791,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.1", "regex", "sqlparser", "tempfile", @@ -803,9 +804,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" +checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" dependencies = [ "arrow", "async-trait", @@ -829,9 +830,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" dependencies = [ "arrow", "async-trait", @@ -852,9 
+853,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" +checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" dependencies = [ "ahash", "arrow", @@ -876,9 +877,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" dependencies = [ "futures", "log", @@ -887,9 +888,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +checksum = "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" dependencies = [ "arrow", "async-compression", @@ -912,7 +913,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand", + "rand 0.9.1", "tempfile", "tokio", "tokio-util", @@ -923,9 +924,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" dependencies = [ "arrow", "async-trait", @@ -948,9 +949,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +checksum = 
"3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" dependencies = [ "arrow", "async-trait", @@ -973,9 +974,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" dependencies = [ "arrow", "async-trait", @@ -998,21 +999,21 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.1", "tokio", ] [[package]] name = "datafusion-doc" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" +checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" [[package]] name = "datafusion-execution" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" +checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" dependencies = [ "arrow", "dashmap", @@ -1022,16 +1023,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.9.1", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" +checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" dependencies = [ "arrow", "chrono", @@ -1050,9 +1051,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" +checksum = 
"51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" dependencies = [ "arrow", "datafusion-common", @@ -1063,9 +1064,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" +checksum = "980cca31de37f5dadf7ea18e4ffc2b6833611f45bed5ef9de0831d2abb50f1ef" dependencies = [ "abi_stable", "arrow", @@ -1073,7 +1074,9 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-functions-aggregate-common", "datafusion-proto", + "datafusion-proto-common", "futures", "log", "prost", @@ -1081,11 +1084,25 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-ffi-example" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "async-trait", + "datafusion", + "datafusion-ffi", + "pyo3", + "pyo3-build-config", +] + [[package]] name = "datafusion-functions" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" +checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" dependencies = [ "arrow", "arrow-buffer", @@ -1103,7 +1120,7 @@ dependencies = [ "itertools", "log", "md-5", - "rand", + "rand 0.9.1", "regex", "sha2", "unicode-segmentation", @@ -1112,9 +1129,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" +checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" dependencies = [ "ahash", "arrow", @@ -1133,9 +1150,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "47.0.0" +version = "48.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" +checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" dependencies = [ "ahash", "arrow", @@ -1146,9 +1163,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" +checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" dependencies = [ "arrow", "arrow-ord", @@ -1167,9 +1184,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" +checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" dependencies = [ "arrow", "async-trait", @@ -1183,10 +1200,11 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" +checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" dependencies = [ + "arrow", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -1200,9 +1218,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" +checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1210,9 +1228,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "47.0.0" 
+version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" +checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" dependencies = [ "datafusion-expr", "quote", @@ -1221,9 +1239,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" +checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" dependencies = [ "arrow", "chrono", @@ -1240,9 +1258,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" +checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" dependencies = [ "ahash", "arrow", @@ -1262,9 +1280,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" +checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" dependencies = [ "ahash", "arrow", @@ -1276,9 +1294,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" +checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" dependencies = [ "arrow", "datafusion-common", @@ -1295,9 +1313,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "47.0.0" +version = "48.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" +checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" dependencies = [ "ahash", "arrow", @@ -1325,9 +1343,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" +checksum = "d8f5d9acd7d96e3bf2a7bb04818373cab6e51de0356e3694b94905fee7b4e8b6" dependencies = [ "arrow", "chrono", @@ -1341,9 +1359,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" +checksum = "09ecb5ec152c4353b60f7a5635489834391f7a291d2b39a4820cd469e318b78e" dependencies = [ "arrow", "datafusion-common", @@ -1352,9 +1370,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" dependencies = [ "arrow", "async-trait", @@ -1376,9 +1394,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" +checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" dependencies = [ "arrow", "bigdecimal", @@ -1441,20 +1459,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = 
"ffi-table-provider" -version = "0.1.0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", - "async-trait", - "datafusion", - "datafusion-ffi", - "pyo3", - "pyo3-build-config", -] - [[package]] name = "fixedbitset" version = "0.5.7" @@ -1488,6 +1492,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1666,6 +1676,11 @@ name = "hashbrown" version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -2271,12 +2286,14 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" dependencies = [ "fixedbitset", + "hashbrown 0.15.3", "indexmap", + "serde", ] [[package]] @@ -2305,7 +2322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -2484,19 +2501,27 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.3", ] [[package]] name = "rand_chacha" -version = "0.3.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.3", ] [[package]] @@ -2504,8 +2529,14 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.3.3", ] [[package]] @@ -3032,9 +3063,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ "getrandom 0.3.3", "js-sys", diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml index 319163554..b26ab48e3 100644 --- a/examples/datafusion-ffi-example/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -16,13 +16,13 @@ # under the License. 
[package] -name = "ffi-table-provider" -version = "0.1.0" +name = "datafusion-ffi-example" +version = "0.2.0" edition = "2021" [dependencies] -datafusion = { version = "47.0.0" } -datafusion-ffi = { version = "47.0.0" } +datafusion = { version = "48.0.0" } +datafusion-ffi = { version = "48.0.0" } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } arrow = { version = "55.0.0" } arrow-array = { version = "55.0.0" } diff --git a/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py b/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py new file mode 100644 index 000000000..7ea6b295c --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext, col, udaf +from datafusion_ffi_example import MySumUDF + + +def setup_context_with_table(): + ctx = SessionContext() + + # Pick numbers here so we get the same value in both groups + # since we cannot be certain of the output order of batches + batch = pa.RecordBatch.from_arrays( + [ + pa.array([1, 2, 3, None], type=pa.int64()), + pa.array([1, 1, 2, 2], type=pa.int64()), + ], + names=["a", "b"], + ) + ctx.register_record_batches("test_table", [[batch]]) + return ctx + + +def test_ffi_aggregate_register(): + ctx = setup_context_with_table() + my_udaf = udaf(MySumUDF()) + ctx.register_udaf(my_udaf) + + result = ctx.sql("select my_custom_sum(a) from test_table group by b").collect() + + assert len(result) == 2 + assert result[0].num_columns == 1 + + result = [r.column(0) for r in result] + expected = [ + pa.array([3], type=pa.int64()), + pa.array([3], type=pa.int64()), + ] + + assert result == expected + + +def test_ffi_aggregate_call_directly(): + ctx = setup_context_with_table() + my_udaf = udaf(MySumUDF()) + + result = ( + ctx.table("test_table").aggregate([col("b")], [my_udaf(col("a"))]).collect() + ) + + assert len(result) == 2 + assert result[0].num_columns == 2 + + result = [r.column(1) for r in result] + expected = [ + pa.array([3], type=pa.int64()), + pa.array([3], type=pa.int64()), + ] + + assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py b/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py new file mode 100644 index 000000000..0c949c34a --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext, col, udf +from datafusion_ffi_example import IsNullUDF + + +def setup_context_with_table(): + ctx = SessionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3, None])], + names=["a"], + ) + ctx.register_record_batches("test_table", [[batch]]) + return ctx + + +def test_ffi_scalar_register(): + ctx = setup_context_with_table() + my_udf = udf(IsNullUDF()) + ctx.register_udf(my_udf) + + result = ctx.sql("select my_custom_is_null(a) from test_table").collect() + + assert len(result) == 1 + assert result[0].num_columns == 1 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([False, False, False, True], type=pa.bool_()), + ] + + assert result == expected + + +def test_ffi_scalar_call_directly(): + ctx = setup_context_with_table() + my_udf = udf(IsNullUDF()) + + result = ctx.table("test_table").select(my_udf(col("a"))).collect() + + assert len(result) == 1 + assert result[0].num_columns == 1 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([False, False, False, True], type=pa.bool_()), + ] + + assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_window_udf.py b/examples/datafusion-ffi-example/python/tests/_test_window_udf.py new file mode 100644 index 000000000..7d96994b9 --- 
/dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_window_udf.py @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext, col, udwf +from datafusion_ffi_example import MyRankUDF + + +def setup_context_with_table(): + ctx = SessionContext() + + # Pick numbers here so we get the same value in both groups + # since we cannot be certain of the output order of batches + batch = pa.RecordBatch.from_arrays( + [ + pa.array([40, 10, 30, 20], type=pa.int64()), + ], + names=["a"], + ) + ctx.register_record_batches("test_table", [[batch]]) + return ctx + + +def test_ffi_window_register(): + ctx = setup_context_with_table() + my_udwf = udwf(MyRankUDF()) + ctx.register_udwf(my_udwf) + + result = ctx.sql( + "select a, my_custom_rank() over (order by a) from test_table" + ).collect() + assert len(result) == 1 + assert result[0].num_columns == 2 + + results = [ + (result[0][0][idx].as_py(), result[0][1][idx].as_py()) for idx in range(4) + ] + results.sort() + + expected = [ + (10, 1), + (20, 2), + (30, 3), + (40, 4), + ] + assert results == expected + + +def test_ffi_window_call_directly(): + ctx = setup_context_with_table() 
+ my_udwf = udwf(MyRankUDF()) + + result = ( + ctx.table("test_table") + .select(col("a"), my_udwf().order_by(col("a")).build()) + .collect() + ) + + assert len(result) == 1 + assert result[0].num_columns == 2 + + results = [ + (result[0][0][idx].as_py(), result[0][1][idx].as_py()) for idx in range(4) + ] + results.sort() + + expected = [ + (10, 1), + (20, 2), + (30, 3), + (40, 4), + ] + assert results == expected diff --git a/examples/datafusion-ffi-example/src/aggregate_udf.rs b/examples/datafusion-ffi-example/src/aggregate_udf.rs new file mode 100644 index 000000000..9481fe9c6 --- /dev/null +++ b/examples/datafusion-ffi-example/src/aggregate_udf.rs @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_schema::DataType; +use datafusion::error::Result as DataFusionResult; +use datafusion::functions_aggregate::sum::Sum; +use datafusion::logical_expr::function::AccumulatorArgs; +use datafusion::logical_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; +use datafusion_ffi::udaf::FFI_AggregateUDF; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::any::Any; +use std::sync::Arc; + +#[pyclass(name = "MySumUDF", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct MySumUDF { + inner: Arc, +} + +#[pymethods] +impl MySumUDF { + #[new] + fn new() -> Self { + Self { + inner: Arc::new(Sum::new()), + } + } + + fn __datafusion_aggregate_udf__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_aggregate_udf".into(); + + let func = Arc::new(AggregateUDF::from(self.clone())); + let provider = FFI_AggregateUDF::from(func); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl AggregateUDFImpl for MySumUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "my_custom_sum" + } + + fn signature(&self) -> &Signature { + self.inner.signature() + } + + fn return_type(&self, arg_types: &[DataType]) -> DataFusionResult { + self.inner.return_type(arg_types) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> DataFusionResult> { + self.inner.accumulator(acc_args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> DataFusionResult> { + self.inner.coerce_types(arg_types) + } +} diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs index 54e61cf3e..cd2616916 100644 --- a/examples/datafusion-ffi-example/src/catalog_provider.rs +++ b/examples/datafusion-ffi-example/src/catalog_provider.rs @@ -24,7 +24,6 @@ use datafusion::{ catalog::{ CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, TableProvider, }, - 
common::exec_err, datasource::MemTable, error::{DataFusionError, Result}, }; diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs index 3a4cf2247..79af276fd 100644 --- a/examples/datafusion-ffi-example/src/lib.rs +++ b/examples/datafusion-ffi-example/src/lib.rs @@ -16,18 +16,27 @@ // under the License. use crate::catalog_provider::MyCatalogProvider; +use crate::aggregate_udf::MySumUDF; +use crate::scalar_udf::IsNullUDF; use crate::table_function::MyTableFunction; use crate::table_provider::MyTableProvider; +use crate::window_udf::MyRankUDF; use pyo3::prelude::*; pub(crate) mod catalog_provider; +pub(crate) mod aggregate_udf; +pub(crate) mod scalar_udf; pub(crate) mod table_function; pub(crate) mod table_provider; +pub(crate) mod window_udf; #[pymodule] fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/examples/datafusion-ffi-example/src/scalar_udf.rs b/examples/datafusion-ffi-example/src/scalar_udf.rs new file mode 100644 index 000000000..727666638 --- /dev/null +++ b/examples/datafusion-ffi-example/src/scalar_udf.rs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{Array, BooleanArray}; +use arrow_schema::DataType; +use datafusion::common::ScalarValue; +use datafusion::error::Result as DataFusionResult; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; +use datafusion_ffi::udf::FFI_ScalarUDF; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::any::Any; +use std::sync::Arc; + +#[pyclass(name = "IsNullUDF", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct IsNullUDF { + signature: Signature, +} + +#[pymethods] +impl IsNullUDF { + #[new] + fn new() -> Self { + Self { + signature: Signature::new(TypeSignature::Any(1), Volatility::Immutable), + } + } + + fn __datafusion_scalar_udf__<'py>(&self, py: Python<'py>) -> PyResult> { + let name = cr"datafusion_scalar_udf".into(); + + let func = Arc::new(ScalarUDF::from(self.clone())); + let provider = FFI_ScalarUDF::from(func); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl ScalarUDFImpl for IsNullUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "my_custom_is_null" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult { + Ok(DataType::Boolean) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> DataFusionResult { + let input = &args.args[0]; + + Ok(match input { + ColumnarValue::Array(arr) => match arr.is_nullable() { + true => { + let nulls = arr.nulls().unwrap(); + let nulls = BooleanArray::from_iter(nulls.iter().map(|x| Some(!x))); + ColumnarValue::Array(Arc::new(nulls)) + } + false => ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))), + }, + ColumnarValue::Scalar(sv) => { + ColumnarValue::Scalar(ScalarValue::Boolean(Some(sv == 
&ScalarValue::Null))) + } + }) + } +} diff --git a/examples/datafusion-ffi-example/src/window_udf.rs b/examples/datafusion-ffi-example/src/window_udf.rs new file mode 100644 index 000000000..e0d397956 --- /dev/null +++ b/examples/datafusion-ffi-example/src/window_udf.rs @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_schema::{DataType, FieldRef}; +use datafusion::error::Result as DataFusionResult; +use datafusion::functions_window::rank::rank_udwf; +use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; +use datafusion::logical_expr::{PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl}; +use datafusion_ffi::udwf::FFI_WindowUDF; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::any::Any; +use std::sync::Arc; + +#[pyclass(name = "MyRankUDF", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct MyRankUDF { + inner: Arc, +} + +#[pymethods] +impl MyRankUDF { + #[new] + fn new() -> Self { + Self { inner: rank_udwf() } + } + + fn __datafusion_window_udf__<'py>(&self, py: Python<'py>) -> PyResult> { + let name = cr"datafusion_window_udf".into(); + + let func = Arc::new(WindowUDF::from(self.clone())); + let provider = FFI_WindowUDF::from(func); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl WindowUDFImpl for MyRankUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "my_custom_rank" + } + + fn signature(&self) -> &Signature { + self.inner.signature() + } + + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> DataFusionResult> { + self.inner + .inner() + .partition_evaluator(partition_evaluator_args) + } + + fn field(&self, field_args: WindowUDFFieldArgs) -> DataFusionResult { + self.inner.inner().field(field_args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> DataFusionResult> { + self.inner.coerce_types(arg_types) + } +} diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index dd634c7fb..bd686acbb 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -22,7 +22,7 @@ import functools from abc import ABCMeta, abstractmethod from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, 
Optional, TypeVar, overload +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, TypeVar, overload import pyarrow as pa @@ -77,6 +77,12 @@ def __str__(self) -> str: return self.name.lower() +class ScalarUDFExportable(Protocol): + """Type hint for object that has __datafusion_scalar_udf__ PyCapsule.""" + + def __datafusion_scalar_udf__(self) -> object: ... # noqa: D105 + + class ScalarUDF: """Class for performing scalar user-defined functions (UDF). @@ -96,6 +102,9 @@ def __init__( See helper method :py:func:`udf` for argument details. """ + if hasattr(func, "__datafusion_scalar_udf__"): + self._udf = df_internal.ScalarUDF.from_pycapsule(func) + return if isinstance(input_types, pa.DataType): input_types = [input_types] self._udf = df_internal.ScalarUDF( @@ -134,6 +143,10 @@ def udf( name: Optional[str] = None, ) -> ScalarUDF: ... + @overload + @staticmethod + def udf(func: ScalarUDFExportable) -> ScalarUDF: ... + @staticmethod def udf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Function (UDF). @@ -147,7 +160,10 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417 Args: func (Callable, optional): Only needed when calling as a function. - Skip this argument when using ``udf`` as a decorator. + Skip this argument when using `udf` as a decorator. If you have a Rust + backed ScalarUDF within a PyCapsule, you can pass this parameter + and ignore the rest. They will be determined directly from the + underlying function. See the online documentation for more information. input_types (list[pa.DataType]): The data types of the arguments to ``func``. This list must be of the same length as the number of arguments. 
@@ -215,12 +231,31 @@ def wrapper(*args: Any, **kwargs: Any): return decorator + if hasattr(args[0], "__datafusion_scalar_udf__"): + return ScalarUDF.from_pycapsule(args[0]) + if args and callable(args[0]): # Case 1: Used as a function, require the first parameter to be callable return _function(*args, **kwargs) # Case 2: Used as a decorator with parameters return _decorator(*args, **kwargs) + @staticmethod + def from_pycapsule(func: ScalarUDFExportable) -> ScalarUDF: + """Create a Scalar UDF from ScalarUDF PyCapsule object. + + This function will instantiate a Scalar UDF that uses a DataFusion + ScalarUDF that is exported via the FFI bindings. + """ + name = str(func.__class__) + return ScalarUDF( + name=name, + func=func, + input_types=None, + return_type=None, + volatility=None, + ) + class Accumulator(metaclass=ABCMeta): """Defines how an :py:class:`AggregateUDF` accumulates values.""" @@ -242,6 +277,12 @@ def evaluate(self) -> pa.Scalar: """Return the resultant value.""" +class AggregateUDFExportable(Protocol): + """Type hint for object that has __datafusion_aggregate_udf__ PyCapsule.""" + + def __datafusion_aggregate_udf__(self) -> object: ... # noqa: D105 + + class AggregateUDF: """Class for performing scalar user-defined functions (UDF). @@ -263,6 +304,9 @@ def __init__( See :py:func:`udaf` for a convenience function and argument descriptions. """ + if hasattr(accumulator, "__datafusion_aggregate_udf__"): + self._udaf = df_internal.AggregateUDF.from_pycapsule(accumulator) + return self._udaf = df_internal.AggregateUDF( name, accumulator, @@ -307,7 +351,7 @@ def udaf( ) -> AggregateUDF: ... @staticmethod - def udaf(*args: Any, **kwargs: Any): # noqa: D417 + def udaf(*args: Any, **kwargs: Any): # noqa: D417, C901 """Create a new User-Defined Aggregate Function (UDAF). This class allows you to define an aggregate function that can be used in @@ -364,6 +408,10 @@ def udf4() -> Summarize: Args: accum: The accumulator python function. 
Only needed when calling as a function. Skip this argument when using ``udaf`` as a decorator. + If you have a Rust backed AggregateUDF within a PyCapsule, you can + pass this parameter and ignore the rest. They will be determined + directly from the underlying function. See the online documentation + for more information. input_types: The data types of the arguments to ``accum``. return_type: The data type of the return value. state_type: The data types of the intermediate accumulation. @@ -422,12 +470,32 @@ def wrapper(*args: Any, **kwargs: Any) -> Expr: return decorator + if hasattr(args[0], "__datafusion_aggregate_udf__"): + return AggregateUDF.from_pycapsule(args[0]) + if args and callable(args[0]): # Case 1: Used as a function, require the first parameter to be callable return _function(*args, **kwargs) # Case 2: Used as a decorator with parameters return _decorator(*args, **kwargs) + @staticmethod + def from_pycapsule(func: AggregateUDFExportable) -> AggregateUDF: + """Create an Aggregate UDF from AggregateUDF PyCapsule object. + + This function will instantiate a Aggregate UDF that uses a DataFusion + AggregateUDF that is exported via the FFI bindings. + """ + name = str(func.__class__) + return AggregateUDF( + name=name, + accumulator=func, + input_types=None, + return_type=None, + state_type=None, + volatility=None, + ) + class WindowEvaluator: """Evaluator class for user-defined window functions (UDWF). @@ -588,6 +656,12 @@ def include_rank(self) -> bool: return False +class WindowUDFExportable(Protocol): + """Type hint for object that has __datafusion_window_udf__ PyCapsule.""" + + def __datafusion_window_udf__(self) -> object: ... # noqa: D105 + + class WindowUDF: """Class for performing window user-defined functions (UDF). @@ -608,6 +682,9 @@ def __init__( See :py:func:`udwf` for a convenience function and argument descriptions. 
""" + if hasattr(func, "__datafusion_window_udf__"): + self._udwf = df_internal.WindowUDF.from_pycapsule(func) + return self._udwf = df_internal.WindowUDF( name, func, input_types, return_type, str(volatility) ) @@ -683,7 +760,10 @@ def biased_numbers() -> BiasedNumbers: Args: func: Only needed when calling as a function. Skip this argument when - using ``udwf`` as a decorator. + using ``udwf`` as a decorator. If you have a Rust backed WindowUDF + within a PyCapsule, you can pass this parameter and ignore the rest. + They will be determined directly from the underlying function. See + the online documentation for more information. input_types: The data types of the arguments. return_type: The data type of the return value. volatility: See :py:class:`Volatility` for allowed values. @@ -692,6 +772,9 @@ def biased_numbers() -> BiasedNumbers: Returns: A user-defined window function that can be used in window function calls. """ + if hasattr(args[0], "__datafusion_window_udf__"): + return WindowUDF.from_pycapsule(args[0]) + if args and callable(args[0]): # Case 1: Used as a function, require the first parameter to be callable return WindowUDF._create_window_udf(*args, **kwargs) @@ -759,6 +842,22 @@ def wrapper(*args: Any, **kwargs: Any) -> Expr: return decorator + @staticmethod + def from_pycapsule(func: WindowUDFExportable) -> WindowUDF: + """Create a Window UDF from WindowUDF PyCapsule object. + + This function will instantiate a Window UDF that uses a DataFusion + WindowUDF that is exported via the FFI bindings. + """ + name = str(func.__class__) + return WindowUDF( + name=name, + func=func, + input_types=None, + return_type=None, + volatility=None, + ) + class TableFunction: """Class for performing user-defined table functions (UDTF). 
diff --git a/src/functions.rs b/src/functions.rs index b40500b8b..eeef48385 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -682,7 +682,7 @@ pub fn approx_percentile_cont_with_weight( add_builder_fns_to_aggregate(agg_fn, None, filter, None, None) } -// We handle first_value explicitly because the signature expects an order_by +// We handle last_value explicitly because the signature expects an order_by // https://github.com/apache/datafusion/issues/12376 #[pyfunction] #[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] diff --git a/src/udaf.rs b/src/udaf.rs index 34a9cd51d..78f4e2b0c 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -19,6 +19,10 @@ use std::sync::Arc; use pyo3::{prelude::*, types::PyTuple}; +use crate::common::data_type::PyScalarValue; +use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; +use crate::expr::PyExpr; +use crate::utils::{parse_volatility, validate_pycapsule}; use datafusion::arrow::array::{Array, ArrayRef}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; @@ -27,11 +31,8 @@ use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::{ create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, }; - -use crate::common::data_type::PyScalarValue; -use crate::errors::to_datafusion_err; -use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use datafusion_ffi::udaf::{FFI_AggregateUDF, ForeignAggregateUDF}; +use pyo3::types::PyCapsule; #[derive(Debug)] struct RustAccumulator { @@ -183,6 +184,26 @@ impl PyAggregateUDF { Ok(Self { function }) } + #[staticmethod] + pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { + if func.hasattr("__datafusion_aggregate_udf__")? 
{ + let capsule = func.getattr("__datafusion_aggregate_udf__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_aggregate_udf")?; + + let udaf = unsafe { capsule.reference::() }; + let udaf: ForeignAggregateUDF = udaf.try_into()?; + + Ok(Self { + function: udaf.into(), + }) + } else { + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_aggregate_udf__ does not exist on AggregateUDF object.".to_string(), + )) + } + } + /// creates a new PyExpr with the call of the udf #[pyo3(signature = (*args))] fn __call__(&self, args: Vec) -> PyResult { diff --git a/src/udf.rs b/src/udf.rs index 574c9d7b5..de1e3f18c 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use datafusion_ffi::udf::{FFI_ScalarUDF, ForeignScalarUDF}; +use pyo3::types::PyCapsule; use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; @@ -29,8 +31,9 @@ use datafusion::logical_expr::ScalarUDF; use datafusion::logical_expr::{create_udf, ColumnarValue}; use crate::errors::to_datafusion_err; +use crate::errors::{py_datafusion_err, PyDataFusionResult}; use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use crate::utils::{parse_volatility, validate_pycapsule}; /// Create a Rust callable function from a python function that expects pyarrow arrays fn pyarrow_function_to_rust( @@ -105,6 +108,26 @@ impl PyScalarUDF { Ok(Self { function }) } + #[staticmethod] + pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { + if func.hasattr("__datafusion_scalar_udf__")? 
{ + let capsule = func.getattr("__datafusion_scalar_udf__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_scalar_udf")?; + + let udf = unsafe { capsule.reference::() }; + let udf: ForeignScalarUDF = udf.try_into()?; + + Ok(Self { + function: udf.into(), + }) + } else { + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_scalar_udf__ does not exist on ScalarUDF object.".to_string(), + )) + } + } + /// creates a new PyExpr with the call of the udf #[pyo3(signature = (*args))] fn __call__(&self, args: Vec) -> PyResult { diff --git a/src/udwf.rs b/src/udwf.rs index a0c8cc59a..4fb98916b 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -27,16 +27,17 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use crate::common::data_type::PyScalarValue; -use crate::errors::to_datafusion_err; +use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use crate::utils::{parse_volatility, validate_pycapsule}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::{ PartitionEvaluator, PartitionEvaluatorFactory, Signature, Volatility, WindowUDF, WindowUDFImpl, }; -use pyo3::types::{PyList, PyTuple}; +use datafusion_ffi::udwf::{FFI_WindowUDF, ForeignWindowUDF}; +use pyo3::types::{PyCapsule, PyList, PyTuple}; #[derive(Debug)] struct RustPartitionEvaluator { @@ -245,6 +246,26 @@ impl PyWindowUDF { Ok(self.function.call(args).into()) } + #[staticmethod] + pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { + if func.hasattr("__datafusion_window_udf__")? 
{ + let capsule = func.getattr("__datafusion_window_udf__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_window_udf")?; + + let udwf = unsafe { capsule.reference::() }; + let udwf: ForeignWindowUDF = udwf.try_into()?; + + Ok(Self { + function: udwf.into(), + }) + } else { + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_window_udf__ does not exist on WindowUDF object.".to_string(), + )) + } + } + fn __repr__(&self) -> PyResult { Ok(format!("WindowUDF({})", self.function.name())) } From 2e1b71369eefc97c22b82be84bbabb414f748fb9 Mon Sep 17 00:00:00 2001 From: kosiew Date: Fri, 4 Jul 2025 20:36:05 +0800 Subject: [PATCH 097/248] refactor: style loading logic in DataFrameHtmlFormatter (#1177) --- python/datafusion/dataframe_formatter.py | 117 ++++++++--------------- python/tests/test_dataframe.py | 60 +++++------- 2 files changed, 63 insertions(+), 114 deletions(-) diff --git a/python/datafusion/dataframe_formatter.py b/python/datafusion/dataframe_formatter.py index 27f00f9c3..2323224b8 100644 --- a/python/datafusion/dataframe_formatter.py +++ b/python/datafusion/dataframe_formatter.py @@ -135,9 +135,6 @@ class DataFrameHtmlFormatter: session """ - # Class variable to track if styles have been loaded in the notebook - _styles_loaded = False - def __init__( self, max_cell_length: int = 25, @@ -260,23 +257,6 @@ def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: """ self._custom_header_builder = builder - @classmethod - def is_styles_loaded(cls) -> bool: - """Check if HTML styles have been loaded in the current session. - - This method is primarily intended for debugging UI rendering issues - related to style loading. 
- - Returns: - True if styles have been loaded, False otherwise - - Example: - >>> from datafusion.dataframe_formatter import DataFrameHtmlFormatter - >>> DataFrameHtmlFormatter.is_styles_loaded() - False - """ - return cls._styles_loaded - def format_html( self, batches: list, @@ -315,18 +295,7 @@ def format_html( # Build HTML components html = [] - # Only include styles and scripts if: - # 1. Not using shared styles, OR - # 2. Using shared styles but they haven't been loaded yet - include_styles = ( - not self.use_shared_styles or not DataFrameHtmlFormatter._styles_loaded - ) - - if include_styles: - html.extend(self._build_html_header()) - # If we're using shared styles, mark them as loaded - if self.use_shared_styles: - DataFrameHtmlFormatter._styles_loaded = True + html.extend(self._build_html_header()) html.extend(self._build_table_container_start()) @@ -338,7 +307,7 @@ def format_html( html.append("") # Add footer (JavaScript and messages) - if include_styles and self.enable_cell_expansion: + if self.enable_cell_expansion: html.append(self._get_javascript()) # Always add truncation message if needed (independent of styles) @@ -375,14 +344,20 @@ def format_str( def _build_html_header(self) -> list[str]: """Build the HTML header with CSS styles.""" - html = [] - html.append("") + html.append(f"") return html def _build_table_container_start(self) -> list[str]: @@ -570,28 +545,31 @@ def _get_default_css(self) -> str: def _get_javascript(self) -> str: """Get JavaScript code for interactive elements.""" return """ - - """ + +""" class FormatterManager: @@ -712,24 +690,9 @@ def reset_formatter() -> None: >>> reset_formatter() # Reset formatter to default settings """ formatter = DataFrameHtmlFormatter() - # Reset the styles_loaded flag to ensure styles will be reloaded - DataFrameHtmlFormatter._styles_loaded = False set_formatter(formatter) -def reset_styles_loaded_state() -> None: - """Reset the styles loaded state to force reloading of styles. 
- - This can be useful when switching between notebook sessions or - when styles need to be refreshed. - - Example: - >>> from datafusion.html_formatter import reset_styles_loaded_state - >>> reset_styles_loaded_state() # Force styles to reload in next render - """ - DataFrameHtmlFormatter._styles_loaded = False - - def _refresh_formatter_reference() -> None: """Refresh formatter reference in any modules using it. diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index c9ae38d8e..a3870ead8 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -42,7 +42,6 @@ configure_formatter, get_formatter, reset_formatter, - reset_styles_loaded_state, ) from datafusion.expr import Window from pyarrow.csv import write_csv @@ -2177,27 +2176,15 @@ def test_html_formatter_shared_styles(df, clean_formatter_state): # First, ensure we're using shared styles configure_formatter(use_shared_styles=True) - # Get HTML output for first table - should include styles html_first = df._repr_html_() - - # Verify styles are included in first render - assert "
" - f"{field.name}
" - f"
" - "" - "" - f"{formatted_value}" - f"" - f"
" - f"
{formatted_value}