From 31fee392d41f723179678ee7fdac2719ef1d40d1 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 19 Jan 2025 07:51:14 -0500 Subject: [PATCH 001/206] Feat/use uv python management (#994) * Remove requirements files and add dependencies into pyproject.toml instead * Remove old conda files since we will use uv as our primary method for developers to set up environments * Working through CI changes to use uv instead of pip and conda * Add uv lock to exclude files * Revert "Remove old conda files since we will use uv as our primary method for developers to set up environments" This reverts commit 88aff7e39334cde8101ef6d313c6bb2bd1f3981c. * Windows workflows don't use source command * Add in extra include for ignoreing rat * Use uv commands in CI * Remove conda recipes and CI stages * Working on CI using uv * Install doc requirements * Remove caching uv * Set uv venv * Add requirements for building * Revert github action to allowed one * Call uv sync with verbose mode so users can see the build occuring in CI * Test setting specific hash on action * Test setting rust-toolchain github action with pinned version * Testing night rust toolchain against apache rejection criteria * Github action is fickle with the pattern matching * Switch all Ci to use nightly rust toolchain until infra team whitelists the stable toolchain * Speed up CI by preventing build during uv sync * Additional uv commands missing no-project option * Setting python versions of dependencies to match lowest supported python version, 3.8 * Update maturin and move to deps for dev * CI ordering was wrong and maturin needed uv option * Switch to stable toolchain * uv requires two dashes * Submodule init * change directories for unit tests * Add deps for build * Maturin build doesn't take uv as parameter * Update documentation for setting up with uv * Enable cache in CI * Update documentation to use uv * Small adjustment to CI config --- .github/workflows/build.yml | 62 +- .github/workflows/conda.yml | 107 - 
.github/workflows/docs.yaml | 23 +- .github/workflows/test.yaml | 59 +- .pre-commit-config.yaml | 4 +- README.md | 68 +- conda/environments/datafusion-cuda-dev.yaml | 44 - conda/environments/datafusion-dev.yaml | 41 - conda/recipes/bld.bat | 26 - conda/recipes/build.sh | 84 - conda/recipes/meta.yaml | 75 - dev/python_lint.sh | 2 +- dev/release/README.md | 27 +- dev/release/rat_exclude_files.txt | 4 +- dev/release/verify-release-candidate.sh | 6 +- docs/README.md | 32 +- docs/build.sh | 6 + docs/mdbook/src/installation.md | 53 +- docs/requirements.txt | 26 - .../source/contributor-guide/introduction.rst | 42 +- pyproject.toml | 25 +- requirements-310.txt | 195 -- requirements-311.txt | 175 -- requirements-312.txt | 184 -- requirements.in | 26 - uv.lock | 1842 +++++++++++++++++ 26 files changed, 2031 insertions(+), 1207 deletions(-) delete mode 100644 .github/workflows/conda.yml delete mode 100644 conda/environments/datafusion-cuda-dev.yaml delete mode 100644 conda/environments/datafusion-dev.yaml delete mode 100644 conda/recipes/bld.bat delete mode 100644 conda/recipes/build.sh delete mode 100644 conda/recipes/meta.yaml delete mode 100644 docs/requirements.txt delete mode 100644 requirements-310.txt delete mode 100644 requirements-311.txt delete mode 100644 requirements-312.txt delete mode 100644 requirements.in create mode 100644 uv.lock diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 084a96192..acabad3ca 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,28 +31,33 @@ jobs: - name: Install Python uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" + + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + # Use the --no-install-package to only install the dependencies + # but do not yet build the rust library - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install ruff + run: uv sync --dev --no-install-package datafusion + # Update 
output format to enable automatic inline annotations. - name: Run Ruff run: | - ruff check --output-format=github python/ - ruff format --check python/ + uv run --no-project ruff check --output-format=github python/ + uv run --no-project ruff format --check python/ generate-license: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: astral-sh/setup-uv@v5 with: - profile: minimal - toolchain: stable - override: true + enable-cache: true + - name: Generate license file - run: python ./dev/create_license.py + run: uv run --no-project python ./dev/create_license.py - uses: actions/upload-artifact@v4 with: name: python-wheel-license @@ -74,15 +79,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Upgrade pip - run: python -m pip install --upgrade pip - - - name: Install maturin - run: pip install maturin==1.5.1 + - uses: dtolnay/rust-toolchain@stable - run: rm LICENSE.txt - name: Download LICENSE.txt @@ -97,8 +94,14 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Build Python package - run: maturin build --release --strip --features substrait + run: | + uv sync --dev --no-install-package datafusion + uv run --no-project maturin build --release --strip --features substrait - name: List Windows wheels if: matrix.os == 'windows-latest' @@ -132,15 +135,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Upgrade pip - run: python -m pip install --upgrade pip - - - name: Install maturin - run: pip install maturin==1.5.1 + - uses: dtolnay/rust-toolchain@stable - run: rm LICENSE.txt - name: Download LICENSE.txt @@ -155,9 +150,14 @@ jobs: version: "27.4" repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Build Python 
package run: | - maturin build --release --strip --features substrait + uv sync --dev --no-install-package datafusion + uv run --no-project maturin build --release --strip --features substrait - name: List Mac wheels run: find target/wheels/ diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml deleted file mode 100644 index c2b8fab02..000000000 --- a/.github/workflows/conda.yml +++ /dev/null @@ -1,107 +0,0 @@ -name: Build conda nightly -on: - push: - branches: - - main - pull_request: - paths: - - Cargo.toml - - Cargo.lock - - pyproject.toml - - conda/recipes/** - - .github/workflows/conda.yml - schedule: - - cron: '0 0 * * 0' - -# When this workflow is queued, automatically cancel any previous running -# or pending jobs from the same branch -concurrency: - group: conda-${{ github.head_ref }} - cancel-in-progress: true - -# Required shell entrypoint to have properly activated conda environments -defaults: - run: - shell: bash -l {0} - -jobs: - conda: - name: "Build conda nightlies (python: ${{ matrix.python }}, arch: ${{ matrix.arch }})" - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python: ["3.8", "3.9", "3.10", "3.11"] - arch: ["linux-64", "linux-aarch64"] - steps: - - name: Manage disk space - if: matrix.arch == 'linux-aarch64' - run: | - sudo mkdir -p /opt/empty_dir || true - for d in \ - /opt/ghc \ - /opt/hostedtoolcache \ - /usr/lib/jvm \ - /usr/local/.ghcup \ - /usr/local/lib/android \ - /usr/local/share/powershell \ - /usr/share/dotnet \ - /usr/share/swift \ - ; do - sudo rsync --stats -a --delete /opt/empty_dir/ $d || true - done - sudo apt-get purge -y -f firefox \ - google-chrome-stable \ - microsoft-edge-stable - sudo apt-get autoremove -y >& /dev/null - sudo apt-get autoclean -y >& /dev/null - sudo docker image prune --all --force - df -h - - name: Create swapfile - if: matrix.arch == 'linux-aarch64' - run: | - sudo fallocate -l 10GiB /swapfile || true - sudo chmod 600 /swapfile || true - sudo mkswap /swapfile || 
true - sudo swapon /swapfile || true - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Set up Python - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - miniforge-variant: Miniforge3 - python-version: "3.8" - channel-priority: strict - - name: Install dependencies - run: | - conda install -c conda-forge conda-build conda-verify - - which python - pip list - conda list - # Clean the conda cache - - name: Clean Conda Cache - run: conda clean --all --yes - - name: Build conda packages - run: | - # suffix for nightly package versions - export VERSION_SUFFIX=a`date +%y%m%d` - - conda build conda/recipes \ - --python ${{ matrix.python }} \ - --variants "{target_platform: [${{ matrix.arch }}]}" \ - --error-overlinking \ - --no-test \ - --no-anaconda-upload \ - --output-folder packages - - name: Test conda packages - if: matrix.arch == 'linux-64' # can only test native platform packages - run: | - conda build --test packages/${{ matrix.arch }}/*.tar.bz2 - - name: Upload conda packages as artifacts - uses: actions/upload-artifact@v4 - with: - name: "conda nightlies (python - ${{ matrix.python }}, arch - ${{ matrix.arch }})" - # need to install all conda channel metadata to properly install locally - path: packages/ diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 86288e2d8..9037e0a5c 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -57,27 +57,24 @@ jobs: version: '27.4' repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install dependencies - run: | - set -x - python3 -m venv venv - source venv/bin/activate - pip install -r requirements-311.txt - pip install -r docs/requirements.txt - - name: Build Datafusion + - name: Install dependencies and build + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Build repo run: | - set -x - source venv/bin/activate - maturin develop + uv venv + uv sync --dev --no-install-package datafusion --group docs + uv run --no-project maturin develop 
--uv - name: Build docs run: | set -x - source venv/bin/activate cd docs curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet - make html + uv run --no-project make html - name: Copy & push the generated HTML if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 21faedecd..c93d4c06f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -43,11 +43,10 @@ jobs: - uses: actions/checkout@v4 - name: Setup Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@stable id: rust-toolchain with: - toolchain: ${{ matrix.toolchain }} - override: true + components: clippy,rustfmt - name: Install Protoc uses: arduino/setup-protoc@v3 @@ -64,60 +63,35 @@ jobs: uses: actions/cache@v4 with: path: ~/.cargo - key: cargo-cache-${{ steps.rust-toolchain.outputs.rustc_hash }}-${{ hashFiles('Cargo.lock') }} + key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }} - name: Check Formatting - uses: actions-rs/cargo@v1 if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - with: - command: fmt - args: -- --check + run: cargo fmt -- --check - name: Run Clippy - uses: actions-rs/cargo@v1 if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - with: - command: clippy - args: --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure - - - name: Create Virtualenv (3.12) - if: ${{ matrix.python-version == '3.12' }} - run: | - python -m venv venv - source venv/bin/activate - pip install -r requirements-312.txt + run: cargo clippy --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure - - name: Create Virtualenv (3.10) - if: ${{ 
matrix.python-version == '3.10' }} - run: | - python -m venv venv - source venv/bin/activate - pip install -r requirements-310.txt - - - name: Create Virtualenv (3.11) - if: ${{ matrix.python-version == '3.11' }} - run: | - python -m venv venv - source venv/bin/activate - pip install -r requirements-311.txt + - name: Install dependencies and build + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true - name: Run tests env: RUST_BACKTRACE: 1 run: | git submodule update --init - source venv/bin/activate - pip install -e . -vv - pytest -v . + uv sync --dev --no-install-package datafusion + uv run --no-project maturin develop --uv + uv run --no-project pytest -v . - name: FFI unit tests run: | - source venv/bin/activate - pip install -e . -vv - pip install maturin==1.5.1 cd examples/ffi-table-provider - maturin develop --release --strip - pytest python/tests/_test_table_provider.py + uv run --no-project maturin develop --uv + uv run --no-project pytest python/tests/_test_table_provider.py - name: Cache the generated dataset id: cache-tpch-dataset @@ -134,7 +108,6 @@ jobs: - name: Run TPC-H examples run: | - source venv/bin/activate cd examples/tpch - python convert_data_to_parquet.py - pytest _tests.py + uv run --no-project python convert_data_to_parquet.py + uv run --no-project pytest _tests.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8509fae2c..e20fedf5c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,9 +17,9 @@ repos: - repo: https://github.com/rhysd/actionlint - rev: v1.6.23 + rev: v1.7.6 hooks: - - id: actionlint-docker + - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.3.0 diff --git a/README.md b/README.md index ca612c1ab..5aaf7f5f3 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,13 @@ See [examples](examples/README.md) for more information. 
- [Serialize query plans using Substrait](https://github.com/apache/datafusion-python/blob/main/examples/substrait.py) -## How to install (from pip) +## How to install + +### uv + +```bash +uv add datafusion +``` ### Pip @@ -164,61 +170,69 @@ You can verify the installation by running: ## How to develop -This assumes that you have rust and cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). +This assumes that you have rust and cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). The Maturin tools used in this workflow can be installed either via `uv` or `pip`. Both approaches should offer the same experience. It is recommended to use `uv` since it has significant performance improvements +over `pip`. -The Maturin tools used in this workflow can be installed either via Conda or Pip. Both approaches should offer the same experience. Multiple approaches are only offered to appease developer preference. Bootstrapping for both Conda and Pip are as follows. +Bootstrap (`uv`): -Bootstrap (Conda): +By default `uv` will attempt to build the datafusion python package. For our development we prefer to build manually. 
This means +that when creating your virtual environment using `uv sync` you need to pass in the additional `--no-install-package datafusion` +and for `uv run` commands the additional parameter `--no-project` ```bash # fetch this repo git clone git@github.com:apache/datafusion-python.git -# create the conda environment for dev -conda env create -f ./conda/environments/datafusion-dev.yaml -n datafusion-dev -# activate the conda environment -conda activate datafusion-dev +# create the virtual environment +uv sync --dev --no-install-package datafusion +# activate the environment +source .venv/bin/activate ``` -Or alternatively, if you are on an OS that supports CUDA Toolkit, you can use `-f ./conda/environments/datafusion-cuda-dev.yaml`. - -Bootstrap (Pip): +Bootstrap (`pip`): ```bash # fetch this repo git clone git@github.com:apache/datafusion-python.git # prepare development environment (used to build wheel / install in development) -python3 -m venv venv +python3 -m venv .venv # activate the venv -source venv/bin/activate +source .venv/bin/activate # update pip itself if necessary python -m pip install -U pip -# install dependencies (for Python 3.8+) -python -m pip install -r requirements.in +# install dependencies +python -m pip install -r pyproject.toml ``` The tests rely on test data in git submodules. ```bash -git submodule init -git submodule update +git submodule update --init ``` Whenever rust code changes (your changes or via `git pull`): ```bash # make sure you activate the venv using "source venv/bin/activate" first -maturin develop +maturin develop --uv python -m pytest ``` +Alternatively if you are using `uv` you can do the following without +needing to activate the virtual environment: + +```bash +uv run --no-project maturin develop --uv +uv run --no-project pytest . 
+``` + ### Running & Installing pre-commit hooks -arrow-datafusion-python takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce +`datafusion-python` takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise. Our pre-commit hooks can be installed by running `pre-commit install`, which will install the configurations in -your ARROW_DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete +your DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete the commit if an offending lint is found allowing you to make changes locally before pushing. The pre-commit hooks can also be run adhoc without installing them by simply running `pre-commit run --all-files` @@ -236,18 +250,8 @@ There are scripts in `ci/scripts` for running Rust and Python linters. 
## How to update dependencies -To change test dependencies, change the `requirements.in` and run +To change test dependencies, change the `pyproject.toml` and run ```bash -# install pip-tools (this can be done only once), also consider running in venv -python -m pip install pip-tools -python -m piptools compile --generate-hashes -o requirements-310.txt +uv sync --dev --no-install-package datafusion ``` - -To update dependencies, run with `-U` - -```bash -python -m piptools compile -U --generate-hashes -o requirements-310.txt -``` - -More details [here](https://github.com/jazzband/pip-tools) diff --git a/conda/environments/datafusion-cuda-dev.yaml b/conda/environments/datafusion-cuda-dev.yaml deleted file mode 100644 index 1f6f23942..000000000 --- a/conda/environments/datafusion-cuda-dev.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -channels: - - conda-forge -dependencies: - - black - - flake8 - - isort - - maturin>=1.5.1 - - mypy - - numpy - - pyarrow>=11.0.0 - - pytest - - toml - - importlib_metadata - - python>=3.10 - # Packages useful for building distributions and releasing - - mamba - - conda-build - - anaconda-client - # Packages for documentation building - - sphinx - - pydata-sphinx-theme==0.8.0 - - myst-parser - - jinja2 - # GPU packages - - cudf - - cudatoolkit=11.8 -name: datafusion-dev diff --git a/conda/environments/datafusion-dev.yaml b/conda/environments/datafusion-dev.yaml deleted file mode 100644 index b4b503dc6..000000000 --- a/conda/environments/datafusion-dev.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -channels: - - conda-forge -dependencies: - - black - - flake8 - - isort - - maturin>=1.5.1 - - mypy - - numpy - - pyarrow>=11.0.0 - - pytest - - toml - - importlib_metadata - - python>=3.10 - # Packages useful for building distributions and releasing - - mamba - - conda-build - - anaconda-client - # Packages for documentation building - - sphinx - - pydata-sphinx-theme==0.8.0 - - myst-parser - - jinja2 -name: datafusion-dev diff --git a/conda/recipes/bld.bat b/conda/recipes/bld.bat deleted file mode 100644 index 90626a637..000000000 --- a/conda/recipes/bld.bat +++ /dev/null @@ -1,26 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -maturin build -vv -j %CPU_COUNT% --release --strip --features substrait --manylinux off --interpreter=%PYTHON% - -FOR /F "delims=" %%i IN ('dir /s /b target\wheels\*.whl') DO set datafusion_wheel=%%i - -%PYTHON% -m pip install --no-deps %datafusion_wheel% -vv - -cargo-bundle-licenses --format yaml --output THIRDPARTY.yml diff --git a/conda/recipes/build.sh b/conda/recipes/build.sh deleted file mode 100644 index 259894313..000000000 --- a/conda/recipes/build.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -set -ex - -# See https://github.com/conda-forge/rust-feedstock/blob/master/recipe/build.sh for cc env explanation -if [ "$c_compiler" = gcc ] ; then - case "$target_platform" in - linux-64) rust_env_arch=X86_64_UNKNOWN_LINUX_GNU ;; - linux-aarch64) rust_env_arch=AARCH64_UNKNOWN_LINUX_GNU ;; - linux-ppc64le) rust_env_arch=POWERPC64LE_UNKNOWN_LINUX_GNU ;; - *) echo "unknown target_platform $target_platform" ; exit 1 ;; - esac - - export CARGO_TARGET_${rust_env_arch}_LINKER=$CC -fi - -declare -a _xtra_maturin_args - -mkdir -p $SRC_DIR/.cargo - -if [ "$target_platform" = "osx-64" ] ; then - cat <> $SRC_DIR/.cargo/config -[target.x86_64-apple-darwin] -linker = "$CC" -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - -EOF - - _xtra_maturin_args+=(--target=x86_64-apple-darwin) - -elif [ "$target_platform" = "osx-arm64" ] ; then - cat <> $SRC_DIR/.cargo/config -# Required for intermediate codegen stuff -[target.x86_64-apple-darwin] -linker = "$CC_FOR_BUILD" - -# Required for final binary artifacts for target -[target.aarch64-apple-darwin] -linker = "$CC" -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - -EOF - _xtra_maturin_args+=(--target=aarch64-apple-darwin) - - # This variable must be set to the directory containing the target's libpython DSO - export PYO3_CROSS_LIB_DIR=$PREFIX/lib - - # xref: https://github.com/PyO3/pyo3/commit/7beb2720 - export PYO3_PYTHON_VERSION=${PY_VER} - - # xref: https://github.com/conda-forge/python-feedstock/issues/621 - sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/os-patch.py - sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/platform-patch.py -fi - -maturin build -vv -j "${CPU_COUNT}" --release --strip --features substrait --manylinux off --interpreter="${PYTHON}" "${_xtra_maturin_args[@]}" - -"${PYTHON}" -m pip install $SRC_DIR/target/wheels/datafusion*.whl --no-deps -vv - -cargo-bundle-licenses --format yaml --output THIRDPARTY.yml diff --git 
a/conda/recipes/meta.yaml b/conda/recipes/meta.yaml deleted file mode 100644 index b0784253a..000000000 --- a/conda/recipes/meta.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -{% set name = "datafusion" %} -{% set major_minor_patch = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').split('.') %} -{% set new_patch = major_minor_patch[2] | int + 1 %} -{% set version = (major_minor_patch[:2] + [new_patch]) | join('.') + environ.get('VERSION_SUFFIX', '') %} - - -package: - name: {{ name|lower }} - version: {{ version }} - -source: - git_url: ../.. 
- -build: - number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ python | replace(".", "") }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - -requirements: - build: - - python # [build_platform != target_platform] - - cross-python_{{ target_platform }} # [build_platform != target_platform] - - zlib # [build_platform != target_platform] - - {{ compiler('c') }} - - {{ compiler('rust') }} - - cargo-bundle-licenses - - maturin >=1.5.1,<1.6.0 - - libprotobuf =3 - host: - - python - - maturin >=1.5.1,<1.6.0 - - pip - - zlib - - xz # [linux64] - run: - - python - - pyarrow >=11.0.0 - - typing_extensions - -test: - imports: - - datafusion - commands: - - pip check - requires: - - pip - -about: - home: https://arrow.apache.org/datafusion - license: Apache-2.0 - license_family: APACHE - license_file: - - LICENSE.txt - - THIRDPARTY.yml - description: | - DataFusion is an extensible query execution framework, written in Rust, - that uses Apache Arrow as its in-memory format. - doc_url: https://arrow.apache.org/datafusion - dev_url: https://github.com/apache/arrow-datafusion diff --git a/dev/python_lint.sh b/dev/python_lint.sh index 29f0d4833..2d867f29d 100755 --- a/dev/python_lint.sh +++ b/dev/python_lint.sh @@ -21,6 +21,6 @@ # DataFusion CI does set -e -source venv/bin/activate +source .venv/bin/activate flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503 black --line-length 79 . 
diff --git a/dev/release/README.md b/dev/release/README.md index b2c015e1d..f0b333999 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -172,8 +172,8 @@ git checkout 40.0.0-rc1 git submodule update --init --recursive # create the env -python3 -m venv venv -source venv/bin/activate +python3 -m venv .venv +source .venv/bin/activate # install release candidate pip install --extra-index-url https://test.pypi.org/simple/ datafusion==40.0.0 @@ -218,28 +218,9 @@ uploading them using `twine`: twine upload --repository pypi dist-release/* ``` -### Publish Python Artifacts to Anaconda +### Publish Python Artifacts to conda-forge -Publishing artifacts to Anaconda is similar to PyPi. First, Download the source tarball created in the previous step and untar it. - -```bash -# Assuming you have an existing conda environment named `datafusion-dev` if not see root README for instructions -conda activate datafusion-dev -conda build . -``` - -This will setup a virtual conda environment and build the artifacts inside of that virtual env. This step can take a few minutes as the entire build, host, and runtime environments are setup. Once complete a local filesystem path will be emitted for the location of the resulting package. Observe that path and copy to your clipboard. - -Ex: `/home/conda/envs/datafusion/conda-bld/linux-64/datafusion-0.7.0.tar.bz2` - -Now you are ready to publish this resulting package to anaconda.org. This can be accomplished in a few simple steps. 
- -```bash -# First login to Anaconda with the datafusion credentials -anaconda login -# Upload the package -anaconda upload /home/conda/envs/datafusion/conda-bld/linux-64/datafusion-0.7.0.tar.bz2 -``` +Pypi packages auto upload to conda-forge via [datafusion feedstock](https://github.com/conda-forge/datafusion-feedstock) ### Push the Release Tag diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index f65ddd06e..dcd5d9aac 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -45,4 +45,6 @@ Cargo.lock .github/* benchmarks/tpch/queries/q*.sql benchmarks/tpch/create_tables.sql -.cargo/config.toml \ No newline at end of file +.cargo/config.toml +**/.cargo/config.toml +uv.lock \ No newline at end of file diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 3879a267f..1a9104b55 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -106,7 +106,7 @@ setup_tempdir() { } test_source_distribution() { - # install rust toolchain in a similar fashion like test-miniconda + # install rust toolchain export RUSTUP_HOME=$PWD/test-rustup export CARGO_HOME=$PWD/test-rustup @@ -125,8 +125,8 @@ test_source_distribution() { git clone https://github.com/apache/arrow-testing.git testing git clone https://github.com/apache/parquet-testing.git parquet-testing - python3 -m venv venv - source venv/bin/activate + python3 -m venv .venv + source .venv/bin/activate python3 -m pip install -U pip python3 -m pip install -r requirements-310.txt maturin develop diff --git a/docs/README.md b/docs/README.md index b4b94120e..2bffea9bd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -26,42 +26,32 @@ when changes are merged to the main branch. ## Dependencies It's recommended to install build dependencies and build the documentation -inside a Python `venv`. +inside a Python `venv` using `uv`. 
To prepare building the documentation run the following on the root level of the project: -1. Set up virtual environment if it was not already created - ```bash - python3 -m venv venv - ``` -1. Activate virtual environment - ```bash - source venv/bin/activate - ``` -1. Install Datafusion's Python dependencies - ```bash - pip install -r requirements-310.txt - ``` -1. Install documentation dependencies - ```bash - pip install -r docs/requirements.txt - ``` +```bash +# Set up a virtual environment with the documentation dependencies +uv sync --dev --group docs --no-install-package datafusion +``` ## Build & Preview Run the provided script to build the HTML pages. ```bash -cd docs -./build.sh +# Build the repository +uv run --no-project maturin develop --uv +# Build the documentation +uv run --no-project docs/build.sh ``` -The HTML will be generated into a `build` directory. +The HTML will be generated into a `build` directory in `docs`. Preview the site on Linux by running this command. ```bash -firefox build/html/index.html +firefox docs/build/html/index.html ``` ## Release Process diff --git a/docs/build.sh b/docs/build.sh index 31398d195..f73330323 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -20,6 +20,10 @@ set -e +original_dir=$(pwd) +script_dir=$(dirname "$(realpath "$0")") +cd "$script_dir" || exit + if [ ! -f pokemon.csv ]; then curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv fi @@ -33,3 +37,5 @@ rm -rf temp 2> /dev/null mkdir temp cp -rf source/* temp/ make SOURCEDIR=`pwd`/temp html + +cd "$original_dir" || exit diff --git a/docs/mdbook/src/installation.md b/docs/mdbook/src/installation.md index ba00c8b80..b29f3b66b 100644 --- a/docs/mdbook/src/installation.md +++ b/docs/mdbook/src/installation.md @@ -18,44 +18,45 @@ DataFusion is easy to install, just like any other Python library. 
-## Using pip +## Using uv -``` bash -pip install datafusion -``` +If you do not yet have a virtual environment, create one: -## Conda & JupyterLab setup +```bash +uv venv +``` -This section explains how to install DataFusion in a conda environment with other libraries that allow for a nice Jupyter workflow. This setup is completely optional. These steps are only needed if you'd like to run DataFusion in a Jupyter notebook and have an interface like this: +You can add datafusion to your virtual environment with the usual: -![DataFusion in Jupyter](https://github.com/MrPowers/datafusion-book/raw/main/src/images/datafusion-jupyterlab.png) +```bash +uv pip install datafusion +``` -Create a conda environment with DataFusion, Jupyter, and other useful dependencies in the `datafusion-env.yml` file: +Or, to add to a project: +```bash +uv add datafusion ``` -name: datafusion-env -channels: - - conda-forge - - defaults -dependencies: - - python=3.9 - - ipykernel - - nb_conda - - jupyterlab - - jupyterlab_code_formatter - - isort - - black - - pip - - pip: - - datafusion +## Using pip + +``` bash +pip install datafusion ``` -Create the environment with `conda env create -f datafusion-env.yml`. +## uv & JupyterLab setup -Activate the environment with `conda activate datafusion-env`. +This section explains how to install DataFusion in a uv environment with other libraries that allow for a nice Jupyter workflow. This setup is completely optional. These steps are only needed if you'd like to run DataFusion in a Jupyter notebook and have an interface like this: -Run `jupyter lab` or open the [JupyterLab Desktop application](https://github.com/jupyterlab/jupyterlab-desktop) to start running DataFusion in a Jupyter notebook. +![DataFusion in Jupyter](https://github.com/MrPowers/datafusion-book/raw/main/src/images/datafusion-jupyterlab.png) + +Create a virtual environment with DataFusion, Jupyter, and other useful dependencies and start the desktop application. 
+ +```bash +uv venv +uv pip install datafusion jupyterlab jupyterlab_code_formatter +uv run jupyter lab +``` ## Examples diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index f5cece78e..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -sphinx -pydata-sphinx-theme==0.8.0 -myst-parser -maturin -jinja2 -ipython -pandas -pickleshare -sphinx-autoapi diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index 4457a898f..fb98cfd1d 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -29,22 +29,24 @@ Doing so is a great way to help the community as well as get more familiar with How to develop -------------- -This assumes that you have rust and cargo installed. We use the workflow recommended by `pyo3 `_ and `maturin `_. +This assumes that you have rust and cargo installed. We use the workflow recommended by +`pyo3 `_ and `maturin `_. We recommend using +`uv `_ for python package management. + +By default `uv` will attempt to build the datafusion python package. For our development we prefer to build manually. 
This means +that when creating your virtual environment using `uv sync` you need to pass in the additional `--no-install-package datafusion` +and for `uv run` commands the additional parameter `--no-project` Bootstrap: .. code-block:: shell # fetch this repo - git clone git@github.com:apache/arrow-datafusion-python.git - # prepare development environment (used to build wheel / install in development) - python3 -m venv venv - # activate the venv - source venv/bin/activate - # update pip itself if necessary - python -m pip install -U pip - # install dependencies (for Python 3.8+) - python -m pip install -r requirements-310.txt + git clone git@github.com:apache/datafusion-python.git + # create the virtual environment + uv sync --dev --no-install-package datafusion + # activate the environment + source .venv/bin/activate The tests rely on test data in git submodules. @@ -58,8 +60,8 @@ Whenever rust code changes (your changes or via `git pull`): .. code-block:: shell - # make sure you activate the venv using "source venv/bin/activate" first - maturin develop + # make sure you activate the venv using "source .venv/bin/activate" first + maturin develop --uv python -m pytest Running & Installing pre-commit hooks @@ -86,20 +88,10 @@ Mostly, the ``python`` code is limited to pure wrappers with type hints and good Update Dependencies ------------------- -To change test dependencies, change the `requirements.in` and run - -.. code-block:: shell - - # install pip-tools (this can be done only once), also consider running in venv - python -m pip install pip-tools - python -m piptools compile --generate-hashes -o requirements-310.txt +To change test dependencies, change the ``pyproject.toml`` and run - -To update dependencies, run with `-U` +To update dependencies, run .. 
code-block:: shell - python -m piptools compile -U --generate-hashes -o requirements-310.txt - - -More details about pip-tools `here `_ + uv sync --dev --no-install-package datafusion diff --git a/pyproject.toml b/pyproject.toml index 98bda5aae..6e8acfe71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["maturin>=1.5.1,<1.6.0"] +requires = ["maturin>=1.8.1"] build-backend = "maturin" [project] @@ -24,7 +24,7 @@ name = "datafusion" description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.7" +requires-python = ">=3.8" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifiers = [ "Development Status :: 2 - Pre-Alpha", @@ -35,7 +35,6 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -82,3 +81,23 @@ max-doc-length = 88 "dev/*" = ["D"] "benchmarks/*" = ["D", "F"] "docs/*" = ["D"] + +[dependency-groups] +dev = [ + "maturin>=1.8.1", + "numpy>1.24.4 ; python_full_version >= '3.10'", + "pytest>=7.4.4", + "ruff>=0.9.1", + "toml>=0.10.2", +] +docs = [ + "sphinx>=7.1.2", + "pydata-sphinx-theme==0.8.0", + "myst-parser>=3.0.1", + "jinja2>=3.1.5", + "ipython>=8.12.3", + "pandas>=2.0.3", + "pickleshare>=0.7.5", + "sphinx-autoapi>=3.4.0", + "setuptools>=75.3.0", +] \ No newline at end of file diff --git a/requirements-310.txt b/requirements-310.txt deleted file mode 100644 index d7d25f3f1..000000000 --- a/requirements-310.txt +++ /dev/null @@ -1,195 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --generate-hashes --output-file=requirements-310.txt -# -exceptiongroup==1.2.1 \ - 
--hash=sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad \ - --hash=sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16 - # via pytest -iniconfig==2.0.0 \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - # via pytest -maturin==1.6.0 \ - --hash=sha256:16ef860df20028618b5a064da06b02c1c47acba064a4d25aaf84662a459ec599 \ - --hash=sha256:337899784955934dd67b30497d1dd5fab22da89f60bb079dbaf2eaa446b97a10 \ - --hash=sha256:4e931c92037128ade49cd26dd040d9c46ad8092d8170cc44f5c3a0b4a052d576 \ - --hash=sha256:50133965e52d8b5b969381fee3fde111ae2383905cdaba7650f256e08ccddcd4 \ - --hash=sha256:a2a2436628c36d98dabd79b52256df7e12fc4fd1b122984d9373fdf918fd4609 \ - --hash=sha256:aa4eb7dca7d246b466392f21016f67ff09a9aff2305fa714ca25a2344e4639e7 \ - --hash=sha256:b955025c24c8babc808db49e0ff90db8b4b1320dcc16b14eb26132841737230d \ - --hash=sha256:bd85edcb1b8e2bcddc1b7d16ce58ce00a66aa80c422745c8ad9e132ac40d4b48 \ - --hash=sha256:c87d1a7596c42b589099adb831343a56e02373588366e4cede96cbdf8bd68f9d \ - --hash=sha256:d67ca8dc7f3b2314bd3bf83c4de52645e220ee312fd526e53acc6a735f233fad \ - --hash=sha256:d8620970bd0b6a0acb99dbd0b1c2ebb7a69909d25f6023bdff9635a39001aa51 \ - --hash=sha256:d92b045e90ed919a8a2520dda64e3f384e5e746ea51e1498cc6ac3e9e5c76054 \ - --hash=sha256:dbbbf25dc3c207b0a7bd4f3aea1df33d4f22b8508592796a6f36f4d8ed216db0 - # via -r requirements.in -mypy==1.10.0 \ - --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ - --hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ - --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ - --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ - --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ - 
--hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ - --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ - --hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ - --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ - --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ - --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ - --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ - --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ - --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ - --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ - --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ - --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ - --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ - --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ - --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ - --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ - --hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ - --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ - --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ - --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ - --hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ - --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 - # via -r requirements.in -mypy-extensions==1.0.0 \ - --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ - 
--hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via mypy -numpy==2.0.0 \ - --hash=sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f \ - --hash=sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238 \ - --hash=sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f \ - --hash=sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95 \ - --hash=sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a \ - --hash=sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a \ - --hash=sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2 \ - --hash=sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2 \ - --hash=sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f \ - --hash=sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609 \ - --hash=sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f \ - --hash=sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad \ - --hash=sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86 \ - --hash=sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65 \ - --hash=sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb \ - --hash=sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995 \ - --hash=sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a \ - --hash=sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85 \ - --hash=sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4 \ - --hash=sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275 \ - --hash=sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1 \ - --hash=sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196 \ - 
--hash=sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d \ - --hash=sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e \ - --hash=sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514 \ - --hash=sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f \ - --hash=sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6 \ - --hash=sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4 \ - --hash=sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44 \ - --hash=sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df \ - --hash=sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581 \ - --hash=sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787 \ - --hash=sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5 \ - --hash=sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc \ - --hash=sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871 \ - --hash=sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54 \ - --hash=sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2 \ - --hash=sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98 \ - --hash=sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9 \ - --hash=sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864 \ - --hash=sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de \ - --hash=sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289 \ - --hash=sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b \ - --hash=sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c \ - --hash=sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9 - # via - # -r requirements.in - # pyarrow -packaging==24.1 \ - 
--hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via pytest -pluggy==1.5.0 \ - --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ - --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - # via pytest -pyarrow==16.1.0 \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - --hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - --hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - 
--hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - --hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 - # via -r requirements.in -pytest==8.2.2 \ - --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ - --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 - # via -r requirements.in -ruff==0.4.9 \ - --hash=sha256:06b60f91bfa5514bb689b500a25ba48e897d18fea14dce14b48a0c40d1635893 \ - --hash=sha256:0e8e7b95673f22e0efd3571fb5b0cf71a5eaaa3cc8a776584f3b2cc878e46bff \ - --hash=sha256:2d45ddc6d82e1190ea737341326ecbc9a61447ba331b0a8962869fcada758505 \ - --hash=sha256:4555056049d46d8a381f746680db1c46e67ac3b00d714606304077682832998e \ - 
--hash=sha256:5d5460f789ccf4efd43f265a58538a2c24dbce15dbf560676e430375f20a8198 \ - --hash=sha256:673bddb893f21ab47a8334c8e0ea7fd6598ecc8e698da75bcd12a7b9d0a3206e \ - --hash=sha256:732dd550bfa5d85af8c3c6cbc47ba5b67c6aed8a89e2f011b908fc88f87649db \ - --hash=sha256:784d3ec9bd6493c3b720a0b76f741e6c2d7d44f6b2be87f5eef1ae8cc1d54c84 \ - --hash=sha256:78de3fdb95c4af084087628132336772b1c5044f6e710739d440fc0bccf4d321 \ - --hash=sha256:8064590fd1a50dcf4909c268b0e7c2498253273309ad3d97e4a752bb9df4f521 \ - --hash=sha256:88bffe9c6a454bf8529f9ab9091c99490578a593cc9f9822b7fc065ee0712a06 \ - --hash=sha256:8c1aff58c31948cc66d0b22951aa19edb5af0a3af40c936340cd32a8b1ab7438 \ - --hash=sha256:98ec2775fd2d856dc405635e5ee4ff177920f2141b8e2d9eb5bd6efd50e80317 \ - --hash=sha256:b262ed08d036ebe162123170b35703aaf9daffecb698cd367a8d585157732991 \ - --hash=sha256:e0a22c4157e53d006530c902107c7f550b9233e9706313ab57b892d7197d8e52 \ - --hash=sha256:e91175fbe48f8a2174c9aad70438fe9cb0a5732c4159b2a10a3565fea2d94cde \ - --hash=sha256:f1cb0828ac9533ba0135d148d214e284711ede33640465e706772645483427e3 - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f - # via - # maturin - # mypy - # pytest -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via mypy diff --git a/requirements-311.txt b/requirements-311.txt deleted file mode 100644 index 35b91133c..000000000 --- a/requirements-311.txt +++ /dev/null @@ -1,175 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# 
-# pip-compile --generate-hashes --output-file=requirements-311.txt -# -iniconfig==2.0.0 \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - # via pytest -maturin==1.6.0 \ - --hash=sha256:16ef860df20028618b5a064da06b02c1c47acba064a4d25aaf84662a459ec599 \ - --hash=sha256:337899784955934dd67b30497d1dd5fab22da89f60bb079dbaf2eaa446b97a10 \ - --hash=sha256:4e931c92037128ade49cd26dd040d9c46ad8092d8170cc44f5c3a0b4a052d576 \ - --hash=sha256:50133965e52d8b5b969381fee3fde111ae2383905cdaba7650f256e08ccddcd4 \ - --hash=sha256:a2a2436628c36d98dabd79b52256df7e12fc4fd1b122984d9373fdf918fd4609 \ - --hash=sha256:aa4eb7dca7d246b466392f21016f67ff09a9aff2305fa714ca25a2344e4639e7 \ - --hash=sha256:b955025c24c8babc808db49e0ff90db8b4b1320dcc16b14eb26132841737230d \ - --hash=sha256:bd85edcb1b8e2bcddc1b7d16ce58ce00a66aa80c422745c8ad9e132ac40d4b48 \ - --hash=sha256:c87d1a7596c42b589099adb831343a56e02373588366e4cede96cbdf8bd68f9d \ - --hash=sha256:d67ca8dc7f3b2314bd3bf83c4de52645e220ee312fd526e53acc6a735f233fad \ - --hash=sha256:d8620970bd0b6a0acb99dbd0b1c2ebb7a69909d25f6023bdff9635a39001aa51 \ - --hash=sha256:d92b045e90ed919a8a2520dda64e3f384e5e746ea51e1498cc6ac3e9e5c76054 \ - --hash=sha256:dbbbf25dc3c207b0a7bd4f3aea1df33d4f22b8508592796a6f36f4d8ed216db0 - # via -r requirements.in -mypy==1.10.0 \ - --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ - --hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ - --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ - --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ - --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ - --hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ - --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ - 
--hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ - --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ - --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ - --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ - --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ - --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ - --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ - --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ - --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ - --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ - --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ - --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ - --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ - --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ - --hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ - --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ - --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ - --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ - --hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ - --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 - # via -r requirements.in -mypy-extensions==1.0.0 \ - --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ - --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via mypy -numpy==1.26.4 \ - --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ - 
--hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ - --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ - --hash=sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0 \ - --hash=sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010 \ - --hash=sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a \ - --hash=sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea \ - --hash=sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c \ - --hash=sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71 \ - --hash=sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110 \ - --hash=sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be \ - --hash=sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a \ - --hash=sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a \ - --hash=sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5 \ - --hash=sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed \ - --hash=sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd \ - --hash=sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c \ - --hash=sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e \ - --hash=sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0 \ - --hash=sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c \ - --hash=sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a \ - --hash=sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b \ - --hash=sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0 \ - --hash=sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6 \ - --hash=sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2 \ - 
--hash=sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a \ - --hash=sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30 \ - --hash=sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218 \ - --hash=sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5 \ - --hash=sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07 \ - --hash=sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2 \ - --hash=sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4 \ - --hash=sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764 \ - --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ - --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ - --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f - # via - # -r requirements.in - # pyarrow -packaging==24.0 \ - --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ - --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 - # via pytest -pluggy==1.5.0 \ - --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ - --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - # via pytest -pyarrow==16.1.0 \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - 
--hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - --hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - 
--hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 - # via -r requirements.in -pytest==8.2.2 \ - --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ - --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 - # via -r requirements.in -ruff==0.4.8 \ - --hash=sha256:14019a06dbe29b608f6b7cbcec300e3170a8d86efaddb7b23405cb7f7dcaf780 \ - --hash=sha256:16d717b1d57b2e2fd68bd0bf80fb43931b79d05a7131aa477d66fc40fbd86268 \ - --hash=sha256:284c2e3f3396fb05f5f803c9fffb53ebbe09a3ebe7dda2929ed8d73ded736deb \ - --hash=sha256:384154a1c3f4bf537bac69f33720957ee49ac8d484bfc91720cc94172026ceed \ - --hash=sha256:6d795d7639212c2dfd01991259460101c22aabf420d9b943f153ab9d9706e6a9 \ - --hash=sha256:6ea874950daca5697309d976c9afba830d3bf0ed66887481d6bca1673fc5b66a \ - --hash=sha256:704977a658131651a22b5ebeb28b717ef42ac6ee3b11e91dc87b633b5d83142b \ - --hash=sha256:72584676164e15a68a15778fd1b17c28a519e7a0622161eb2debdcdabdc71883 \ - --hash=sha256:7663a6d78f6adb0eab270fa9cf1ff2d28618ca3a652b60f2a234d92b9ec89066 \ - --hash=sha256:9678d5c9b43315f323af2233a04d747409d1e3aa6789620083a82d1066a35199 \ - --hash=sha256:a7354f921e3fbe04d2a62d46707e569f9315e1a613307f7311a935743c51a764 \ - --hash=sha256:aad360893e92486662ef3be0a339c5ca3c1b109e0134fcd37d534d4be9fb8de3 \ - --hash=sha256:d05f8d6f0c3cce5026cecd83b7a143dcad503045857bc49662f736437380ad45 \ - --hash=sha256:e14a3a095d07560a9d6769a72f781d73259655919d9b396c650fc98a8157555d \ - --hash=sha256:e9d5ce97cacc99878aa0d084c626a15cd21e6b3d53fd6f9112b7fc485918e1fa \ - --hash=sha256:eeceb78da8afb6de0ddada93112869852d04f1cd0f6b80fe464fd4e35c330913 \ - 
--hash=sha256:fc95aac2943ddf360376be9aa3107c8cf9640083940a8c5bd824be692d2216dc - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -typing-extensions==4.12.1 \ - --hash=sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a \ - --hash=sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1 - # via mypy diff --git a/requirements-312.txt b/requirements-312.txt deleted file mode 100644 index e4de5a5d2..000000000 --- a/requirements-312.txt +++ /dev/null @@ -1,184 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --generate-hashes --output-file=requirements-312.txt -# -iniconfig==2.0.0 \ - --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - # via pytest -maturin==1.6.0 \ - --hash=sha256:16ef860df20028618b5a064da06b02c1c47acba064a4d25aaf84662a459ec599 \ - --hash=sha256:337899784955934dd67b30497d1dd5fab22da89f60bb079dbaf2eaa446b97a10 \ - --hash=sha256:4e931c92037128ade49cd26dd040d9c46ad8092d8170cc44f5c3a0b4a052d576 \ - --hash=sha256:50133965e52d8b5b969381fee3fde111ae2383905cdaba7650f256e08ccddcd4 \ - --hash=sha256:a2a2436628c36d98dabd79b52256df7e12fc4fd1b122984d9373fdf918fd4609 \ - --hash=sha256:aa4eb7dca7d246b466392f21016f67ff09a9aff2305fa714ca25a2344e4639e7 \ - --hash=sha256:b955025c24c8babc808db49e0ff90db8b4b1320dcc16b14eb26132841737230d \ - --hash=sha256:bd85edcb1b8e2bcddc1b7d16ce58ce00a66aa80c422745c8ad9e132ac40d4b48 \ - --hash=sha256:c87d1a7596c42b589099adb831343a56e02373588366e4cede96cbdf8bd68f9d \ - --hash=sha256:d67ca8dc7f3b2314bd3bf83c4de52645e220ee312fd526e53acc6a735f233fad \ - --hash=sha256:d8620970bd0b6a0acb99dbd0b1c2ebb7a69909d25f6023bdff9635a39001aa51 \ - 
--hash=sha256:d92b045e90ed919a8a2520dda64e3f384e5e746ea51e1498cc6ac3e9e5c76054 \ - --hash=sha256:dbbbf25dc3c207b0a7bd4f3aea1df33d4f22b8508592796a6f36f4d8ed216db0 - # via -r requirements.in -mypy==1.10.0 \ - --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ - --hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ - --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ - --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ - --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ - --hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ - --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ - --hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ - --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ - --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ - --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ - --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ - --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ - --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ - --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ - --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ - --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ - --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ - --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ - --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ - --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ - 
--hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ - --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ - --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ - --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ - --hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ - --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 - # via -r requirements.in -mypy-extensions==1.0.0 \ - --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ - --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via mypy -numpy==2.0.0 \ - --hash=sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f \ - --hash=sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238 \ - --hash=sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f \ - --hash=sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95 \ - --hash=sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a \ - --hash=sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a \ - --hash=sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2 \ - --hash=sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2 \ - --hash=sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f \ - --hash=sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609 \ - --hash=sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f \ - --hash=sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad \ - --hash=sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86 \ - --hash=sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65 \ - --hash=sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb \ - 
--hash=sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995 \ - --hash=sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a \ - --hash=sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85 \ - --hash=sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4 \ - --hash=sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275 \ - --hash=sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1 \ - --hash=sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196 \ - --hash=sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d \ - --hash=sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e \ - --hash=sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514 \ - --hash=sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f \ - --hash=sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6 \ - --hash=sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4 \ - --hash=sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44 \ - --hash=sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df \ - --hash=sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581 \ - --hash=sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787 \ - --hash=sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5 \ - --hash=sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc \ - --hash=sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871 \ - --hash=sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54 \ - --hash=sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2 \ - --hash=sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98 \ - --hash=sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9 \ - 
--hash=sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864 \ - --hash=sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de \ - --hash=sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289 \ - --hash=sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b \ - --hash=sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c \ - --hash=sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9 - # via - # -r requirements.in - # pyarrow -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via pytest -pluggy==1.5.0 \ - --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ - --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - # via pytest -pyarrow==16.1.0 \ - --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ - --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ - --hash=sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f \ - --hash=sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2 \ - --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \ - --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \ - --hash=sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b \ - --hash=sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55 \ - --hash=sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15 \ - --hash=sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e \ - --hash=sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f \ - --hash=sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c \ - 
--hash=sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a \ - --hash=sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa \ - --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \ - --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \ - --hash=sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628 \ - --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \ - --hash=sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e \ - --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \ - --hash=sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b \ - --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \ - --hash=sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c \ - --hash=sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f \ - --hash=sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3 \ - --hash=sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6 \ - --hash=sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c \ - --hash=sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147 \ - --hash=sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5 \ - --hash=sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7 \ - --hash=sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710 \ - --hash=sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4 \ - --hash=sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed \ - --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848 \ - --hash=sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83 \ - --hash=sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444 - # via -r 
requirements.in -pytest==8.2.2 \ - --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ - --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 - # via -r requirements.in -ruff==0.4.9 \ - --hash=sha256:06b60f91bfa5514bb689b500a25ba48e897d18fea14dce14b48a0c40d1635893 \ - --hash=sha256:0e8e7b95673f22e0efd3571fb5b0cf71a5eaaa3cc8a776584f3b2cc878e46bff \ - --hash=sha256:2d45ddc6d82e1190ea737341326ecbc9a61447ba331b0a8962869fcada758505 \ - --hash=sha256:4555056049d46d8a381f746680db1c46e67ac3b00d714606304077682832998e \ - --hash=sha256:5d5460f789ccf4efd43f265a58538a2c24dbce15dbf560676e430375f20a8198 \ - --hash=sha256:673bddb893f21ab47a8334c8e0ea7fd6598ecc8e698da75bcd12a7b9d0a3206e \ - --hash=sha256:732dd550bfa5d85af8c3c6cbc47ba5b67c6aed8a89e2f011b908fc88f87649db \ - --hash=sha256:784d3ec9bd6493c3b720a0b76f741e6c2d7d44f6b2be87f5eef1ae8cc1d54c84 \ - --hash=sha256:78de3fdb95c4af084087628132336772b1c5044f6e710739d440fc0bccf4d321 \ - --hash=sha256:8064590fd1a50dcf4909c268b0e7c2498253273309ad3d97e4a752bb9df4f521 \ - --hash=sha256:88bffe9c6a454bf8529f9ab9091c99490578a593cc9f9822b7fc065ee0712a06 \ - --hash=sha256:8c1aff58c31948cc66d0b22951aa19edb5af0a3af40c936340cd32a8b1ab7438 \ - --hash=sha256:98ec2775fd2d856dc405635e5ee4ff177920f2141b8e2d9eb5bd6efd50e80317 \ - --hash=sha256:b262ed08d036ebe162123170b35703aaf9daffecb698cd367a8d585157732991 \ - --hash=sha256:e0a22c4157e53d006530c902107c7f550b9233e9706313ab57b892d7197d8e52 \ - --hash=sha256:e91175fbe48f8a2174c9aad70438fe9cb0a5732c4159b2a10a3565fea2d94cde \ - --hash=sha256:f1cb0828ac9533ba0135d148d214e284711ede33640465e706772645483427e3 - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - 
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via mypy diff --git a/requirements.in b/requirements.in deleted file mode 100644 index 1b7f62052..000000000 --- a/requirements.in +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -maturin>=1.5.1 -mypy -numpy -pyarrow>=11.0.0 -pytest -ruff -toml -importlib_metadata; python_version < "3.8" -PyGitHub diff --git a/uv.lock b/uv.lock new file mode 100644 index 000000000..75d9ed018 --- /dev/null +++ b/uv.lock @@ -0,0 +1,1842 @@ +version = 1 +requires-python = ">=3.8" +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", + "python_full_version < '3.9'", +] + +[[package]] +name = "alabaster" +version = "0.7.13" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/94/71/a8ee96d1fd95ca04a0d2e2d9c4081dac4c2d2b12f7ddb899c8cb9bfd1532/alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2", size = 11454 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3", size = 13857 }, +] + +[[package]] +name = "alabaster" +version = "0.7.16" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92", size = 13511 }, +] + +[[package]] +name = "alabaster" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + 
"python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, +] + +[[package]] +name = "appnope" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, +] + +[[package]] +name = "astroid" +version = "3.2.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/53/1067e1113ecaf58312357f2cd93063674924119d80d173adc3f6f2387aa2/astroid-3.2.4.tar.gz", hash = "sha256:0e14202810b30da1b735827f78f5157be2bbd4a7a59b7707ca0bfc2fb4c0063a", size = 397576 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/80/96/b32bbbb46170a1c8b8b1f28c794202e25cfe743565e9d3469b8eb1e0cc05/astroid-3.2.4-py3-none-any.whl", hash = "sha256:413658a61eeca6202a59231abb473f932038fbcbf1666587f66d482083413a25", size = 276348 
}, +] + +[[package]] +name = "astroid" +version = "3.3.8" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "typing-extensions", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/28/0bc8a17d6cd4cc3c79ae41b7105a2b9a327c110e5ddd37a8a27b29a5c8a2/astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c", size = 275153 }, +] + +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + +[[package]] +name = "babel" +version = "2.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytz", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, +] + +[[package]] +name = "backcall" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/40/764a663805d84deee23043e1426a9175567db89c8b3287b5c2ad9f71aa93/backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", size = 18041 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/1c/ff6546b6c12603d8dd1070aa3c3d273ad4c07f5771689a7b69a550e8c951/backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255", size = 11157 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, +] + +[[package]] +name = "certifi" +version = "2024.12.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/bd/1d41ee578ce09523c81a15426705dd20969f5abf006d1afe8aeff0dd776a/certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db", size = 166010 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a5/32/8f6669fc4798494966bf446c8c4a162e0b5d893dff088afddf76414f70e1/certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56", size = 164927 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/58/5580c1716040bc89206c77d8f74418caf82ce519aae06450393ca73475d1/charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de", size = 198013 }, + { url = "https://files.pythonhosted.org/packages/d0/11/00341177ae71c6f5159a08168bcb98c6e6d196d372c94511f9f6c9afe0c6/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176", size = 141285 }, + { url = "https://files.pythonhosted.org/packages/01/09/11d684ea5819e5a8f5100fb0b38cf8d02b514746607934134d31233e02c8/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037", size = 151449 }, + { url = "https://files.pythonhosted.org/packages/08/06/9f5a12939db324d905dc1f70591ae7d7898d030d7662f0d426e2286f68c9/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f", size = 143892 }, + { url = 
"https://files.pythonhosted.org/packages/93/62/5e89cdfe04584cb7f4d36003ffa2936681b03ecc0754f8e969c2becb7e24/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a", size = 146123 }, + { url = "https://files.pythonhosted.org/packages/a9/ac/ab729a15c516da2ab70a05f8722ecfccc3f04ed7a18e45c75bbbaa347d61/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a", size = 147943 }, + { url = "https://files.pythonhosted.org/packages/03/d2/3f392f23f042615689456e9a274640c1d2e5dd1d52de36ab8f7955f8f050/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247", size = 142063 }, + { url = "https://files.pythonhosted.org/packages/f2/e3/e20aae5e1039a2cd9b08d9205f52142329f887f8cf70da3650326670bddf/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408", size = 150578 }, + { url = "https://files.pythonhosted.org/packages/8d/af/779ad72a4da0aed925e1139d458adc486e61076d7ecdcc09e610ea8678db/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb", size = 153629 }, + { url = "https://files.pythonhosted.org/packages/c2/b6/7aa450b278e7aa92cf7732140bfd8be21f5f29d5bf334ae987c945276639/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d", size = 150778 }, + { url = "https://files.pythonhosted.org/packages/39/f4/d9f4f712d0951dcbfd42920d3db81b00dd23b6ab520419626f4023334056/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807", size = 146453 }, + { url = "https://files.pythonhosted.org/packages/49/2b/999d0314e4ee0cff3cb83e6bc9aeddd397eeed693edb4facb901eb8fbb69/charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f", size = 95479 }, + { url = "https://files.pythonhosted.org/packages/2d/ce/3cbed41cff67e455a386fb5e5dd8906cdda2ed92fbc6297921f2e4419309/charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f", size = 102790 }, + { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, + { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, + { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, + { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335 }, + { url = 
"https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862 }, + { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673 }, + { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211 }, + { url = "https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039 }, + { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939 }, + { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075 }, + { url = "https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340 }, + { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205 }, + { url = "https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441 }, + { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, + { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, + { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, + { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 }, + { url = 
"https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 }, + { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 }, + { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 }, + { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 }, + { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 }, + { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 }, + { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 }, + { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 }, + { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 }, + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = 
"https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/10/bd/6517ea94f2672e801011d50b5d06be2a0deaf566aea27bcdcd47e5195357/charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c", size = 195653 }, + { url = "https://files.pythonhosted.org/packages/e5/0d/815a2ba3f283b4eeaa5ece57acade365c5b4135f65a807a083c818716582/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9", size = 140701 }, + { url = "https://files.pythonhosted.org/packages/aa/17/c94be7ee0d142687e047fe1de72060f6d6837f40eedc26e87e6e124a3fc6/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8", size = 150495 }, + { url = "https://files.pythonhosted.org/packages/f7/33/557ac796c47165fc141e4fb71d7b0310f67e05cb420756f3a82e0a0068e0/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6", size = 142946 }, + { url = 
"https://files.pythonhosted.org/packages/1e/0d/38ef4ae41e9248d63fc4998d933cae22473b1b2ac4122cf908d0f5eb32aa/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c", size = 144737 }, + { url = "https://files.pythonhosted.org/packages/43/01/754cdb29dd0560f58290aaaa284d43eea343ad0512e6ad3b8b5c11f08592/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a", size = 147471 }, + { url = "https://files.pythonhosted.org/packages/ba/cd/861883ba5160c7a9bd242c30b2c71074cda2aefcc0addc91118e0d4e0765/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd", size = 140801 }, + { url = "https://files.pythonhosted.org/packages/6f/7f/0c0dad447819e90b93f8ed238cc8f11b91353c23c19e70fa80483a155bed/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd", size = 149312 }, + { url = "https://files.pythonhosted.org/packages/8e/09/9f8abcc6fff60fb727268b63c376c8c79cc37b833c2dfe1f535dfb59523b/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824", size = 152347 }, + { url = "https://files.pythonhosted.org/packages/be/e5/3f363dad2e24378f88ccf63ecc39e817c29f32e308ef21a7a6d9c1201165/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca", size = 149888 }, + { url = "https://files.pythonhosted.org/packages/e4/10/a78c0e91f487b4ad0ef7480ac765e15b774f83de2597f1b6ef0eaf7a2f99/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b", 
size = 145169 }, + { url = "https://files.pythonhosted.org/packages/d3/81/396e7d7f5d7420da8273c91175d2e9a3f569288e3611d521685e4b9ac9cc/charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e", size = 95094 }, + { url = "https://files.pythonhosted.org/packages/40/bb/20affbbd9ea29c71ea123769dc568a6d42052ff5089c5fe23e21e21084a6/charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4", size = 102139 }, + { url = "https://files.pythonhosted.org/packages/7f/c0/b913f8f02836ed9ab32ea643c6fe4d3325c3d8627cf6e78098671cafff86/charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", size = 197867 }, + { url = "https://files.pythonhosted.org/packages/0f/6c/2bee440303d705b6fb1e2ec789543edec83d32d258299b16eed28aad48e0/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", size = 141385 }, + { url = "https://files.pythonhosted.org/packages/3d/04/cb42585f07f6f9fd3219ffb6f37d5a39b4fd2db2355b23683060029c35f7/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", size = 151367 }, + { url = "https://files.pythonhosted.org/packages/54/54/2412a5b093acb17f0222de007cc129ec0e0df198b5ad2ce5699355269dfe/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770", size = 143928 }, + { url = "https://files.pythonhosted.org/packages/5a/6d/e2773862b043dcf8a221342954f375392bb2ce6487bcd9f2c1b34e1d6781/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4", size = 146203 }, + { url = "https://files.pythonhosted.org/packages/b9/f8/ca440ef60d8f8916022859885f231abb07ada3c347c03d63f283bec32ef5/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537", size = 148082 }, + { url = "https://files.pythonhosted.org/packages/04/d2/42fd330901aaa4b805a1097856c2edf5095e260a597f65def493f4b8c833/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496", size = 142053 }, + { url = "https://files.pythonhosted.org/packages/9e/af/3a97a4fa3c53586f1910dadfc916e9c4f35eeada36de4108f5096cb7215f/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78", size = 150625 }, + { url = "https://files.pythonhosted.org/packages/26/ae/23d6041322a3556e4da139663d02fb1b3c59a23ab2e2b56432bd2ad63ded/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7", size = 153549 }, + { url = "https://files.pythonhosted.org/packages/94/22/b8f2081c6a77cb20d97e57e0b385b481887aa08019d2459dc2858ed64871/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6", size = 150945 }, + { url = "https://files.pythonhosted.org/packages/c7/0b/c5ec5092747f801b8b093cdf5610e732b809d6cb11f4c51e35fc28d1d389/charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294", size = 146595 }, + { url = "https://files.pythonhosted.org/packages/0c/5a/0b59704c38470df6768aa154cc87b1ac7c9bb687990a1559dc8765e8627e/charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = 
"sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5", size = 95453 }, + { url = "https://files.pythonhosted.org/packages/85/2d/a9790237cb4d01a6d57afadc8573c8b73c609ade20b80f4cda30802009ee/charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765", size = 102811 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = { editable = "." 
} +dependencies = [ + { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] + +[package.dev-dependencies] +dev = [ + { name = "maturin" }, + { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pytest" }, + { name = "ruff" }, + { name = "toml" }, +] +docs = [ + { name = "ipython", version = "8.12.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "ipython", version = "8.31.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "jinja2" }, + { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "myst-parser", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pandas", version = "2.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "pandas", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pickleshare" }, + { name = "pydata-sphinx-theme" }, + { name = "setuptools", version = "75.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "setuptools", version = "75.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "sphinx", version = 
"7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinx-autoapi" }, +] + +[package.metadata] +requires-dist = [ + { name = "pyarrow", specifier = ">=11.0.0" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "maturin", specifier = ">=1.8.1" }, + { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "pytest", specifier = ">=7.4.4" }, + { name = "ruff", specifier = ">=0.9.1" }, + { name = "toml", specifier = ">=0.10.2" }, +] +docs = [ + { name = "ipython", specifier = ">=8.12.3" }, + { name = "jinja2", specifier = ">=3.1.5" }, + { name = "myst-parser", specifier = ">=3.0.1" }, + { name = "pandas", specifier = ">=2.0.3" }, + { name = "pickleshare", specifier = ">=0.7.5" }, + { name = "pydata-sphinx-theme", specifier = "==0.8.0" }, + { name = "setuptools", specifier = ">=75.3.0" }, + { name = "sphinx", specifier = ">=7.1.2" }, + { name = "sphinx-autoapi", specifier = ">=3.4.0" }, +] + +[[package]] +name = "decorator" +version = "5.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/0c/8d907af351aa16b42caae42f9d6aa37b900c67308052d10fdce809f8d952/decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", size = 35016 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, +] + +[[package]] +name = 
"docutils" +version = "0.20.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666 }, +] + +[[package]] +name = "docutils" +version = "0.21.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = 
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, +] + +[[package]] +name = "executing" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "imagesize" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769 }, +] + +[[package]] +name = "importlib-metadata" +version = 
"8.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp", version = "3.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "zipp", version = "3.21.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + +[[package]] +name = "ipython" +version = "8.12.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "appnope", marker = "python_full_version < '3.9' and sys_platform == 'darwin'" }, + { name = "backcall", marker = "python_full_version < '3.9'" }, + { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version < '3.9'" }, + { 
name = "jedi", marker = "python_full_version < '3.9'" }, + { name = "matplotlib-inline", marker = "python_full_version < '3.9'" }, + { name = "pexpect", marker = "python_full_version < '3.9' and sys_platform != 'win32'" }, + { name = "pickleshare", marker = "python_full_version < '3.9'" }, + { name = "prompt-toolkit", marker = "python_full_version < '3.9'" }, + { name = "pygments", marker = "python_full_version < '3.9'" }, + { name = "stack-data", marker = "python_full_version < '3.9'" }, + { name = "traitlets", marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/6a/44ef299b1762f5a73841e87fae8a73a8cc8aee538d6dc8c77a5afe1fd2ce/ipython-8.12.3.tar.gz", hash = "sha256:3910c4b54543c2ad73d06579aa771041b7d5707b033bd488669b4cf544e3b363", size = 5470171 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/97/8fe103906cd81bc42d3b0175b5534a9f67dccae47d6451131cf8d0d70bb2/ipython-8.12.3-py3-none-any.whl", hash = "sha256:b0340d46a933d27c657b211a329d0be23793c36595acf9e6ef4164bc01a1804c", size = 798307 }, +] + +[[package]] +name = "ipython" +version = "8.18.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version == '3.9.*'" }, + { name = "exceptiongroup", marker = "python_full_version == '3.9.*'" }, + { name = "jedi", marker = "python_full_version == '3.9.*'" }, + { name = "matplotlib-inline", marker = "python_full_version == '3.9.*'" }, + { name = "pexpect", marker = "python_full_version == '3.9.*' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version == '3.9.*'" }, + { name = "pygments", marker = "python_full_version == '3.9.*'" }, + { name = "stack-data", marker = 
"python_full_version == '3.9.*'" }, + { name = "traitlets", marker = "python_full_version == '3.9.*'" }, + { name = "typing-extensions", marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/b9/3ba6c45a6df813c09a48bac313c22ff83efa26cbb55011218d925a46e2ad/ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", size = 5486330 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/6b/d9fdcdef2eb6a23f391251fde8781c38d42acd82abe84d054cb74f7863b0/ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397", size = 808161 }, +] + +[[package]] +name = "ipython" +version = "8.31.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version >= '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, + { name = "jedi", marker = "python_full_version >= '3.10'" }, + { name = "matplotlib-inline", marker = "python_full_version >= '3.10'" }, + { name = "pexpect", marker = "python_full_version >= '3.10' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version >= '3.10'" }, + { name = "pygments", marker = "python_full_version >= '3.10'" }, + { name = "stack-data", marker = "python_full_version >= '3.10'" }, + { name = "traitlets", marker = "python_full_version >= '3.10'" }, + { name = "typing-extensions", marker = "python_full_version >= '3.10' and python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/35/6f90fdddff7a08b7b715fccbd2427b5212c9525cd043d26fdc45bee0708d/ipython-8.31.0.tar.gz", 
hash = "sha256:b6a2274606bec6166405ff05e54932ed6e5cfecaca1fc05f2cacde7bb074d70b", size = 5501011 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/60/d0feb6b6d9fe4ab89fe8fe5b47cbf6cd936bfd9f1e7ffa9d0015425aeed6/ipython-8.31.0-py3-none-any.whl", hash = "sha256:46ec58f8d3d076a61d128fe517a51eb730e3aaf0c184ea8c17d16e366660c6a6", size = 821583 }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278 }, +] + +[[package]] +name = "jinja2" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe", version = "2.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "markupsafe", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/92/b3130cbbf5591acf9ade8708c365f3238046ac7cb8ccba6e81abccb0ccff/jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", size = 244674 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/0f/2ba5fbcd631e3e88689309dbe978c5769e883e4b84ebfe7da30b43275c5a/jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb", size = 134596 }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, +] + +[[package]] +name = "markupsafe" +version = "2.1.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/87/5b/aae44c6655f3801e81aa3eef09dbbf012431987ba564d7231722f68df02d/MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", size = 19384 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/54/ad5eb37bf9d51800010a74e4665425831a9db4e7c4e0fde4352e391e808e/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", size = 18206 }, + { url = "https://files.pythonhosted.org/packages/6a/4a/a4d49415e600bacae038c67f9fecc1d5433b9d3c71a4de6f33537b89654c/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", size = 14079 }, + { url = "https://files.pythonhosted.org/packages/0a/7b/85681ae3c33c385b10ac0f8dd025c30af83c78cec1c37a6aa3b55e67f5ec/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", size = 26620 }, + { url = 
"https://files.pythonhosted.org/packages/7c/52/2b1b570f6b8b803cef5ac28fdf78c0da318916c7d2fe9402a84d591b394c/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", size = 25818 }, + { url = "https://files.pythonhosted.org/packages/29/fe/a36ba8c7ca55621620b2d7c585313efd10729e63ef81e4e61f52330da781/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", size = 25493 }, + { url = "https://files.pythonhosted.org/packages/60/ae/9c60231cdfda003434e8bd27282b1f4e197ad5a710c14bee8bea8a9ca4f0/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", size = 30630 }, + { url = "https://files.pythonhosted.org/packages/65/dc/1510be4d179869f5dafe071aecb3f1f41b45d37c02329dfba01ff59e5ac5/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", size = 29745 }, + { url = "https://files.pythonhosted.org/packages/30/39/8d845dd7d0b0613d86e0ef89549bfb5f61ed781f59af45fc96496e897f3a/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", size = 30021 }, + { url = "https://files.pythonhosted.org/packages/c7/5c/356a6f62e4f3c5fbf2602b4771376af22a3b16efa74eb8716fb4e328e01e/MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", size = 16659 }, + { url = "https://files.pythonhosted.org/packages/69/48/acbf292615c65f0604a0c6fc402ce6d8c991276e16c80c46a8f758fbd30c/MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", size = 17213 }, + { url = 
"https://files.pythonhosted.org/packages/11/e7/291e55127bb2ae67c64d66cef01432b5933859dfb7d6949daa721b89d0b3/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", size = 18219 }, + { url = "https://files.pythonhosted.org/packages/6b/cb/aed7a284c00dfa7c0682d14df85ad4955a350a21d2e3b06d8240497359bf/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", size = 14098 }, + { url = "https://files.pythonhosted.org/packages/1c/cf/35fe557e53709e93feb65575c93927942087e9b97213eabc3fe9d5b25a55/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", size = 29014 }, + { url = "https://files.pythonhosted.org/packages/97/18/c30da5e7a0e7f4603abfc6780574131221d9148f323752c2755d48abad30/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", size = 28220 }, + { url = "https://files.pythonhosted.org/packages/0c/40/2e73e7d532d030b1e41180807a80d564eda53babaf04d65e15c1cf897e40/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", size = 27756 }, + { url = "https://files.pythonhosted.org/packages/18/46/5dca760547e8c59c5311b332f70605d24c99d1303dd9a6e1fc3ed0d73561/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", size = 33988 }, + { url = "https://files.pythonhosted.org/packages/6d/c5/27febe918ac36397919cd4a67d5579cbbfa8da027fa1238af6285bb368ea/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", size = 32718 }, + { url 
= "https://files.pythonhosted.org/packages/f8/81/56e567126a2c2bc2684d6391332e357589a96a76cb9f8e5052d85cb0ead8/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", size = 33317 }, + { url = "https://files.pythonhosted.org/packages/00/0b/23f4b2470accb53285c613a3ab9ec19dc944eaf53592cb6d9e2af8aa24cc/MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", size = 16670 }, + { url = "https://files.pythonhosted.org/packages/b7/a2/c78a06a9ec6d04b3445a949615c4c7ed86a0b2eb68e44e7541b9d57067cc/MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", size = 17224 }, + { url = "https://files.pythonhosted.org/packages/53/bd/583bf3e4c8d6a321938c13f49d44024dbe5ed63e0a7ba127e454a66da974/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", size = 18215 }, + { url = "https://files.pythonhosted.org/packages/48/d6/e7cd795fc710292c3af3a06d80868ce4b02bfbbf370b7cee11d282815a2a/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", size = 14069 }, + { url = "https://files.pythonhosted.org/packages/51/b5/5d8ec796e2a08fc814a2c7d2584b55f889a55cf17dd1a90f2beb70744e5c/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", size = 29452 }, + { url = "https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", size = 28462 }, + { url = 
"https://files.pythonhosted.org/packages/2d/75/fd6cb2e68780f72d47e6671840ca517bda5ef663d30ada7616b0462ad1e3/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", size = 27869 }, + { url = "https://files.pythonhosted.org/packages/b0/81/147c477391c2750e8fc7705829f7351cf1cd3be64406edcf900dc633feb2/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", size = 33906 }, + { url = "https://files.pythonhosted.org/packages/8b/ff/9a52b71839d7a256b563e85d11050e307121000dcebc97df120176b3ad93/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", size = 32296 }, + { url = "https://files.pythonhosted.org/packages/88/07/2dc76aa51b481eb96a4c3198894f38b480490e834479611a4053fbf08623/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", size = 33038 }, + { url = "https://files.pythonhosted.org/packages/96/0c/620c1fb3661858c0e37eb3cbffd8c6f732a67cd97296f725789679801b31/MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", size = 16572 }, + { url = "https://files.pythonhosted.org/packages/3f/14/c3554d512d5f9100a95e737502f4a2323a1959f6d0d01e0d0997b35f7b10/MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", size = 17127 }, + { url = "https://files.pythonhosted.org/packages/f8/ff/2c942a82c35a49df5de3a630ce0a8456ac2969691b230e530ac12314364c/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", size = 18192 }, + { url = 
"https://files.pythonhosted.org/packages/4f/14/6f294b9c4f969d0c801a4615e221c1e084722ea6114ab2114189c5b8cbe0/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", size = 14072 }, + { url = "https://files.pythonhosted.org/packages/81/d4/fd74714ed30a1dedd0b82427c02fa4deec64f173831ec716da11c51a50aa/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", size = 26928 }, + { url = "https://files.pythonhosted.org/packages/c7/bd/50319665ce81bb10e90d1cf76f9e1aa269ea6f7fa30ab4521f14d122a3df/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", size = 26106 }, + { url = "https://files.pythonhosted.org/packages/4c/6f/f2b0f675635b05f6afd5ea03c094557bdb8622fa8e673387444fe8d8e787/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68", size = 25781 }, + { url = "https://files.pythonhosted.org/packages/51/e0/393467cf899b34a9d3678e78961c2c8cdf49fb902a959ba54ece01273fb1/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", size = 30518 }, + { url = "https://files.pythonhosted.org/packages/f6/02/5437e2ad33047290dafced9df741d9efc3e716b75583bbd73a9984f1b6f7/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", size = 29669 }, + { url = "https://files.pythonhosted.org/packages/0e/7d/968284145ffd9d726183ed6237c77938c021abacde4e073020f920e060b2/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", size = 29933 }, + { url = 
"https://files.pythonhosted.org/packages/bf/f3/ecb00fc8ab02b7beae8699f34db9357ae49d9f21d4d3de6f305f34fa949e/MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", size = 16656 }, + { url = "https://files.pythonhosted.org/packages/92/21/357205f03514a49b293e214ac39de01fadd0970a6e05e4bf1ddd0ffd0881/MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", size = 17206 }, + { url = "https://files.pythonhosted.org/packages/0f/31/780bb297db036ba7b7bbede5e1d7f1e14d704ad4beb3ce53fb495d22bc62/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", size = 18193 }, + { url = "https://files.pythonhosted.org/packages/6c/77/d77701bbef72892affe060cdacb7a2ed7fd68dae3b477a8642f15ad3b132/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", size = 14073 }, + { url = "https://files.pythonhosted.org/packages/d9/a7/1e558b4f78454c8a3a0199292d96159eb4d091f983bc35ef258314fe7269/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", size = 26486 }, + { url = "https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", size = 25685 }, + { url = "https://files.pythonhosted.org/packages/6a/18/ae5a258e3401f9b8312f92b028c54d7026a97ec3ab20bfaddbdfa7d8cce8/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", size = 25338 }, + { url = 
"https://files.pythonhosted.org/packages/0b/cc/48206bd61c5b9d0129f4d75243b156929b04c94c09041321456fd06a876d/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", size = 30439 }, + { url = "https://files.pythonhosted.org/packages/d1/06/a41c112ab9ffdeeb5f77bc3e331fdadf97fa65e52e44ba31880f4e7f983c/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", size = 29531 }, + { url = "https://files.pythonhosted.org/packages/02/8c/ab9a463301a50dab04d5472e998acbd4080597abc048166ded5c7aa768c8/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", size = 29823 }, + { url = "https://files.pythonhosted.org/packages/bc/29/9bc18da763496b055d8e98ce476c8e718dcfd78157e17f555ce6dd7d0895/MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", size = 16658 }, + { url = "https://files.pythonhosted.org/packages/f6/f8/4da07de16f10551ca1f640c92b5f316f9394088b183c6a57183df6de5ae4/MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", size = 17211 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, + { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393 }, + { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732 }, + { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866 }, + { url = "https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964 }, + { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977 }, + { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366 }, + { url 
= "https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091 }, + { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065 }, + { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514 }, + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 }, + { url = 
"https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 }, + { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, + { url = 
"https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 }, + { url = 
"https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352 }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122 }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085 }, + { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978 }, + { url = 
"https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208 }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357 }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344 }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101 }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603 }, + { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510 }, + { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486 }, + { url = 
"https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480 }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914 }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796 }, + { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473 }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114 }, + { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098 }, + { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208 }, + { url = 
"https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 }, + { url = "https://files.pythonhosted.org/packages/a7/ea/9b1530c3fdeeca613faeb0fb5cbcf2389d816072fab72a71b45749ef6062/MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a", size = 14344 }, + { url = "https://files.pythonhosted.org/packages/4b/c2/fbdbfe48848e7112ab05e627e718e854d20192b674952d9042ebd8c9e5de/MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff", size = 12389 }, + { url = "https://files.pythonhosted.org/packages/f0/25/7a7c6e4dbd4f867d95d94ca15449e91e52856f6ed1905d58ef1de5e211d0/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13", size = 21607 }, + { url = "https://files.pythonhosted.org/packages/53/8f/f339c98a178f3c1e545622206b40986a4c3307fe39f70ccd3d9df9a9e425/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144", size = 20728 }, + { url = "https://files.pythonhosted.org/packages/1a/03/8496a1a78308456dbd50b23a385c69b41f2e9661c67ea1329849a598a8f9/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29", size = 20826 }, + { url = "https://files.pythonhosted.org/packages/e6/cf/0a490a4bd363048c3022f2f475c8c05582179bb179defcee4766fb3dcc18/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0", size = 21843 }, + { url = 
"https://files.pythonhosted.org/packages/19/a3/34187a78613920dfd3cdf68ef6ce5e99c4f3417f035694074beb8848cd77/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0", size = 21219 }, + { url = "https://files.pythonhosted.org/packages/17/d8/5811082f85bb88410ad7e452263af048d685669bbbfb7b595e8689152498/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178", size = 20946 }, + { url = "https://files.pythonhosted.org/packages/7c/31/bd635fb5989440d9365c5e3c47556cfea121c7803f5034ac843e8f37c2f2/MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f", size = 15063 }, + { url = "https://files.pythonhosted.org/packages/b3/73/085399401383ce949f727afec55ec3abd76648d04b9f22e1c0e99cb4bec3/MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a", size = 15506 }, +] + +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, +] + +[[package]] +name = "maturin" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9a/08/ccb0f917722a35ab0d758be9bb5edaf645c3a3d6170061f10d396ecd273f/maturin-1.8.1.tar.gz", hash = "sha256:49cd964aabf59f8b0a6969f9860d2cdf194ac331529caae14c884f5659568857", size = 197397 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/00/f34077315f34db8ad2ccf6bfe11b864ca27baab3a1320634da8e3cf89a48/maturin-1.8.1-py3-none-linux_armv6l.whl", hash = "sha256:7e590a23d9076b8a994f2e67bc63dc9a2d1c9a41b1e7b45ac354ba8275254e89", size = 7568415 }, + { url = "https://files.pythonhosted.org/packages/5c/07/9219976135ce0cb32d2fa6ea5c6d0ad709013d9a17967312e149b98153a6/maturin-1.8.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d8251a95682c83ea60988c804b620c181911cd824aa107b4a49ac5333c92968", size = 14527816 }, + { url = "https://files.pythonhosted.org/packages/e6/04/fa009a00903acdd1785d58322193140bfe358595347c39f315112dabdf9e/maturin-1.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b9fc1a4354cac5e32c190410208039812ea88c4a36bd2b6499268ec49ef5de00", size = 7580446 }, + { url = "https://files.pythonhosted.org/packages/9b/d4/414b2aab9bbfe88182b734d3aa1b4fef7d7701e50f6be48500378b8c8721/maturin-1.8.1-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:621e171c6b39f95f1d0df69a118416034fbd59c0f89dcaea8c2ea62019deecba", size = 7650535 }, + { url = "https://files.pythonhosted.org/packages/f0/64/879418a8a0196013ec1fb19eada0781c04a30e8d6d9227e80f91275a4f5b/maturin-1.8.1-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:98f638739a5132962347871b85c91f525c9246ef4d99796ae98a2031e3df029f", size = 8006702 }, + { url = "https://files.pythonhosted.org/packages/39/c2/605829324f8371294f70303aca130682df75318958efed246873d3d604ab/maturin-1.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:f9f5c47521924b6e515cbc652a042fe5f17f8747445be9d931048e5d8ddb50a4", size = 
7368164 }, + { url = "https://files.pythonhosted.org/packages/be/6c/30e136d397bb146b94b628c0ef7f17708281611b97849e2cf37847025ac7/maturin-1.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:0f4407c7353c31bfbb8cdeb82bc2170e474cbfb97b5ba27568f440c9d6c1fdd4", size = 7450889 }, + { url = "https://files.pythonhosted.org/packages/1b/50/e1f5023512696d4e56096f702e2f68d6d9a30afe0a4eec82b0e27b8eb4e4/maturin-1.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:ec49cd70cad3c389946c6e2bc0bd50772a7fcb463040dd800720345897eec9bf", size = 9585819 }, + { url = "https://files.pythonhosted.org/packages/b7/80/b24b5248d89d2e5982553900237a337ea098ca9297b8369ca2aa95549e0f/maturin-1.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08767d794de8f8a11c5c8b1b47a4ff9fb6ae2d2d97679e27030f2f509c8c2a0", size = 10920801 }, + { url = "https://files.pythonhosted.org/packages/6e/f4/8ede7a662fabf93456b44390a5ad22630e25fb5ddaecf787251071b2e143/maturin-1.8.1-py3-none-win32.whl", hash = "sha256:d678407713f3e10df33c5b3d7a343ec0551eb7f14d8ad9ba6febeb96f4e4c75c", size = 6873556 }, + { url = "https://files.pythonhosted.org/packages/9c/22/757f093ed0e319e9648155b8c9d716765442bea5bc98ebc58ad4ad5b0524/maturin-1.8.1-py3-none-win_amd64.whl", hash = "sha256:a526f90fe0e5cb59ffb81f4ff547ddc42e823bbdeae4a31012c0893ca6dcaf46", size = 7823153 }, + { url = "https://files.pythonhosted.org/packages/a4/f5/051413e04f6da25069db5e76759ecdb8cd2a8ab4a94045b5a3bf548c66fa/maturin-1.8.1-py3-none-win_arm64.whl", hash = "sha256:e95f077fd2ddd2f048182880eed458c308571a534be3eb2add4d3dac55bf57f4", size = 6552131 }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "myst-parser" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", + "python_full_version < '3.9'", +] +dependencies = [ + { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "jinja2", marker = "python_full_version < '3.10'" }, + { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, + { name = "mdit-py-plugins", marker = "python_full_version < '3.10'" }, + { name = "pyyaml", marker = "python_full_version < '3.10'" }, + { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/64/e2f13dac02f599980798c01156393b781aec983b52a6e4057ee58f07c43a/myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87", size = 92392 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/de/21aa8394f16add8f7427f0a1326ccd2b3a2a8a3245c9252bc5ac034c6155/myst_parser-3.0.1-py3-none-any.whl", hash = "sha256:6457aaa33a5d474aca678b8ead9b3dc298e89c68e67012e73146ea6fd54babf1", size = 83163 }, +] + +[[package]] +name = "myst-parser" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "jinja2", marker = "python_full_version >= '3.10'" }, + { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, + { name = "mdit-py-plugins", marker = "python_full_version >= '3.10'" }, + { name = "pyyaml", marker = "python_full_version >= '3.10'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/55/6d1741a1780e5e65038b74bce6689da15f620261c490c3511eb4c12bac4b/myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531", size = 93858 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/b4/b036f8fdb667587bb37df29dc6644681dd78b7a2a6321a34684b79412b28/myst_parser-4.0.0-py3-none-any.whl", hash = 
"sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d", size = 84563 }, +] + +[[package]] +name = "numpy" +version = "1.24.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140 }, + { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297 }, + { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611 }, + { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357 }, + { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222 }, + { url = 
"https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514 }, + { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508 }, + { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033 }, + { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951 }, + { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923 }, + { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446 }, + { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466 }, + { url = 
"https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722 }, + { url = "https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102 }, + { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616 }, + { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263 }, + { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660 }, + { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112 }, + { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549 }, + { url = 
"https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950 }, + { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228 }, + { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170 }, + { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918 }, + { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441 }, + { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590 }, + { url = "https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744 }, + { url = 
"https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290 }, +] + +[[package]] +name = "numpy" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/91/3495b3237510f79f5d81f2508f9f13fea78ebfdf07538fc7444badda173d/numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece", size = 21165245 }, + { url = "https://files.pythonhosted.org/packages/05/33/26178c7d437a87082d11019292dce6d3fe6f0e9026b7b2309cbf3e489b1d/numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04", size = 13738540 }, + { url = "https://files.pythonhosted.org/packages/ec/31/cc46e13bf07644efc7a4bf68df2df5fb2a1a88d0cd0da9ddc84dc0033e51/numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66", size = 5300623 }, + { url = "https://files.pythonhosted.org/packages/6e/16/7bfcebf27bb4f9d7ec67332ffebee4d1bf085c84246552d52dbb548600e7/numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b", size = 6901774 }, + { url = "https://files.pythonhosted.org/packages/f9/a3/561c531c0e8bf082c5bef509d00d56f82e0ea7e1e3e3a7fc8fa78742a6e5/numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd", size = 13907081 }, + { url = "https://files.pythonhosted.org/packages/fa/66/f7177ab331876200ac7563a580140643d1179c8b4b6a6b0fc9838de2a9b8/numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318", size = 19523451 }, + { url = "https://files.pythonhosted.org/packages/25/7f/0b209498009ad6453e4efc2c65bcdf0ae08a182b2b7877d7ab38a92dc542/numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8", size = 19927572 }, + { url = "https://files.pythonhosted.org/packages/3e/df/2619393b1e1b565cd2d4c4403bdd979621e2c4dea1f8532754b2598ed63b/numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326", size = 14400722 }, + { url = "https://files.pythonhosted.org/packages/22/ad/77e921b9f256d5da36424ffb711ae79ca3f451ff8489eeca544d0701d74a/numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97", size = 6472170 }, + { url = "https://files.pythonhosted.org/packages/10/05/3442317535028bc29cf0c0dd4c191a4481e8376e9f0db6bcf29703cadae6/numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131", size = 15905558 }, + { url = "https://files.pythonhosted.org/packages/8b/cf/034500fb83041aa0286e0fb16e7c76e5c8b67c0711bb6e9e9737a717d5fe/numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448", size = 21169137 }, + { url = "https://files.pythonhosted.org/packages/4a/d9/32de45561811a4b87fbdee23b5797394e3d1504b4a7cf40c10199848893e/numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195", size = 13703552 }, + { url = 
"https://files.pythonhosted.org/packages/c1/ca/2f384720020c7b244d22508cb7ab23d95f179fcfff33c31a6eeba8d6c512/numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57", size = 5298957 }, + { url = "https://files.pythonhosted.org/packages/0e/78/a3e4f9fb6aa4e6fdca0c5428e8ba039408514388cf62d89651aade838269/numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a", size = 6905573 }, + { url = "https://files.pythonhosted.org/packages/a0/72/cfc3a1beb2caf4efc9d0b38a15fe34025230da27e1c08cc2eb9bfb1c7231/numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669", size = 13914330 }, + { url = "https://files.pythonhosted.org/packages/ba/a8/c17acf65a931ce551fee11b72e8de63bf7e8a6f0e21add4c937c83563538/numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951", size = 19534895 }, + { url = "https://files.pythonhosted.org/packages/ba/86/8767f3d54f6ae0165749f84648da9dcc8cd78ab65d415494962c86fac80f/numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9", size = 19937253 }, + { url = "https://files.pythonhosted.org/packages/df/87/f76450e6e1c14e5bb1eae6836478b1028e096fd02e85c1c37674606ab752/numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15", size = 14414074 }, + { url = "https://files.pythonhosted.org/packages/5c/ca/0f0f328e1e59f73754f06e1adfb909de43726d4f24c6a3f8805f34f2b0fa/numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4", size = 6470640 }, + { url = 
"https://files.pythonhosted.org/packages/eb/57/3a3f14d3a759dcf9bf6e9eda905794726b758819df4663f217d658a58695/numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc", size = 15910230 }, + { url = "https://files.pythonhosted.org/packages/45/40/2e117be60ec50d98fa08c2f8c48e09b3edea93cfcabd5a9ff6925d54b1c2/numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b", size = 20895803 }, + { url = "https://files.pythonhosted.org/packages/46/92/1b8b8dee833f53cef3e0a3f69b2374467789e0bb7399689582314df02651/numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e", size = 13471835 }, + { url = "https://files.pythonhosted.org/packages/7f/19/e2793bde475f1edaea6945be141aef6c8b4c669b90c90a300a8954d08f0a/numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c", size = 5038499 }, + { url = "https://files.pythonhosted.org/packages/e3/ff/ddf6dac2ff0dd50a7327bcdba45cb0264d0e96bb44d33324853f781a8f3c/numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c", size = 6633497 }, + { url = "https://files.pythonhosted.org/packages/72/21/67f36eac8e2d2cd652a2e69595a54128297cdcb1ff3931cfc87838874bd4/numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692", size = 13621158 }, + { url = "https://files.pythonhosted.org/packages/39/68/e9f1126d757653496dbc096cb429014347a36b228f5a991dae2c6b6cfd40/numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a", size = 19236173 }, + { url = 
"https://files.pythonhosted.org/packages/d1/e9/1f5333281e4ebf483ba1c888b1d61ba7e78d7e910fdd8e6499667041cc35/numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c", size = 19634174 }, + { url = "https://files.pythonhosted.org/packages/71/af/a469674070c8d8408384e3012e064299f7a2de540738a8e414dcfd639996/numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded", size = 14099701 }, + { url = "https://files.pythonhosted.org/packages/d0/3d/08ea9f239d0e0e939b6ca52ad403c84a2bce1bde301a8eb4888c1c1543f1/numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5", size = 6174313 }, + { url = "https://files.pythonhosted.org/packages/b2/b5/4ac39baebf1fdb2e72585c8352c56d063b6126be9fc95bd2bb5ef5770c20/numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a", size = 15606179 }, + { url = "https://files.pythonhosted.org/packages/43/c1/41c8f6df3162b0c6ffd4437d729115704bd43363de0090c7f913cfbc2d89/numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c", size = 21169942 }, + { url = "https://files.pythonhosted.org/packages/39/bc/fd298f308dcd232b56a4031fd6ddf11c43f9917fbc937e53762f7b5a3bb1/numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd", size = 13711512 }, + { url = "https://files.pythonhosted.org/packages/96/ff/06d1aa3eeb1c614eda245c1ba4fb88c483bee6520d361641331872ac4b82/numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b", size = 5306976 }, + { url = 
"https://files.pythonhosted.org/packages/2d/98/121996dcfb10a6087a05e54453e28e58694a7db62c5a5a29cee14c6e047b/numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729", size = 6906494 }, + { url = "https://files.pythonhosted.org/packages/15/31/9dffc70da6b9bbf7968f6551967fc21156207366272c2a40b4ed6008dc9b/numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1", size = 13912596 }, + { url = "https://files.pythonhosted.org/packages/b9/14/78635daab4b07c0930c919d451b8bf8c164774e6a3413aed04a6d95758ce/numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd", size = 19526099 }, + { url = "https://files.pythonhosted.org/packages/26/4c/0eeca4614003077f68bfe7aac8b7496f04221865b3a5e7cb230c9d055afd/numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d", size = 19932823 }, + { url = "https://files.pythonhosted.org/packages/f1/46/ea25b98b13dccaebddf1a803f8c748680d972e00507cd9bc6dcdb5aa2ac1/numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d", size = 14404424 }, + { url = "https://files.pythonhosted.org/packages/c8/a6/177dd88d95ecf07e722d21008b1b40e681a929eb9e329684d449c36586b2/numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa", size = 6476809 }, + { url = "https://files.pythonhosted.org/packages/ea/2b/7fc9f4e7ae5b507c1a3a21f0f15ed03e794c1242ea8a242ac158beb56034/numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73", size = 15911314 }, + { url = 
"https://files.pythonhosted.org/packages/8f/3b/df5a870ac6a3be3a86856ce195ef42eec7ae50d2a202be1f5a4b3b340e14/numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8", size = 21025288 }, + { url = "https://files.pythonhosted.org/packages/2c/97/51af92f18d6f6f2d9ad8b482a99fb74e142d71372da5d834b3a2747a446e/numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4", size = 6762793 }, + { url = "https://files.pythonhosted.org/packages/12/46/de1fbd0c1b5ccaa7f9a005b66761533e2f6a3e560096682683a223631fe9/numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c", size = 19334885 }, + { url = "https://files.pythonhosted.org/packages/cc/dc/d330a6faefd92b446ec0f0dfea4c3207bb1fef3c4771d19cf4543efd2c78/numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385", size = 15828784 }, +] + +[[package]] +name = "numpy" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/fdbf6a7871703df6160b5cf3dd774074b086d278172285c52c2758b76305/numpy-2.2.1.tar.gz", hash = "sha256:45681fd7128c8ad1c379f0ca0776a8b0c6583d2f69889ddac01559dfe4390918", size = 20227662 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/c4/5588367dc9f91e1a813beb77de46ea8cab13f778e1b3a0e661ab031aba44/numpy-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5edb4e4caf751c1518e6a26a83501fda79bff41cc59dac48d70e6d65d4ec4440", size = 21213214 }, + { url = 
"https://files.pythonhosted.org/packages/d8/8b/32dd9f08419023a4cf856c5ad0b4eba9b830da85eafdef841a104c4fc05a/numpy-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa3017c40d513ccac9621a2364f939d39e550c542eb2a894b4c8da92b38896ab", size = 14352248 }, + { url = "https://files.pythonhosted.org/packages/84/2d/0e895d02940ba6e12389f0ab5cac5afcf8dc2dc0ade4e8cad33288a721bd/numpy-2.2.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:61048b4a49b1c93fe13426e04e04fdf5a03f456616f6e98c7576144677598675", size = 5391007 }, + { url = "https://files.pythonhosted.org/packages/11/b9/7f1e64a0d46d9c2af6d17966f641fb12d5b8ea3003f31b2308f3e3b9a6aa/numpy-2.2.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7671dc19c7019103ca44e8d94917eba8534c76133523ca8406822efdd19c9308", size = 6926174 }, + { url = "https://files.pythonhosted.org/packages/2e/8c/043fa4418bc9364e364ab7aba8ff6ef5f6b9171ade22de8fbcf0e2fa4165/numpy-2.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4250888bcb96617e00bfa28ac24850a83c9f3a16db471eca2ee1f1714df0f957", size = 14330914 }, + { url = "https://files.pythonhosted.org/packages/f7/b6/d8110985501ca8912dfc1c3bbef99d66e62d487f72e46b2337494df77364/numpy-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7746f235c47abc72b102d3bce9977714c2444bdfaea7888d241b4c4bb6a78bf", size = 16379607 }, + { url = "https://files.pythonhosted.org/packages/e2/57/bdca9fb8bdaa810c3a4ff2eb3231379b77f618a7c0d24be9f7070db50775/numpy-2.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:059e6a747ae84fce488c3ee397cee7e5f905fd1bda5fb18c66bc41807ff119b2", size = 15541760 }, + { url = "https://files.pythonhosted.org/packages/97/55/3b9147b3cbc3b6b1abc2a411dec5337a46c873deca0dd0bf5bef9d0579cc/numpy-2.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f62aa6ee4eb43b024b0e5a01cf65a0bb078ef8c395e8713c6e8a12a697144528", size = 18168476 }, + { url = 
"https://files.pythonhosted.org/packages/00/e7/7c2cde16c9b87a8e14fdd262ca7849c4681cf48c8a774505f7e6f5e3b643/numpy-2.2.1-cp310-cp310-win32.whl", hash = "sha256:48fd472630715e1c1c89bf1feab55c29098cb403cc184b4859f9c86d4fcb6a95", size = 6570985 }, + { url = "https://files.pythonhosted.org/packages/a1/a8/554b0e99fc4ac11ec481254781a10da180d0559c2ebf2c324232317349ee/numpy-2.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:b541032178a718c165a49638d28272b771053f628382d5e9d1c93df23ff58dbf", size = 12913384 }, + { url = "https://files.pythonhosted.org/packages/59/14/645887347124e101d983e1daf95b48dc3e136bf8525cb4257bf9eab1b768/numpy-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40f9e544c1c56ba8f1cf7686a8c9b5bb249e665d40d626a23899ba6d5d9e1484", size = 21217379 }, + { url = "https://files.pythonhosted.org/packages/9f/fd/2279000cf29f58ccfd3778cbf4670dfe3f7ce772df5e198c5abe9e88b7d7/numpy-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9b57eaa3b0cd8db52049ed0330747b0364e899e8a606a624813452b8203d5f7", size = 14388520 }, + { url = "https://files.pythonhosted.org/packages/58/b0/034eb5d5ba12d66ab658ff3455a31f20add0b78df8203c6a7451bd1bee21/numpy-2.2.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bc8a37ad5b22c08e2dbd27df2b3ef7e5c0864235805b1e718a235bcb200cf1cb", size = 5389286 }, + { url = "https://files.pythonhosted.org/packages/5d/69/6f3cccde92e82e7835fdb475c2bf439761cbf8a1daa7c07338e1e132dfec/numpy-2.2.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9036d6365d13b6cbe8f27a0eaf73ddcc070cae584e5ff94bb45e3e9d729feab5", size = 6930345 }, + { url = "https://files.pythonhosted.org/packages/d1/72/1cd38e91ab563e67f584293fcc6aca855c9ae46dba42e6b5ff4600022899/numpy-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51faf345324db860b515d3f364eaa93d0e0551a88d6218a7d61286554d190d73", size = 14335748 }, + { url = 
"https://files.pythonhosted.org/packages/f2/d4/f999444e86986f3533e7151c272bd8186c55dda554284def18557e013a2a/numpy-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38efc1e56b73cc9b182fe55e56e63b044dd26a72128fd2fbd502f75555d92591", size = 16391057 }, + { url = "https://files.pythonhosted.org/packages/99/7b/85cef6a3ae1b19542b7afd97d0b296526b6ef9e3c43ea0c4d9c4404fb2d0/numpy-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:31b89fa67a8042e96715c68e071a1200c4e172f93b0fbe01a14c0ff3ff820fc8", size = 15556943 }, + { url = "https://files.pythonhosted.org/packages/69/7e/b83cc884c3508e91af78760f6b17ab46ad649831b1fa35acb3eb26d9e6d2/numpy-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c86e2a209199ead7ee0af65e1d9992d1dce7e1f63c4b9a616500f93820658d0", size = 18180785 }, + { url = "https://files.pythonhosted.org/packages/b2/9f/eb4a9a38867de059dcd4b6e18d47c3867fbd3795d4c9557bb49278f94087/numpy-2.2.1-cp311-cp311-win32.whl", hash = "sha256:b34d87e8a3090ea626003f87f9392b3929a7bbf4104a05b6667348b6bd4bf1cd", size = 6568983 }, + { url = "https://files.pythonhosted.org/packages/6d/1e/be3b9f3073da2f8c7fa361fcdc231b548266b0781029fdbaf75eeab997fd/numpy-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:360137f8fb1b753c5cde3ac388597ad680eccbbbb3865ab65efea062c4a1fd16", size = 12917260 }, + { url = "https://files.pythonhosted.org/packages/62/12/b928871c570d4a87ab13d2cc19f8817f17e340d5481621930e76b80ffb7d/numpy-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:694f9e921a0c8f252980e85bce61ebbd07ed2b7d4fa72d0e4246f2f8aa6642ab", size = 20909861 }, + { url = "https://files.pythonhosted.org/packages/3d/c3/59df91ae1d8ad7c5e03efd63fd785dec62d96b0fe56d1f9ab600b55009af/numpy-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3683a8d166f2692664262fd4900f207791d005fb088d7fdb973cc8d663626faa", size = 14095776 }, + { url = 
"https://files.pythonhosted.org/packages/af/4e/8ed5868efc8e601fb69419644a280e9c482b75691466b73bfaab7d86922c/numpy-2.2.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:780077d95eafc2ccc3ced969db22377b3864e5b9a0ea5eb347cc93b3ea900315", size = 5126239 }, + { url = "https://files.pythonhosted.org/packages/1a/74/dd0bbe650d7bc0014b051f092f2de65e34a8155aabb1287698919d124d7f/numpy-2.2.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:55ba24ebe208344aa7a00e4482f65742969a039c2acfcb910bc6fcd776eb4355", size = 6659296 }, + { url = "https://files.pythonhosted.org/packages/7f/11/4ebd7a3f4a655764dc98481f97bd0a662fb340d1001be6050606be13e162/numpy-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b1d07b53b78bf84a96898c1bc139ad7f10fda7423f5fd158fd0f47ec5e01ac7", size = 14047121 }, + { url = "https://files.pythonhosted.org/packages/7f/a7/c1f1d978166eb6b98ad009503e4d93a8c1962d0eb14a885c352ee0276a54/numpy-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5062dc1a4e32a10dc2b8b13cedd58988261416e811c1dc4dbdea4f57eea61b0d", size = 16096599 }, + { url = "https://files.pythonhosted.org/packages/3d/6d/0e22afd5fcbb4d8d0091f3f46bf4e8906399c458d4293da23292c0ba5022/numpy-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fce4f615f8ca31b2e61aa0eb5865a21e14f5629515c9151850aa936c02a1ee51", size = 15243932 }, + { url = "https://files.pythonhosted.org/packages/03/39/e4e5832820131ba424092b9610d996b37e5557180f8e2d6aebb05c31ae54/numpy-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:67d4cda6fa6ffa073b08c8372aa5fa767ceb10c9a0587c707505a6d426f4e046", size = 17861032 }, + { url = "https://files.pythonhosted.org/packages/5f/8a/3794313acbf5e70df2d5c7d2aba8718676f8d054a05abe59e48417fb2981/numpy-2.2.1-cp312-cp312-win32.whl", hash = "sha256:32cb94448be47c500d2c7a95f93e2f21a01f1fd05dd2beea1ccd049bb6001cd2", size = 6274018 }, + { url = 
"https://files.pythonhosted.org/packages/17/c1/c31d3637f2641e25c7a19adf2ae822fdaf4ddd198b05d79a92a9ce7cb63e/numpy-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:ba5511d8f31c033a5fcbda22dd5c813630af98c70b2661f2d2c654ae3cdfcfc8", size = 12613843 }, + { url = "https://files.pythonhosted.org/packages/20/d6/91a26e671c396e0c10e327b763485ee295f5a5a7a48c553f18417e5a0ed5/numpy-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1d09e520217618e76396377c81fba6f290d5f926f50c35f3a5f72b01a0da780", size = 20896464 }, + { url = "https://files.pythonhosted.org/packages/8c/40/5792ccccd91d45e87d9e00033abc4f6ca8a828467b193f711139ff1f1cd9/numpy-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3ecc47cd7f6ea0336042be87d9e7da378e5c7e9b3c8ad0f7c966f714fc10d821", size = 14111350 }, + { url = "https://files.pythonhosted.org/packages/c0/2a/fb0a27f846cb857cef0c4c92bef89f133a3a1abb4e16bba1c4dace2e9b49/numpy-2.2.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f419290bc8968a46c4933158c91a0012b7a99bb2e465d5ef5293879742f8797e", size = 5111629 }, + { url = "https://files.pythonhosted.org/packages/eb/e5/8e81bb9d84db88b047baf4e8b681a3e48d6390bc4d4e4453eca428ecbb49/numpy-2.2.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b6c390bfaef8c45a260554888966618328d30e72173697e5cabe6b285fb2348", size = 6645865 }, + { url = "https://files.pythonhosted.org/packages/7a/1a/a90ceb191dd2f9e2897c69dde93ccc2d57dd21ce2acbd7b0333e8eea4e8d/numpy-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:526fc406ab991a340744aad7e25251dd47a6720a685fa3331e5c59fef5282a59", size = 14043508 }, + { url = "https://files.pythonhosted.org/packages/f1/5a/e572284c86a59dec0871a49cd4e5351e20b9c751399d5f1d79628c0542cb/numpy-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74e6fdeb9a265624ec3a3918430205dff1df7e95a230779746a6af78bc615af", size = 16094100 }, + { url = 
"https://files.pythonhosted.org/packages/0c/2c/a79d24f364788386d85899dd280a94f30b0950be4b4a545f4fa4ed1d4ca7/numpy-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:53c09385ff0b72ba79d8715683c1168c12e0b6e84fb0372e97553d1ea91efe51", size = 15239691 }, + { url = "https://files.pythonhosted.org/packages/cf/79/1e20fd1c9ce5a932111f964b544facc5bb9bde7865f5b42f00b4a6a9192b/numpy-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3eac17d9ec51be534685ba877b6ab5edc3ab7ec95c8f163e5d7b39859524716", size = 17856571 }, + { url = "https://files.pythonhosted.org/packages/be/5b/cc155e107f75d694f562bdc84a26cc930569f3dfdfbccb3420b626065777/numpy-2.2.1-cp313-cp313-win32.whl", hash = "sha256:9ad014faa93dbb52c80d8f4d3dcf855865c876c9660cb9bd7553843dd03a4b1e", size = 6270841 }, + { url = "https://files.pythonhosted.org/packages/44/be/0e5cd009d2162e4138d79a5afb3b5d2341f0fe4777ab6e675aa3d4a42e21/numpy-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:164a829b6aacf79ca47ba4814b130c4020b202522a93d7bff2202bfb33b61c60", size = 12606618 }, + { url = "https://files.pythonhosted.org/packages/a8/87/04ddf02dd86fb17c7485a5f87b605c4437966d53de1e3745d450343a6f56/numpy-2.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4dfda918a13cc4f81e9118dea249e192ab167a0bb1966272d5503e39234d694e", size = 20921004 }, + { url = "https://files.pythonhosted.org/packages/6e/3e/d0e9e32ab14005425d180ef950badf31b862f3839c5b927796648b11f88a/numpy-2.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:733585f9f4b62e9b3528dd1070ec4f52b8acf64215b60a845fa13ebd73cd0712", size = 14119910 }, + { url = "https://files.pythonhosted.org/packages/b5/5b/aa2d1905b04a8fb681e08742bb79a7bddfc160c7ce8e1ff6d5c821be0236/numpy-2.2.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:89b16a18e7bba224ce5114db863e7029803c179979e1af6ad6a6b11f70545008", size = 5153612 }, + { url = 
"https://files.pythonhosted.org/packages/ce/35/6831808028df0648d9b43c5df7e1051129aa0d562525bacb70019c5f5030/numpy-2.2.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:676f4eebf6b2d430300f1f4f4c2461685f8269f94c89698d832cdf9277f30b84", size = 6668401 }, + { url = "https://files.pythonhosted.org/packages/b1/38/10ef509ad63a5946cc042f98d838daebfe7eaf45b9daaf13df2086b15ff9/numpy-2.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f5cdf9f493b35f7e41e8368e7d7b4bbafaf9660cba53fb21d2cd174ec09631", size = 14014198 }, + { url = "https://files.pythonhosted.org/packages/df/f8/c80968ae01df23e249ee0a4487fae55a4c0fe2f838dfe9cc907aa8aea0fa/numpy-2.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1ad395cf254c4fbb5b2132fee391f361a6e8c1adbd28f2cd8e79308a615fe9d", size = 16076211 }, + { url = "https://files.pythonhosted.org/packages/09/69/05c169376016a0b614b432967ac46ff14269eaffab80040ec03ae1ae8e2c/numpy-2.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:08ef779aed40dbc52729d6ffe7dd51df85796a702afbf68a4f4e41fafdc8bda5", size = 15220266 }, + { url = "https://files.pythonhosted.org/packages/f1/ff/94a4ce67ea909f41cf7ea712aebbe832dc67decad22944a1020bb398a5ee/numpy-2.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:26c9c4382b19fcfbbed3238a14abf7ff223890ea1936b8890f058e7ba35e8d71", size = 17852844 }, + { url = "https://files.pythonhosted.org/packages/46/72/8a5dbce4020dfc595592333ef2fbb0a187d084ca243b67766d29d03e0096/numpy-2.2.1-cp313-cp313t-win32.whl", hash = "sha256:93cf4e045bae74c90ca833cba583c14b62cb4ba2cba0abd2b141ab52548247e2", size = 6326007 }, + { url = "https://files.pythonhosted.org/packages/7b/9c/4fce9cf39dde2562584e4cfd351a0140240f82c0e3569ce25a250f47037d/numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268", size = 12693107 }, + { url = 
"https://files.pythonhosted.org/packages/f1/65/d36a76b811ffe0a4515e290cb05cb0e22171b1b0f0db6bee9141cf023545/numpy-2.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7ba9cc93a91d86365a5d270dee221fdc04fb68d7478e6bf6af650de78a8339e3", size = 21044672 }, + { url = "https://files.pythonhosted.org/packages/aa/3f/b644199f165063154df486d95198d814578f13dd4d8c1651e075bf1cb8af/numpy-2.2.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3d03883435a19794e41f147612a77a8f56d4e52822337844fff3d4040a142964", size = 6789873 }, + { url = "https://files.pythonhosted.org/packages/d7/df/2adb0bb98a3cbe8a6c3c6d1019aede1f1d8b83927ced228a46cc56c7a206/numpy-2.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4511d9e6071452b944207c8ce46ad2f897307910b402ea5fa975da32e0102800", size = 16194933 }, + { url = "https://files.pythonhosted.org/packages/13/3e/1959d5219a9e6d200638d924cedda6a606392f7186a4ed56478252e70d55/numpy-2.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5c5cc0cbabe9452038ed984d05ac87910f89370b9242371bd9079cb4af61811e", size = 12820057 }, +] + +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + +[[package]] +name = "pandas" +version = "2.0.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, 
marker = "python_full_version < '3.9'" }, + { name = "python-dateutil", marker = "python_full_version < '3.9'" }, + { name = "pytz", marker = "python_full_version < '3.9'" }, + { name = "tzdata", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908 }, + { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486 }, + { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897 }, + { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421 }, + { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792 }, + { url = 
"https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333 }, + { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672 }, + { url = "https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229 }, + { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591 }, + { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370 }, + { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935 }, + { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692 }, + { url = 
"https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303 }, + { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932 }, + { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018 }, + { url = "https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723 }, + { url = "https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403 }, + { url = "https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638 }, + { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160 }, + { url = 
"https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752 }, + { url = "https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852 }, + { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496 }, + { url = "https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766 }, + { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902 }, +] + +[[package]] +name = "pandas" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "python-dateutil", marker = "python_full_version >= '3.9'" }, + { name = 
"pytz", marker = "python_full_version >= '3.9'" }, + { name = "tzdata", marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827 }, + { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897 }, + { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908 }, + { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210 }, + { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", size = 16754292 }, + { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379 }, + { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471 }, + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222 }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274 }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836 }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505 }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420 }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457 }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166 }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445 }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235 }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756 }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643 }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573 }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085 }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809 }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316 }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055 }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175 }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650 }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526 }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013 }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620 }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 }, + { url = "https://files.pythonhosted.org/packages/ca/8c/8848a4c9b8fdf5a534fe2077af948bf53cd713d77ffbcd7bd15710348fd7/pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39", size = 12595535 }, + { url = "https://files.pythonhosted.org/packages/9c/b9/5cead4f63b6d31bdefeb21a679bc5a7f4aaf262ca7e07e2bc1c341b68470/pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30", size = 11319822 }, + { url = "https://files.pythonhosted.org/packages/31/af/89e35619fb573366fa68dc26dad6ad2c08c17b8004aad6d98f1a31ce4bb3/pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c", size = 15625439 }, + { url = "https://files.pythonhosted.org/packages/3d/dd/bed19c2974296661493d7acc4407b1d2db4e2a482197df100f8f965b6225/pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c", size = 13068928 }, + { url = "https://files.pythonhosted.org/packages/31/a3/18508e10a31ea108d746c848b5a05c0711e0278fa0d6f1c52a8ec52b80a5/pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea", size = 16783266 }, + { url = "https://files.pythonhosted.org/packages/c4/a5/3429bd13d82bebc78f4d78c3945efedef63a7cd0c15c17b2eeb838d1121f/pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761", size = 14450871 }, + { url = "https://files.pythonhosted.org/packages/2f/49/5c30646e96c684570925b772eac4eb0a8cb0ca590fa978f56c5d3ae73ea1/pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e", size = 
11618011 }, +] + +[[package]] +name = "parso" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 }, +] + +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, +] + +[[package]] +name = "pickleshare" +version = "0.7.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/b6/df3c1c9b616e9c0edbc4fbab6ddd09df9535849c64ba51fcb6531c32d4d8/pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", size = 6161 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/41/220f49aaea88bc6fa6cba8d05ecf24676326156c23b991e80b3f2fc24c77/pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56", size = 6877 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.48" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2d/4f/feb5e137aff82f7c7f3248267b97451da3644f6cdc218edfe549fb354127/prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90", size = 424684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/6a/fd08d94654f7e67c52ca30523a178b3f8ccc4237fce4be90d39c938a831a/prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e", size = 386595 }, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, +] + +[[package]] +name = "pyarrow" +version = "17.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/27/4e/ea6d43f324169f8aec0e57569443a38bab4b398d09769ca64f7b4d467de3/pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", size = 1112479 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/5d/78d4b040bc5ff2fc6c3d03e80fca396b742f6c125b8af06bcf7427f931bc/pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", size = 28994846 }, + { url = "https://files.pythonhosted.org/packages/3b/73/8ed168db7642e91180330e4ea9f3ff8bab404678f00d32d7df0871a4933b/pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", size = 27165908 }, + { url = "https://files.pythonhosted.org/packages/81/36/e78c24be99242063f6d0590ef68c857ea07bdea470242c361e9a15bd57a4/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", size = 39264209 }, + { url = 
"https://files.pythonhosted.org/packages/18/4c/3db637d7578f683b0a8fb8999b436bdbedd6e3517bd4f90c70853cf3ad20/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", size = 39862883 }, + { url = "https://files.pythonhosted.org/packages/81/3c/0580626896c842614a523e66b351181ed5bb14e5dfc263cd68cea2c46d90/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8", size = 38723009 }, + { url = "https://files.pythonhosted.org/packages/ee/fb/c1b47f0ada36d856a352da261a44d7344d8f22e2f7db3945f8c3b81be5dd/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", size = 39855626 }, + { url = "https://files.pythonhosted.org/packages/19/09/b0a02908180a25d57312ab5919069c39fddf30602568980419f4b02393f6/pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", size = 25147242 }, + { url = "https://files.pythonhosted.org/packages/f9/46/ce89f87c2936f5bb9d879473b9663ce7a4b1f4359acc2f0eb39865eaa1af/pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", size = 29028748 }, + { url = "https://files.pythonhosted.org/packages/8d/8e/ce2e9b2146de422f6638333c01903140e9ada244a2a477918a368306c64c/pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", size = 27190965 }, + { url = "https://files.pythonhosted.org/packages/3b/c8/5675719570eb1acd809481c6d64e2136ffb340bc387f4ca62dce79516cea/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", size = 39269081 }, + { url = 
"https://files.pythonhosted.org/packages/5e/78/3931194f16ab681ebb87ad252e7b8d2c8b23dad49706cadc865dff4a1dd3/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", size = 39864921 }, + { url = "https://files.pythonhosted.org/packages/d8/81/69b6606093363f55a2a574c018901c40952d4e902e670656d18213c71ad7/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", size = 38740798 }, + { url = "https://files.pythonhosted.org/packages/4c/21/9ca93b84b92ef927814cb7ba37f0774a484c849d58f0b692b16af8eebcfb/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", size = 39871877 }, + { url = "https://files.pythonhosted.org/packages/30/d1/63a7c248432c71c7d3ee803e706590a0b81ce1a8d2b2ae49677774b813bb/pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", size = 25151089 }, + { url = "https://files.pythonhosted.org/packages/d4/62/ce6ac1275a432b4a27c55fe96c58147f111d8ba1ad800a112d31859fae2f/pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", size = 29019418 }, + { url = "https://files.pythonhosted.org/packages/8e/0a/dbd0c134e7a0c30bea439675cc120012337202e5fac7163ba839aa3691d2/pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", size = 27152197 }, + { url = "https://files.pythonhosted.org/packages/cb/05/3f4a16498349db79090767620d6dc23c1ec0c658a668d61d76b87706c65d/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", size = 39263026 }, + { url = 
"https://files.pythonhosted.org/packages/c2/0c/ea2107236740be8fa0e0d4a293a095c9f43546a2465bb7df34eee9126b09/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", size = 39880798 }, + { url = "https://files.pythonhosted.org/packages/f6/b0/b9164a8bc495083c10c281cc65064553ec87b7537d6f742a89d5953a2a3e/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", size = 38715172 }, + { url = "https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", size = 39874508 }, + { url = "https://files.pythonhosted.org/packages/ae/49/baafe2a964f663413be3bd1cf5c45ed98c5e42e804e2328e18f4570027c1/pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", size = 25099235 }, + { url = "https://files.pythonhosted.org/packages/8d/bd/8f52c1d7b430260f80a349cffa2df351750a737b5336313d56dcadeb9ae1/pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", size = 28999345 }, + { url = "https://files.pythonhosted.org/packages/64/d9/51e35550f2f18b8815a2ab25948f735434db32000c0e91eba3a32634782a/pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", size = 27168441 }, + { url = "https://files.pythonhosted.org/packages/18/d8/7161d87d07ea51be70c49f615004c1446d5723622a18b2681f7e4b71bf6e/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", size = 39363163 }, + { url = 
"https://files.pythonhosted.org/packages/3f/08/bc497130789833de09e345e3ce4647e3ce86517c4f70f2144f0367ca378b/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", size = 39965253 }, + { url = "https://files.pythonhosted.org/packages/d3/2e/493dd7db889402b4c7871ca7dfdd20f2c5deedbff802d3eb8576359930f9/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", size = 38805378 }, + { url = "https://files.pythonhosted.org/packages/e6/c1/4c6bcdf7a820034aa91a8b4d25fef38809be79b42ca7aaa16d4680b0bbac/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", size = 39958364 }, + { url = "https://files.pythonhosted.org/packages/d1/db/42ac644453cfdfc60fe002b46d647fe7a6dfad753ef7b28e99b4c936ad5d/pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", size = 25229211 }, + { url = "https://files.pythonhosted.org/packages/43/e0/a898096d35be240aa61fb2d54db58b86d664b10e1e51256f9300f47565e8/pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", size = 29007881 }, + { url = "https://files.pythonhosted.org/packages/59/22/f7d14907ed0697b5dd488d393129f2738629fa5bcba863e00931b7975946/pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", size = 27178117 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/661211feac0ed48467b1d5c57298c91403809ec3ab78b1d175e1d6ad03cf/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", size = 39273896 }, + { url = 
"https://files.pythonhosted.org/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", size = 39875438 }, + { url = "https://files.pythonhosted.org/packages/75/63/29d1bfcc57af73cde3fc3baccab2f37548de512dbe0ab294b033cd203516/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", size = 38735092 }, + { url = "https://files.pythonhosted.org/packages/39/f4/90258b4de753df7cc61cefb0312f8abcf226672e96cc64996e66afce817a/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", size = 39867610 }, + { url = "https://files.pythonhosted.org/packages/e7/f6/b75d4816c32f1618ed31a005ee635dd1d91d8164495d94f2ea092f594661/pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", size = 25148611 }, +] + +[[package]] +name = "pyarrow" +version = "18.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size = 29531620 }, + { url = 
"https://files.pythonhosted.org/packages/30/90/893acfad917533b624a97b9e498c0e8393908508a0a72d624fe935e632bf/pyarrow-18.1.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4", size = 30836521 }, + { url = "https://files.pythonhosted.org/packages/a3/2a/526545a7464b5fb2fa6e2c4bad16ca90e59e1843025c534fd907b7f73e5a/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b", size = 39213905 }, + { url = "https://files.pythonhosted.org/packages/8a/77/4b3fab91a30e19e233e738d0c5eca5a8f6dd05758bc349a2ca262c65de79/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71", size = 40128881 }, + { url = "https://files.pythonhosted.org/packages/aa/e2/a88e16c5e45e562449c52305bd3bc2f9d704295322d3434656e7ccac1444/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470", size = 38627517 }, + { url = "https://files.pythonhosted.org/packages/6d/84/8037c20005ccc7b869726465be0957bd9c29cfc88612962030f08292ad06/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56", size = 40060187 }, + { url = "https://files.pythonhosted.org/packages/2a/38/d6435c723ff73df8ae74626ea778262fbcc2b9b0d1a4f3db915b61711b05/pyarrow-18.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812", size = 25118314 }, + { url = "https://files.pythonhosted.org/packages/9e/4d/a4988e7d82f4fbc797715db4185939a658eeffb07a25bab7262bed1ea076/pyarrow-18.1.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854", size = 29554860 }, + { url = 
"https://files.pythonhosted.org/packages/59/03/3a42c5c1e4bd4c900ab62aa1ff6b472bdb159ba8f1c3e5deadab7222244f/pyarrow-18.1.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c", size = 30867076 }, + { url = "https://files.pythonhosted.org/packages/75/7e/332055ac913373e89256dce9d14b7708f55f7bd5be631456c897f0237738/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21", size = 39212135 }, + { url = "https://files.pythonhosted.org/packages/8c/64/5099cdb325828722ef7ffeba9a4696f238eb0cdeae227f831c2d77fcf1bd/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6", size = 40125195 }, + { url = "https://files.pythonhosted.org/packages/83/88/1938d783727db1b178ff71bc6a6143d7939e406db83a9ec23cad3dad325c/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe", size = 38641884 }, + { url = "https://files.pythonhosted.org/packages/5e/b5/9e14e9f7590e0eaa435ecea84dabb137284a4dbba7b3c337b58b65b76d95/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0", size = 40076877 }, + { url = "https://files.pythonhosted.org/packages/4d/a3/817ac7fe0891a2d66e247e223080f3a6a262d8aefd77e11e8c27e6acf4e1/pyarrow-18.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a", size = 25119811 }, + { url = "https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size = 29514620 }, + { url = 
"https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size = 30856494 }, + { url = "https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size = 39203624 }, + { url = "https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size = 40139341 }, + { url = "https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size = 38618629 }, + { url = "https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size = 40078661 }, + { url = "https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size = 25092330 }, + { url = "https://files.pythonhosted.org/packages/cb/87/aa4d249732edef6ad88899399047d7e49311a55749d3c373007d034ee471/pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b", size = 29497406 }, + { url = 
"https://files.pythonhosted.org/packages/3c/c7/ed6adb46d93a3177540e228b5ca30d99fc8ea3b13bdb88b6f8b6467e2cb7/pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2", size = 30835095 }, + { url = "https://files.pythonhosted.org/packages/41/d7/ed85001edfb96200ff606943cff71d64f91926ab42828676c0fc0db98963/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191", size = 39194527 }, + { url = "https://files.pythonhosted.org/packages/59/16/35e28eab126342fa391593415d79477e89582de411bb95232f28b131a769/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa", size = 40131443 }, + { url = "https://files.pythonhosted.org/packages/0c/95/e855880614c8da20f4cd74fa85d7268c725cf0013dc754048593a38896a0/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c", size = 38608750 }, + { url = "https://files.pythonhosted.org/packages/54/9d/f253554b1457d4fdb3831b7bd5f8f00f1795585a606eabf6fec0a58a9c38/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c", size = 40066690 }, + { url = "https://files.pythonhosted.org/packages/2f/58/8912a2563e6b8273e8aa7b605a345bba5a06204549826f6493065575ebc0/pyarrow-18.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181", size = 25081054 }, + { url = "https://files.pythonhosted.org/packages/82/f9/d06ddc06cab1ada0c2f2fd205ac8c25c2701182de1b9c4bf7a0a44844431/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc", size = 29525542 }, + { url = 
"https://files.pythonhosted.org/packages/ab/94/8917e3b961810587ecbdaa417f8ebac0abb25105ae667b7aa11c05876976/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386", size = 30829412 }, + { url = "https://files.pythonhosted.org/packages/5e/e3/3b16c3190f3d71d3b10f6758d2d5f7779ef008c4fd367cedab3ed178a9f7/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324", size = 39119106 }, + { url = "https://files.pythonhosted.org/packages/1d/d6/5d704b0d25c3c79532f8c0639f253ec2803b897100f64bcb3f53ced236e5/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8", size = 40090940 }, + { url = "https://files.pythonhosted.org/packages/37/29/366bc7e588220d74ec00e497ac6710c2833c9176f0372fe0286929b2d64c/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9", size = 38548177 }, + { url = "https://files.pythonhosted.org/packages/c8/11/fabf6ecabb1fe5b7d96889228ca2a9158c4c3bb732e3b8ee3f7f6d40b703/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba", size = 40043567 }, + { url = "https://files.pythonhosted.org/packages/fd/9b/60516e3876ec6f25b0909afa70f90a15de83b48c7c0d8042fac4e64c4411/pyarrow-18.1.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:0b331e477e40f07238adc7ba7469c36b908f07c89b95dd4bd3a0ec84a3d1e21e", size = 29543752 }, + { url = "https://files.pythonhosted.org/packages/14/a7/bd08b6f1a2bd2e71dc6bb0451fc1872607e44c83daf1ee63c82764a2d233/pyarrow-18.1.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c4dd0c9010a25ba03e198fe743b1cc03cd33c08190afff371749c52ccbbaf76", size = 30850753 }, + { url = 
"https://files.pythonhosted.org/packages/84/c9/62ef9c6281c0e5b4ee1afa9d7bd556e72e06da6706b7906c32c15e69b3d6/pyarrow-18.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f97b31b4c4e21ff58c6f330235ff893cc81e23da081b1a4b1c982075e0ed4e9", size = 39226870 }, + { url = "https://files.pythonhosted.org/packages/b2/99/a6e89e71655a38475e76b060777c8bf69c078b772bec3b7daf7361440f05/pyarrow-18.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a4813cb8ecf1809871fd2d64a8eff740a1bd3691bbe55f01a3cf6c5ec869754", size = 40139114 }, + { url = "https://files.pythonhosted.org/packages/64/a9/06d79923890682e4fe7a16524abee307407008a413115354aaf3226b8410/pyarrow-18.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:05a5636ec3eb5cc2a36c6edb534a38ef57b2ab127292a716d00eabb887835f1e", size = 38639231 }, + { url = "https://files.pythonhosted.org/packages/3b/8c/4c3ed19026a00740b81fe1c87f3ff235b2763a0a1ddf5711a9d026b775ce/pyarrow-18.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:73eeed32e724ea3568bb06161cad5fa7751e45bc2228e33dcb10c614044165c7", size = 40070949 }, + { url = "https://files.pythonhosted.org/packages/87/d8/94161a7ca5c55199484e926165e9e33f318ea1d1b0d7cdbcbc3652b933ec/pyarrow-18.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052", size = 25301373 }, +] + +[[package]] +name = "pydata-sphinx-theme" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", 
source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/d6/3921de802cf1ee771f0e76c9068b52498aeb8eeec6b830ff931c81c7ecf3/pydata_sphinx_theme-0.8.0.tar.gz", hash = "sha256:9f72015d9c572ea92e3007ab221a8325767c426783b6b9941813e65fa988dc90", size = 1123746 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/26/0694318d46c7d90ab602ae27b24431e939f1600f9a4c69d1e727ec57289f/pydata_sphinx_theme-0.8.0-py3-none-any.whl", hash = "sha256:fbcbb833a07d3ad8dd997dd40dc94da18d98b41c68123ab0182b58fe92271204", size = 3284997 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "pytest" +version = "8.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = 
"sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pytz" +version = "2024.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3a/31/3c70bf7603cc2dca0f19bdc53b4537a797747a58875b552c8c413d963a3f/pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a", size = 319692 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", 
size = 130631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199 }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758 }, + { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463 }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280 }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239 }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802 }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527 }, + { url = 
"https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052 }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774 }, + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 }, + { url = 
"https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850 }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980 }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = 
"https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 }, + { url = 
"https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 }, + { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, + { url = 
"https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, + { url = "https://files.pythonhosted.org/packages/74/d9/323a59d506f12f498c2097488d80d16f4cf965cee1791eab58b56b19f47a/PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", size = 183218 }, + { url = "https://files.pythonhosted.org/packages/74/cc/20c34d00f04d785f2028737e2e2a8254e1425102e730fee1d6396f832577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", size = 728067 }, + { url = "https://files.pythonhosted.org/packages/20/52/551c69ca1501d21c0de51ddafa8c23a0191ef296ff098e98358f69080577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", size = 757812 }, + { url = "https://files.pythonhosted.org/packages/fd/7f/2c3697bba5d4aa5cc2afe81826d73dfae5f049458e44732c7a0938baa673/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", size = 746531 }, + { url = "https://files.pythonhosted.org/packages/8c/ab/6226d3df99900e580091bb44258fde77a8433511a86883bd4681ea19a858/PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", size = 800820 }, + { url = "https://files.pythonhosted.org/packages/a0/99/a9eb0f3e710c06c5d922026f6736e920d431812ace24aae38228d0d64b04/PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", size = 145514 }, + { url = 
"https://files.pythonhosted.org/packages/75/8a/ee831ad5fafa4431099aa4e078d4c8efd43cd5e48fbc774641d233b683a9/PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", size = 162702 }, + { url = "https://files.pythonhosted.org/packages/65/d8/b7a1db13636d7fb7d4ff431593c510c8b8fca920ade06ca8ef20015493c5/PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", size = 184777 }, + { url = "https://files.pythonhosted.org/packages/0a/02/6ec546cd45143fdf9840b2c6be8d875116a64076218b61d68e12548e5839/PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", size = 172318 }, + { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891 }, + { url = "https://files.pythonhosted.org/packages/e9/6c/6e1b7f40181bc4805e2e07f4abc10a88ce4648e7e95ff1abe4ae4014a9b2/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", size = 722614 }, + { url = "https://files.pythonhosted.org/packages/3d/32/e7bd8535d22ea2874cef6a81021ba019474ace0d13a4819c2a4bce79bd6a/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", size = 737360 }, + { url = "https://files.pythonhosted.org/packages/d7/12/7322c1e30b9be969670b672573d45479edef72c9a0deac3bb2868f5d7469/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", size = 699006 }, + { url = 
"https://files.pythonhosted.org/packages/82/72/04fcad41ca56491995076630c3ec1e834be241664c0c09a64c9a2589b507/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", size = 723577 }, + { url = "https://files.pythonhosted.org/packages/ed/5e/46168b1f2757f1fcd442bc3029cd8767d88a98c9c05770d8b420948743bb/PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", size = 144593 }, + { url = "https://files.pythonhosted.org/packages/19/87/5124b1c1f2412bb95c59ec481eaf936cd32f0fe2a7b16b97b81c4c017a6a/PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", size = 162312 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "ruff" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/67/3e/e89f736f01aa9517a97e2e7e0ce8d34a4d8207087b3cfdec95133fee13b5/ruff-0.9.1.tar.gz", hash = "sha256:fd2b25ecaf907d6458fa842675382c8597b3c746a2dde6717fe3415425df0c17", size = 3498844 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/05/c3a2e0feb3d5d394cdfd552de01df9d3ec8a3a3771bbff247fab7e668653/ruff-0.9.1-py3-none-linux_armv6l.whl", hash = "sha256:84330dda7abcc270e6055551aca93fdde1b0685fc4fd358f26410f9349cf1743", size = 10645241 }, + { url = "https://files.pythonhosted.org/packages/dd/da/59f0a40e5f88ee5c054ad175caaa2319fc96571e1d29ab4730728f2aad4f/ruff-0.9.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3cae39ba5d137054b0e5b472aee3b78a7c884e61591b100aeb544bcd1fc38d4f", size = 10391066 }, + { url = "https://files.pythonhosted.org/packages/b7/fe/85e1c1acf0ba04a3f2d54ae61073da030f7a5dc386194f96f3c6ca444a78/ruff-0.9.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:50c647ff96f4ba288db0ad87048257753733763b409b2faf2ea78b45c8bb7fcb", size = 10012308 }, + { url = "https://files.pythonhosted.org/packages/6f/9b/780aa5d4bdca8dcea4309264b8faa304bac30e1ce0bcc910422bfcadd203/ruff-0.9.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0c8b149e9c7353cace7d698e1656ffcf1e36e50f8ea3b5d5f7f87ff9986a7ca", size = 10881960 }, + { url = "https://files.pythonhosted.org/packages/12/f4/dac4361afbfe520afa7186439e8094e4884ae3b15c8fc75fb2e759c1f267/ruff-0.9.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:beb3298604540c884d8b282fe7625651378e1986c25df51dec5b2f60cafc31ce", size = 10414803 }, + { url = "https://files.pythonhosted.org/packages/f0/a2/057a3cb7999513cb78d6cb33a7d1cc6401c82d7332583786e4dad9e38e44/ruff-0.9.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39d0174ccc45c439093971cc06ed3ac4dc545f5e8bdacf9f067adf879544d969", size = 11464929 }, + { url = 
"https://files.pythonhosted.org/packages/eb/c6/1ccfcc209bee465ced4874dcfeaadc88aafcc1ea9c9f31ef66f063c187f0/ruff-0.9.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:69572926c0f0c9912288915214ca9b2809525ea263603370b9e00bed2ba56dbd", size = 12170717 }, + { url = "https://files.pythonhosted.org/packages/84/97/4a524027518525c7cf6931e9fd3b2382be5e4b75b2b61bec02681a7685a5/ruff-0.9.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:937267afce0c9170d6d29f01fcd1f4378172dec6760a9f4dface48cdabf9610a", size = 11708921 }, + { url = "https://files.pythonhosted.org/packages/a6/a4/4e77cf6065c700d5593b25fca6cf725b1ab6d70674904f876254d0112ed0/ruff-0.9.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:186c2313de946f2c22bdf5954b8dd083e124bcfb685732cfb0beae0c47233d9b", size = 13058074 }, + { url = "https://files.pythonhosted.org/packages/f9/d6/fcb78e0531e863d0a952c4c5600cc5cd317437f0e5f031cd2288b117bb37/ruff-0.9.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f94942a3bb767675d9a051867c036655fe9f6c8a491539156a6f7e6b5f31831", size = 11281093 }, + { url = "https://files.pythonhosted.org/packages/e4/3b/7235bbeff00c95dc2d073cfdbf2b871b5bbf476754c5d277815d286b4328/ruff-0.9.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:728d791b769cc28c05f12c280f99e8896932e9833fef1dd8756a6af2261fd1ab", size = 10882610 }, + { url = "https://files.pythonhosted.org/packages/2a/66/5599d23257c61cf038137f82999ca8f9d0080d9d5134440a461bef85b461/ruff-0.9.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2f312c86fb40c5c02b44a29a750ee3b21002bd813b5233facdaf63a51d9a85e1", size = 10489273 }, + { url = "https://files.pythonhosted.org/packages/78/85/de4aa057e2532db0f9761e2c2c13834991e087787b93e4aeb5f1cb10d2df/ruff-0.9.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ae017c3a29bee341ba584f3823f805abbe5fe9cd97f87ed07ecbf533c4c88366", size = 11003314 }, + { url = 
"https://files.pythonhosted.org/packages/00/42/afedcaa089116d81447347f76041ff46025849fedb0ed2b187d24cf70fca/ruff-0.9.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5dc40a378a0e21b4cfe2b8a0f1812a6572fc7b230ef12cd9fac9161aa91d807f", size = 11342982 }, + { url = "https://files.pythonhosted.org/packages/39/c6/fe45f3eb27e3948b41a305d8b768e949bf6a39310e9df73f6c576d7f1d9f/ruff-0.9.1-py3-none-win32.whl", hash = "sha256:46ebf5cc106cf7e7378ca3c28ce4293b61b449cd121b98699be727d40b79ba72", size = 8819750 }, + { url = "https://files.pythonhosted.org/packages/38/8d/580db77c3b9d5c3d9479e55b0b832d279c30c8f00ab0190d4cd8fc67831c/ruff-0.9.1-py3-none-win_amd64.whl", hash = "sha256:342a824b46ddbcdddd3abfbb332fa7fcaac5488bf18073e841236aadf4ad5c19", size = 9701331 }, + { url = "https://files.pythonhosted.org/packages/b2/94/0498cdb7316ed67a1928300dd87d659c933479f44dec51b4f62bfd1f8028/ruff-0.9.1-py3-none-win_arm64.whl", hash = "sha256:1cd76c7f9c679e6e8f2af8f778367dca82b95009bc7b1a85a47f1521ae524fa7", size = 9145708 }, +] + +[[package]] +name = "setuptools" +version = "75.3.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 }, +] + +[[package]] +name = "setuptools" +version = "75.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url 
= "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "snowballstemmer" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/7b/af302bebf22c749c56c9c3e8ae13190b5b5db37a33d9068652e8f73b7089/snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1", size = 86699 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a", size = 93002 }, +] + +[[package]] +name = "soupsieve" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, +] + +[[package]] +name = "sphinx" +version = "7.1.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "alabaster", version = "0.7.13", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "babel", marker = "python_full_version < '3.9'" }, + { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "imagesize", marker = "python_full_version < '3.9'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.9'" }, + { name = "jinja2", marker = "python_full_version < '3.9'" }, + { name = "packaging", marker = "python_full_version < '3.9'" }, + { name = "pygments", marker = "python_full_version < '3.9'" }, + { name = "requests", marker = "python_full_version < '3.9'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-applehelp", version = "1.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-devhelp", version = "1.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-htmlhelp", version = "2.0.1", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.9'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-qthelp", version = "1.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinxcontrib-serializinghtml", version = "1.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/01/688bdf9282241dca09fe6e3a1110eda399fa9b10d0672db609e37c2e7a39/sphinx-7.1.2.tar.gz", hash = "sha256:780f4d32f1d7d1126576e0e5ecc19dc32ab76cd24e950228dcf7b1f6d3d9e22f", size = 6828258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/17/325cf6a257d84751a48ae90752b3d8fe0be8f9535b6253add61c49d0d9bc/sphinx-7.1.2-py3-none-any.whl", hash = "sha256:d170a81825b2fcacb6dfd5a0d7f578a053e45d3f2b153fecc948c37344eb4cbe", size = 3169543 }, +] + +[[package]] +name = "sphinx" +version = "7.4.7" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +dependencies = [ + { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "babel", marker = "python_full_version == '3.9.*'" }, + { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "imagesize", marker = "python_full_version == '3.9.*'" }, + { name = "importlib-metadata", marker = "python_full_version == '3.9.*'" }, + { name = "jinja2", marker = "python_full_version == '3.9.*'" }, + { name = "packaging", marker = "python_full_version == '3.9.*'" }, + { name = "pygments", marker = "python_full_version == '3.9.*'" }, + { name = "requests", marker = "python_full_version == '3.9.*'" }, + { name = "snowballstemmer", 
marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "tomli", marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/ef/153f6803c5d5f8917dbb7f7fcf6d34a871ede3296fa89c2c703f5f8a6c8e/sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239", size = 3401624 }, +] + +[[package]] +name = "sphinx" +version = "8.1.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "babel", marker = "python_full_version >= '3.10'" }, + { name = "colorama", marker = "python_full_version >= '3.10' and 
sys_platform == 'win32'" }, + { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "imagesize", marker = "python_full_version >= '3.10'" }, + { name = "jinja2", marker = "python_full_version >= '3.10'" }, + { name = "packaging", marker = "python_full_version >= '3.10'" }, + { name = "pygments", marker = "python_full_version >= '3.10'" }, + { name = "requests", marker = "python_full_version >= '3.10'" }, + { name = "snowballstemmer", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "tomli", marker = "python_full_version == '3.10.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125 }, +] + 
+[[package]] +name = "sphinx-autoapi" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "astroid", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "astroid", version = "3.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "jinja2" }, + { name = "pyyaml" }, + { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "stdlib-list", version = "0.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "stdlib-list", version = "0.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4a/eb/cc243583bb1d518ca3b10998c203d919a8ed90affd4831f2b61ad09043d2/sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c", size = 29292 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/d6/f2acdc2567337fd5f5dc091a4e58d8a0fb14927b9779fc1e5ecee96d9824/sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92", size = 34095 }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/32/df/45e827f4d7e7fcc84e853bcef1d836effd762d63ccb86f43ede4e98b478c/sphinxcontrib-applehelp-1.0.4.tar.gz", hash 
= "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e", size = 24766 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228", size = 120601 }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/98/33/dc28393f16385f722c893cb55539c641c9aaec8d1bc1c15b69ce0ac2dbb3/sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4", size = 17398 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", size = 84690 }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = 
{ registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/47/64cff68ea3aa450c373301e5bebfbb9fce0a3e70aca245fcadd4af06cd75/sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff", size = 27967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903", size = 99833 }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = 
"sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/8e/c4846e59f38a5f2b4a0e3b27af38f2fcf904d4bfd82095bf92de0b114ebd/sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", size = 21658 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6", size = 90609 }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + 
"python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.5" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/72/835d6fadb9e5d02304cf39b18f93d227cd93abd3c41ebf58e6853eeb1455/sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952", size = 21019 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", size = 94021 }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, +] + +[[package]] +name = "stdlib-list" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/39/bb/1cdbc326a5ab0026602e0489cbf02357e78140253c4b57cd866d380eb355/stdlib_list-0.10.0.tar.gz", hash = "sha256:6519c50d645513ed287657bfe856d527f277331540691ddeaf77b25459964a14", size = 59447 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/d9/9085375f0d23a4896b307bf14dcc61b49ec8cc67cb33e06cf95bf3af3966/stdlib_list-0.10.0-py3-none-any.whl", hash = "sha256:b3a911bc441d03e0332dd1a9e7d0870ba3bb0a542a74d7524f54fb431256e214", size = 79814 }, +] + +[[package]] +name = "stdlib-list" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/04/6b37a71e92ddca16b190b7df62494ac4779d58ced4787f73584eb32c8f03/stdlib_list-0.11.0.tar.gz", hash 
= "sha256:b74a7b643a77a12637e907f3f62f0ab9f67300bce4014f6b2d3c8b4c8fd63c66", size = 60335 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/fe/e07300c027a868d32d8ed7a425503401e91a03ff90e7ca525c115c634ffb/stdlib_list-0.11.0-py3-none-any.whl", hash = "sha256:8bf8decfffaaf273d4cfeb5bd852b910a00dec1037dcf163576803622bccf597", size = 83617 }, +] + +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + +[[package]] +name = "tomli" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = 
"https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = 
"https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = 
"https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543 }, + { url = 
"https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = 
"https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, +] + +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + +[[package]] +name = "tzdata" +version = "2024.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, +] + +[[package]] +name = "urllib3" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = 
"sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, +] + +[[package]] +name = "zipp" +version = "3.20.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/54/bf/5c0000c44ebc80123ecbdddba1f5dcd94a5ada602a9c225d84b5aaa55e86/zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29", size = 24199 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, +] + +[[package]] +name = "zipp" +version = "3.21.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, +] From dfe4242ce4097a2f923939e443c6686c9d20c0af Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 22 Jan 2025 05:52:32 -0500 Subject: [PATCH 002/206] Update dependencies prior to release (#999) --- Cargo.lock | 247 ++++++++++++++++++++++++++++------------------------- Cargo.toml | 4 +- 2 files changed, 
133 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 105cc30c2..5a74a4839 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91839b07e474b3995035fd8ac33ee54f9c9ccbbb1ea33d9909c71bffdf1259d" +checksum = "eaf3437355979f1e93ba84ba108c38be5767713051f3c8ffbf07c094e2e61f9f" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "855c57c4efd26722b044dcd3e348252560e3e0333087fb9f6479dc0bf744054f" +checksum = "31dce77d2985522288edae7206bffd5fc4996491841dda01a13a58415867e681" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd03279cea46569acf9295f6224fbc370c5df184b4d2ecfe97ccb131d5615a7f" +checksum = "2d45fe6d3faed0435b7313e59a02583b14c6c6339fa7729e94c32a20af319a79" dependencies = [ "ahash", "arrow-buffer", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e4a9b9b1d6d7117f6138e13bc4dd5daa7f94e671b70e8c9c4dc37b4f5ecfc16" +checksum = "2b02656a35cc103f28084bc80a0159668e0a680d919cef127bd7e0aaccb06ec1" dependencies = [ "bytes", "half", @@ -244,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc70e39916e60c5b7af7a8e2719e3ae589326039e1e863675a008bee5ffe90fd" +checksum = 
"c73c6233c5b5d635a56f6010e6eb1ab9e30e94707db21cea03da317f67d84cf3" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "789b2af43c1049b03a8d088ff6b2257cdcea1756cd76b174b1f2600356771b97" +checksum = "ec222848d70fea5a32af9c3602b08f5d740d5e2d33fbd76bf6fd88759b5b13a7" dependencies = [ "arrow-array", "arrow-buffer", @@ -284,9 +284,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e75edf21ffd53744a9b8e3ed11101f610e7ceb1a29860432824f1834a1f623" +checksum = "b7f2861ffa86f107b8ab577d86cff7c7a490243eabe961ba1e1af4f27542bb79" dependencies = [ "arrow-buffer", "arrow-schema", @@ -296,9 +296,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d186a909dece9160bf8312f5124d797884f608ef5435a36d9d608e0b2a9bcbf8" +checksum = "0270dc511f11bb5fa98a25020ad51a99ca5b08d8a8dfbd17503bb9dba0388f0b" dependencies = [ "arrow-array", "arrow-buffer", @@ -311,9 +311,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66ff2fedc1222942d0bd2fd391cb14a85baa3857be95c9373179bd616753b85" +checksum = "0eff38eeb8a971ad3a4caf62c5d57f0cff8a48b64a55e3207c4fd696a9234aad" dependencies = [ "arrow-array", "arrow-buffer", @@ -331,9 +331,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece7b5bc1180e6d82d1a60e1688c199829e8842e38497563c3ab6ea813e527fd" +checksum = "c6f202a879d287099139ff0d121e7f55ae5e0efe634b8cf2106ebc27a8715dee" dependencies = [ "arrow-array", 
"arrow-buffer", @@ -346,9 +346,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "745c114c8f0e8ce211c83389270de6fbe96a9088a7b32c2a041258a443fe83ff" +checksum = "a8f936954991c360ba762dff23f5dda16300774fafd722353d9683abd97630ae" dependencies = [ "ahash", "arrow-array", @@ -360,18 +360,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" +checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e415279094ea70323c032c6e739c48ad8d80e78a09bef7117b8718ad5bf3722" +checksum = "7471ba126d0b0aaa24b50a36bc6c25e4e74869a1fd1a5553357027a0b1c8d1f1" dependencies = [ "ahash", "arrow-array", @@ -383,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d956cae7002eb8d83a27dbd34daaea1cf5b75852f0b84deb4d93a276e92bbf" +checksum = "72993b01cb62507b06f1fb49648d7286c8989ecfabdb7b77a750fcb54410731b" dependencies = [ "arrow-array", "arrow-buffer", @@ -444,7 +444,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -455,7 +455,7 @@ checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -528,9 +528,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = 
"bitflags" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "blake2" @@ -635,9 +635,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.7" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" +checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" dependencies = [ "jobserver", "libc", @@ -671,9 +671,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" +checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" dependencies = [ "chrono", "chrono-tz-build", @@ -1189,7 +1189,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1406,7 +1406,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1475,6 +1475,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1540,7 +1546,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1662,6 +1668,11 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -1916,7 +1927,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1994,9 +2005,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", @@ -2152,9 +2163,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" [[package]] name = "lz4_flex" @@ -2218,9 +2229,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" dependencies = [ "adler2", ] @@ -2404,9 +2415,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.3.0" +version = "53.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" +checksum = 
"8957c0c95a6a1804f3e51a18f69df29be53856a8c5768cc9b6d00fcafcd2917c" dependencies = [ "ahash", "arrow-array", @@ -2579,19 +2590,19 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.27" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -2622,7 +2633,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.95", + "syn 2.0.96", "tempfile", ] @@ -2636,7 +2647,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2726,7 +2737,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2739,7 +2750,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2771,7 +2782,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror 2.0.10", + "thiserror 2.0.11", "tokio", "tracing", ] @@ -2790,7 +2801,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.10", + "thiserror 2.0.11", "tinyvec", "tracing", "web-time", @@ -2866,7 +2877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2875,7 +2886,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] @@ -2915,11 +2926,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" +checksum = "4f56e622c2378013c6c61e2bd776604c46dc1087b2dc5293275a0c20a44f0771" dependencies = [ - "hashbrown 0.14.5", + "hashbrown 0.15.2", "memchr", ] @@ -3026,7 +3037,7 @@ version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", @@ -3035,9 +3046,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.20" +version = "0.23.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" +checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" dependencies = [ "once_cell", "ring", @@ -3139,7 +3150,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3154,7 +3165,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "core-foundation", "core-foundation-sys", "libc", @@ -3212,7 +3223,7 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3223,14 +3234,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "336a0c23cf42a38d9eaa7cd22c7040d04e1228a19a933890805ffd00a16437d2" dependencies = [ "itoa", "memchr", @@ -3247,7 +3258,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3331,7 +3342,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3374,7 +3385,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3418,7 +3429,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3442,7 +3453,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.95", + "syn 2.0.96", "typify", "walkdir", ] @@ -3466,9 +3477,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.95" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -3492,7 +3503,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3526,11 +3537,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3ac7f54ca534db81081ef1c1e7f6ea8a3ef428d2fc069097c079443d24124d3" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" dependencies = [ - "thiserror-impl 2.0.10", + 
"thiserror-impl 2.0.11", ] [[package]] @@ -3541,18 +3552,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] name = "thiserror-impl" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9465d30713b56a37ede7185763c3492a91be2f5fa68d958c44e41ab9248beb" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3624,7 +3635,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3696,7 +3707,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3762,7 +3773,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -3796,7 +3807,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.95", + "syn 2.0.96", "thiserror 1.0.69", "unicode-ident", ] @@ -3814,7 +3825,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.95", + "syn 2.0.96", "typify-impl", ] @@ -3879,9 +3890,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" dependencies = [ "getrandom", "serde", @@ -3920,34 +3931,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = 
"wasm-bindgen" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.49" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", @@ -3958,9 +3970,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3968,22 +3980,25 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "wasm-bindgen-backend", "wasm-bindgen-shared", ] 
[[package]] name = "wasm-bindgen-shared" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-streams" @@ -4000,9 +4015,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", @@ -4211,7 +4226,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "synstructure", ] @@ -4233,7 +4248,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4253,7 +4268,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "synstructure", ] @@ -4282,7 +4297,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 48219414a..10cffccb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.41", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } 
pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} arrow = { version = "53", features = ["pyarrow"] } @@ -43,7 +43,7 @@ datafusion-substrait = { version = "44.0.0", optional = true } datafusion-proto = { version = "44.0.0" } datafusion-ffi = { version = "44.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` -uuid = { version = "1.11", features = ["v4"] } +uuid = { version = "1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } async-trait = "0.1" futures = "0.3" From 78e72c9445db4e78dcda2562e251beea4f1ad470 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 22 Jan 2025 05:53:13 -0500 Subject: [PATCH 003/206] Apply import ordering in ruff check (#1001) --- benchmarks/db-benchmark/groupby-datafusion.py | 12 +-- benchmarks/db-benchmark/join-datafusion.py | 6 +- benchmarks/tpch/tpch.py | 3 +- dev/release/generate-changelog.py | 5 +- examples/export.py | 1 - .../python/tests/_test_table_provider.py | 2 +- examples/import.py | 3 +- examples/python-udaf.py | 5 +- examples/python-udf-comparisons.py | 6 +- examples/python-udf.py | 3 +- examples/python-udwf.py | 7 +- examples/query-pyarrow-data.py | 3 +- examples/sql-parquet-s3.py | 1 + examples/sql-to-pandas.py | 1 - examples/sql-using-python-udaf.py | 2 +- examples/sql-using-python-udf.py | 2 +- examples/tpch/_tests.py | 6 +- examples/tpch/convert_data_to_parquet.py | 3 +- examples/tpch/q01_pricing_summary_report.py | 3 +- examples/tpch/q02_minimum_cost_supplier.py | 3 +- examples/tpch/q03_shipping_priority.py | 3 +- examples/tpch/q04_order_priority_checking.py | 4 +- examples/tpch/q05_local_supplier_volume.py | 5 +- .../tpch/q06_forecasting_revenue_change.py | 4 +- examples/tpch/q07_volume_shipping.py | 4 +- examples/tpch/q08_market_share.py | 4 +- .../tpch/q09_product_type_profit_measure.py | 3 +- examples/tpch/q10_returned_item_reporting.py | 4 +- .../q11_important_stock_identification.py | 3 +- 
examples/tpch/q12_ship_mode_order_priority.py | 4 +- examples/tpch/q13_customer_distribution.py | 3 +- examples/tpch/q14_promotion_effect.py | 4 +- examples/tpch/q15_top_supplier.py | 4 +- .../tpch/q16_part_supplier_relationship.py | 3 +- examples/tpch/q17_small_quantity_order.py | 3 +- examples/tpch/q18_large_volume_customer.py | 3 +- examples/tpch/q19_discounted_revenue.py | 3 +- examples/tpch/q20_potential_part_promotion.py | 4 +- .../tpch/q21_suppliers_kept_orders_waiting.py | 3 +- examples/tpch/q22_global_sales_opportunity.py | 3 +- pyproject.toml | 4 +- python/datafusion/__init__.py | 30 +++---- python/datafusion/catalog.py | 4 +- python/datafusion/common.py | 3 +- python/datafusion/context.py | 23 ++--- python/datafusion/dataframe.py | 21 +++-- python/datafusion/expr.py | 5 +- python/datafusion/functions.py | 16 ++-- python/datafusion/input/location.py | 2 +- python/datafusion/plan.py | 4 +- python/datafusion/record_batch.py | 3 +- python/datafusion/substrait.py | 8 +- python/datafusion/udf.py | 9 +- python/tests/conftest.py | 2 +- python/tests/test_aggregation.py | 1 - python/tests/test_config.py | 2 +- python/tests/test_context.py | 3 +- python/tests/test_dataframe.py | 5 +- python/tests/test_functions.py | 6 +- python/tests/test_imports.py | 89 +++++++++---------- python/tests/test_indexing.py | 1 - python/tests/test_input.py | 1 + python/tests/test_plans.py | 2 +- python/tests/test_sql.py | 5 +- python/tests/test_store.py | 1 - python/tests/test_substrait.py | 3 +- python/tests/test_udaf.py | 1 - python/tests/test_udf.py | 2 +- python/tests/test_udwf.py | 6 +- 69 files changed, 221 insertions(+), 189 deletions(-) diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 960c8ba9a..04bf7a149 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -15,21 +15,23 @@ # specific language governing permissions and limitations # under the License. 
-import os import gc +import os import timeit + import datafusion as df +import pyarrow from datafusion import ( - col, - functions as f, RuntimeEnvBuilder, SessionConfig, SessionContext, + col, +) +from datafusion import ( + functions as f, ) -import pyarrow from pyarrow import csv as pacsv - print("# groupby-datafusion.py", flush=True) exec(open("./_helpers/helpers.py").read()) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index 811ad8707..b45ebf632 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -15,15 +15,15 @@ # specific language governing permissions and limitations # under the License. -import os import gc +import os import timeit + import datafusion as df -from datafusion import functions as f from datafusion import col +from datafusion import functions as f from pyarrow import csv as pacsv - print("# join-datafusion.py", flush=True) exec(open("./_helpers/helpers.py").read()) diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index daa831b55..fb86b12b6 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -16,9 +16,10 @@ # under the License. import argparse -from datafusion import SessionContext import time +from datafusion import SessionContext + def bench(data_path, query_path): with open("results.csv", "w") as results: diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 0f07457d0..2564eea86 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -16,11 +16,12 @@ # limitations under the License. 
import argparse -import sys -from github import Github import os import re import subprocess +import sys + +from github import Github def print_pulls(repo_name, title, pulls): diff --git a/examples/export.py b/examples/export.py index cc02de52b..c7a387bcb 100644 --- a/examples/export.py +++ b/examples/export.py @@ -17,7 +17,6 @@ import datafusion - # create a context ctx = datafusion.SessionContext() diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/ffi-table-provider/python/tests/_test_table_provider.py index 56c05e4fa..0db3ec561 100644 --- a/examples/ffi-table-provider/python/tests/_test_table_provider.py +++ b/examples/ffi-table-provider/python/tests/_test_table_provider.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa from datafusion import SessionContext from ffi_table_provider import MyTableProvider -import pyarrow as pa def test_table_loading(): diff --git a/examples/import.py b/examples/import.py index c9d2e8cb6..7b5ab5082 100644 --- a/examples/import.py +++ b/examples/import.py @@ -16,10 +16,9 @@ # under the License. import datafusion -import pyarrow as pa import pandas as pd import polars as pl - +import pyarrow as pa # Create a context ctx = datafusion.SessionContext() diff --git a/examples/python-udaf.py b/examples/python-udaf.py index ed705f5a9..538f69571 100644 --- a/examples/python-udaf.py +++ b/examples/python-udaf.py @@ -15,11 +15,10 @@ # specific language governing permissions and limitations # under the License. 
+import datafusion import pyarrow import pyarrow.compute -import datafusion -from datafusion import udaf, Accumulator -from datafusion import col +from datafusion import Accumulator, col, udaf class MyAccumulator(Accumulator): diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py index 9a84dd730..c5d5ec8dd 100644 --- a/examples/python-udf-comparisons.py +++ b/examples/python-udf-comparisons.py @@ -15,11 +15,13 @@ # specific language governing permissions and limitations # under the License. -from datafusion import SessionContext, col, lit, udf, functions as F import os +import time + import pyarrow as pa import pyarrow.compute as pc -import time +from datafusion import SessionContext, col, lit, udf +from datafusion import functions as F path = os.path.dirname(os.path.abspath(__file__)) filepath = os.path.join(path, "./tpch/data/lineitem.parquet") diff --git a/examples/python-udf.py b/examples/python-udf.py index 30edd4198..fb2bc253e 100644 --- a/examples/python-udf.py +++ b/examples/python-udf.py @@ -16,7 +16,8 @@ # under the License. import pyarrow -from datafusion import udf, SessionContext, functions as f +from datafusion import SessionContext, udf +from datafusion import functions as f def is_null(array: pyarrow.Array) -> pyarrow.Array: diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 55de2bdc7..32f8fadaa 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
-import pyarrow as pa import datafusion -from datafusion import udwf, functions as f, col, lit -from datafusion.udf import WindowEvaluator +import pyarrow as pa +from datafusion import col, lit, udwf +from datafusion import functions as f from datafusion.expr import WindowFrame +from datafusion.udf import WindowEvaluator # This example creates five different examples of user defined window functions in order # to demonstrate the variety of ways a user may need to implement. diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py index 83e6884a7..e3456fb5b 100644 --- a/examples/query-pyarrow-data.py +++ b/examples/query-pyarrow-data.py @@ -16,9 +16,8 @@ # under the License. import datafusion -from datafusion import col import pyarrow - +from datafusion import col # create a context ctx = datafusion.SessionContext() diff --git a/examples/sql-parquet-s3.py b/examples/sql-parquet-s3.py index 61f1e0c50..866e2ac68 100644 --- a/examples/sql-parquet-s3.py +++ b/examples/sql-parquet-s3.py @@ -16,6 +16,7 @@ # under the License. import os + import datafusion from datafusion.object_store import AmazonS3 diff --git a/examples/sql-to-pandas.py b/examples/sql-to-pandas.py index 3e99b22de..34f7bde1b 100644 --- a/examples/sql-to-pandas.py +++ b/examples/sql-to-pandas.py @@ -17,7 +17,6 @@ from datafusion import SessionContext - # Create a DataFusion context ctx = SessionContext() diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py index 7ccf5d3cb..60ab8d134 100644 --- a/examples/sql-using-python-udaf.py +++ b/examples/sql-using-python-udaf.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. 
-from datafusion import udaf, SessionContext, Accumulator import pyarrow as pa +from datafusion import Accumulator, SessionContext, udaf # Define a user-defined aggregation function (UDAF) diff --git a/examples/sql-using-python-udf.py b/examples/sql-using-python-udf.py index d6bbe3ab0..2f0a0b67d 100644 --- a/examples/sql-using-python-udf.py +++ b/examples/sql-using-python-udf.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udf, SessionContext import pyarrow as pa +from datafusion import SessionContext, udf # Define a user-defined function (UDF) diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 3ce9cdfe5..c4d872085 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -15,10 +15,12 @@ # specific language governing permissions and limitations # under the License. -import pytest from importlib import import_module + import pyarrow as pa -from datafusion import DataFrame, col, lit, functions as F +import pytest +from datafusion import DataFrame, col, lit +from datafusion import functions as F from util import get_answer_file diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index cb0b2f0bd..73097fac5 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py @@ -23,8 +23,9 @@ """ import os -import pyarrow + import datafusion +import pyarrow ctx = datafusion.SessionContext() diff --git a/examples/tpch/q01_pricing_summary_report.py b/examples/tpch/q01_pricing_summary_report.py index cb9485a7a..3f97f00dc 100644 --- a/examples/tpch/q01_pricing_summary_report.py +++ b/examples/tpch/q01_pricing_summary_report.py @@ -30,7 +30,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path ctx = SessionContext() diff --git 
a/examples/tpch/q02_minimum_cost_supplier.py b/examples/tpch/q02_minimum_cost_supplier.py index c4ccf8ad3..7390d0892 100644 --- a/examples/tpch/q02_minimum_cost_supplier.py +++ b/examples/tpch/q02_minimum_cost_supplier.py @@ -30,7 +30,8 @@ """ import datafusion -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # This is the part we're looking for. Values selected here differ from the spec in order to run diff --git a/examples/tpch/q03_shipping_priority.py b/examples/tpch/q03_shipping_priority.py index 5ebab13c0..fc1231e0a 100644 --- a/examples/tpch/q03_shipping_priority.py +++ b/examples/tpch/q03_shipping_priority.py @@ -27,7 +27,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path SEGMENT_OF_INTEREST = "BUILDING" diff --git a/examples/tpch/q04_order_priority_checking.py b/examples/tpch/q04_order_priority_checking.py index 8bf02cb83..426338aea 100644 --- a/examples/tpch/q04_order_priority_checking.py +++ b/examples/tpch/q04_order_priority_checking.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Ideally we could put 3 months into the interval. See note below. 
diff --git a/examples/tpch/q05_local_supplier_volume.py b/examples/tpch/q05_local_supplier_volume.py index 413a4acb9..fa2b01dea 100644 --- a/examples/tpch/q05_local_supplier_volume.py +++ b/examples/tpch/q05_local_supplier_volume.py @@ -30,11 +30,12 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path - DATE_OF_INTEREST = "1994-01-01" INTERVAL_DAYS = 365 REGION_OF_INTEREST = "ASIA" diff --git a/examples/tpch/q06_forecasting_revenue_change.py b/examples/tpch/q06_forecasting_revenue_change.py index eaf9b0c29..1de5848b1 100644 --- a/examples/tpch/q06_forecasting_revenue_change.py +++ b/examples/tpch/q06_forecasting_revenue_change.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Variables from the example query diff --git a/examples/tpch/q07_volume_shipping.py b/examples/tpch/q07_volume_shipping.py index 18c290d9c..a84cf728a 100644 --- a/examples/tpch/q07_volume_shipping.py +++ b/examples/tpch/q07_volume_shipping.py @@ -29,8 +29,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path # Variables of interest to query over diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index 7138ab65a..d46df30f2 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -28,8 +28,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import 
SessionContext, col, lit +from datafusion import functions as F from util import get_data_path supplier_nation = lit("BRAZIL") diff --git a/examples/tpch/q09_product_type_profit_measure.py b/examples/tpch/q09_product_type_profit_measure.py index aa47d76c0..e2abbd095 100644 --- a/examples/tpch/q09_product_type_profit_measure.py +++ b/examples/tpch/q09_product_type_profit_measure.py @@ -30,7 +30,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path part_color = lit("green") diff --git a/examples/tpch/q10_returned_item_reporting.py b/examples/tpch/q10_returned_item_reporting.py index 94b398c1d..ed822e264 100644 --- a/examples/tpch/q10_returned_item_reporting.py +++ b/examples/tpch/q10_returned_item_reporting.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path DATE_START_OF_QUARTER = "1993-10-01" diff --git a/examples/tpch/q11_important_stock_identification.py b/examples/tpch/q11_important_stock_identification.py index 707265e16..22829ab7c 100644 --- a/examples/tpch/q11_important_stock_identification.py +++ b/examples/tpch/q11_important_stock_identification.py @@ -27,7 +27,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. 
""" -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path NATION = "GERMANY" diff --git a/examples/tpch/q12_ship_mode_order_priority.py b/examples/tpch/q12_ship_mode_order_priority.py index def2a6c30..f1d894940 100644 --- a/examples/tpch/q12_ship_mode_order_priority.py +++ b/examples/tpch/q12_ship_mode_order_priority.py @@ -30,8 +30,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path SHIP_MODE_1 = "MAIL" diff --git a/examples/tpch/q13_customer_distribution.py b/examples/tpch/q13_customer_distribution.py index 67365a96a..93f082ea3 100644 --- a/examples/tpch/q13_customer_distribution.py +++ b/examples/tpch/q13_customer_distribution.py @@ -28,7 +28,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. 
""" -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path WORD_1 = "special" diff --git a/examples/tpch/q14_promotion_effect.py b/examples/tpch/q14_promotion_effect.py index cd26ee2bd..d62f76e3c 100644 --- a/examples/tpch/q14_promotion_effect.py +++ b/examples/tpch/q14_promotion_effect.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path DATE = "1995-09-01" diff --git a/examples/tpch/q15_top_supplier.py b/examples/tpch/q15_top_supplier.py index 0bc316f7a..c321048f2 100644 --- a/examples/tpch/q15_top_supplier.py +++ b/examples/tpch/q15_top_supplier.py @@ -27,8 +27,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path DATE = "1996-01-01" diff --git a/examples/tpch/q16_part_supplier_relationship.py b/examples/tpch/q16_part_supplier_relationship.py index a6a0c43eb..65043ffda 100644 --- a/examples/tpch/q16_part_supplier_relationship.py +++ b/examples/tpch/q16_part_supplier_relationship.py @@ -29,7 +29,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path BRAND = "Brand#45" diff --git a/examples/tpch/q17_small_quantity_order.py b/examples/tpch/q17_small_quantity_order.py index d7b43d498..6d76fe506 100644 --- a/examples/tpch/q17_small_quantity_order.py +++ b/examples/tpch/q17_small_quantity_order.py @@ -28,7 +28,8 @@ as part of their TPC Benchmark H Specification 
revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path BRAND = "Brand#23" diff --git a/examples/tpch/q18_large_volume_customer.py b/examples/tpch/q18_large_volume_customer.py index 165fce033..834d181c9 100644 --- a/examples/tpch/q18_large_volume_customer.py +++ b/examples/tpch/q18_large_volume_customer.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path QUANTITY = 300 diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index 4aed0cbae..2b87e1120 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -27,7 +27,8 @@ """ import pyarrow as pa -from datafusion import SessionContext, col, lit, udf, functions as F +from datafusion import SessionContext, col, lit, udf +from datafusion import functions as F from util import get_data_path items_of_interest = { diff --git a/examples/tpch/q20_potential_part_promotion.py b/examples/tpch/q20_potential_part_promotion.py index d720cdce6..a25188d31 100644 --- a/examples/tpch/q20_potential_part_promotion.py +++ b/examples/tpch/q20_potential_part_promotion.py @@ -28,8 +28,10 @@ """ from datetime import datetime + import pyarrow as pa -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path COLOR_OF_INTEREST = "forest" diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 27cf816fa..9bbaad779 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ 
b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, col, lit, functions as F +from datafusion import SessionContext, col, lit +from datafusion import functions as F from util import get_data_path NATION_OF_INTEREST = "SAUDI ARABIA" diff --git a/examples/tpch/q22_global_sales_opportunity.py b/examples/tpch/q22_global_sales_opportunity.py index 72dce5289..c4d115b74 100644 --- a/examples/tpch/q22_global_sales_opportunity.py +++ b/examples/tpch/q22_global_sales_opportunity.py @@ -26,7 +26,8 @@ as part of their TPC Benchmark H Specification revision 2.18.0. """ -from datafusion import SessionContext, WindowFrame, col, lit, functions as F +from datafusion import SessionContext, WindowFrame, col, lit +from datafusion import functions as F from util import get_data_path NATION_CODES = [13, 31, 23, 29, 30, 18, 17] diff --git a/pyproject.toml b/pyproject.toml index 6e8acfe71..32bb28d21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W"] +select = ["E4", "E7", "E9", "F", "D", "W", "I"] [tool.ruff.lint.pydocstyle] convention = "google" @@ -100,4 +100,4 @@ docs = [ "pickleshare>=0.7.5", "sphinx-autoapi>=3.4.0", "setuptools>=75.3.0", -] \ No newline at end of file +] diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 2d8db42c8..85aefcce7 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,36 +26,28 @@ except ImportError: import importlib_metadata -from .context import ( - SessionContext, - SessionConfig, - RuntimeEnvBuilder, - SQLOptions, -) - -from .catalog import Catalog, Database, Table +from . import functions, object_store, substrait # The following imports are okay to remain as opaque to the user. 
from ._internal import Config - -from .record_batch import RecordBatchStream, RecordBatch - -from .udf import ScalarUDF, AggregateUDF, Accumulator, WindowUDF - +from .catalog import Catalog, Database, Table from .common import ( DFSchema, ) - +from .context import ( + RuntimeEnvBuilder, + SessionConfig, + SessionContext, + SQLOptions, +) from .dataframe import DataFrame - from .expr import ( Expr, WindowFrame, ) - -from .plan import LogicalPlan, ExecutionPlan - -from . import functions, object_store, substrait +from .plan import ExecutionPlan, LogicalPlan +from .record_batch import RecordBatch, RecordBatchStream +from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF __version__ = importlib_metadata.version(__name__) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index acd28f33d..703037665 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -19,10 +19,10 @@ from __future__ import annotations -import datafusion._internal as df_internal - from typing import TYPE_CHECKING +import datafusion._internal as df_internal + if TYPE_CHECKING: import pyarrow diff --git a/python/datafusion/common.py b/python/datafusion/common.py index 7db8333f2..a2298c634 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -16,9 +16,10 @@ # under the License. 
"""Common data types used throughout the DataFusion project.""" -from ._internal import common as common_internal from enum import Enum +from ._internal import common as common_internal + # TODO these should all have proper wrapper classes DFSchema = common_internal.DFSchema diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 3c284c9f9..864ef1c8b 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -19,26 +19,29 @@ from __future__ import annotations -from ._internal import SessionConfig as SessionConfigInternal -from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal -from ._internal import SQLOptions as SQLOptionsInternal -from ._internal import SessionContext as SessionContextInternal +from typing import TYPE_CHECKING, Any, Protocol + +from typing_extensions import deprecated from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream -from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF +from datafusion.udf import AggregateUDF, ScalarUDF, WindowUDF -from typing import Any, TYPE_CHECKING, Protocol -from typing_extensions import deprecated +from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal +from ._internal import SessionConfig as SessionConfigInternal +from ._internal import SessionContext as SessionContextInternal +from ._internal import SQLOptions as SQLOptionsInternal if TYPE_CHECKING: - import pyarrow + import pathlib + import pandas import polars - import pathlib - from datafusion.plan import LogicalPlan, ExecutionPlan + import pyarrow + + from datafusion.plan import ExecutionPlan, LogicalPlan class ArrowStreamExportable(Protocol): diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index b0c1abdad..7413a5fa3 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py 
@@ -20,31 +20,36 @@ """ from __future__ import annotations + import warnings from typing import ( + TYPE_CHECKING, Any, Iterable, List, - TYPE_CHECKING, Literal, - overload, Optional, Union, + overload, ) -from datafusion.record_batch import RecordBatchStream + from typing_extensions import deprecated -from datafusion.plan import LogicalPlan, ExecutionPlan + +from datafusion.plan import ExecutionPlan, LogicalPlan +from datafusion.record_batch import RecordBatchStream if TYPE_CHECKING: - import pyarrow as pa - import pandas as pd - import polars as pl import pathlib from typing import Callable, Sequence + import pandas as pd + import polars as pl + import pyarrow as pa + +from enum import Enum + from datafusion._internal import DataFrame as DataFrameInternal from datafusion.expr import Expr, SortExpr, sort_or_default -from enum import Enum # excerpt from deltalake diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 16add16f4..68ddd7c9a 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -22,12 +22,13 @@ from __future__ import annotations -from typing import Any, Optional, Type, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional, Type import pyarrow as pa -from datafusion.common import DataTypeMap, NullTreatment, RexType from typing_extensions import deprecated +from datafusion.common import DataTypeMap, NullTreatment, RexType + from ._internal import expr as expr_internal from ._internal import functions as functions_internal diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index c0097c6ab..7c2fa9a8f 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,21 +18,21 @@ from __future__ import annotations +from typing import Any, Optional + +import pyarrow as pa + from datafusion._internal import functions as f +from datafusion.common import NullTreatment +from datafusion.context import SessionContext from datafusion.expr import ( CaseBuilder, Expr, - 
WindowFrame, SortExpr, - sort_list_to_raw_sort_list, + WindowFrame, expr_list_to_raw_expr_list, + sort_list_to_raw_sort_list, ) -from datafusion.context import SessionContext -from datafusion.common import NullTreatment - -from typing import Any, Optional - -import pyarrow as pa __all__ = [ "abs", diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index b274539fc..a8252b53c 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -17,8 +17,8 @@ """The default input source for DataFusion.""" -import os import glob +import os from typing import Any from datafusion.common import DataTypeMap, SqlTable diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index a71965f41..133fc446d 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -19,9 +19,9 @@ from __future__ import annotations -import datafusion._internal as df_internal +from typing import TYPE_CHECKING, Any, List -from typing import List, Any, TYPE_CHECKING +import datafusion._internal as df_internal if TYPE_CHECKING: from datafusion.context import SessionContext diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 75e58998f..772cd9089 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -27,9 +27,10 @@ if TYPE_CHECKING: import pyarrow - import datafusion._internal as df_internal import typing_extensions + import datafusion._internal as df_internal + class RecordBatch: """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index dea47acca..402184d3f 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -23,13 +23,15 @@ from __future__ import annotations -from ._internal import substrait as substrait_internal - +import pathlib from typing import TYPE_CHECKING + from typing_extensions import deprecated -import 
pathlib + from datafusion.plan import LogicalPlan +from ._internal import substrait as substrait_internal + if TYPE_CHECKING: from datafusion.context import SessionContext diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index d9d994b22..c97f453d0 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -19,14 +19,15 @@ from __future__ import annotations -import datafusion._internal as df_internal -from datafusion.expr import Expr -from typing import Callable, TYPE_CHECKING, TypeVar from abc import ABCMeta, abstractmethod -from typing import List, Optional from enum import Enum +from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar + import pyarrow +import datafusion._internal as df_internal +from datafusion.expr import Expr + if TYPE_CHECKING: _R = TypeVar("_R", bound=pyarrow.DataType) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 1cc07e500..9548fbfe4 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa import pytest from datafusion import SessionContext -import pyarrow as pa from pyarrow.csv import write_csv diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 243a8c3c9..5ef46131b 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -18,7 +18,6 @@ import numpy as np import pyarrow as pa import pytest - from datafusion import SessionContext, column, lit from datafusion import functions as f from datafusion.common import NullTreatment diff --git a/python/tests/test_config.py b/python/tests/test_config.py index 12d9fc3ff..c1d7f97e1 100644 --- a/python/tests/test_config.py +++ b/python/tests/test_config.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. 
-from datafusion import Config import pytest +from datafusion import Config @pytest.fixture diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 10e8ad0e9..91046e6b8 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -14,15 +14,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import datetime as dt import gzip import os -import datetime as dt import pathlib import pyarrow as pa import pyarrow.dataset as ds import pytest - from datafusion import ( DataFrame, RuntimeEnvBuilder, diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index a1a871e9a..5bc3fb094 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -18,11 +18,8 @@ from typing import Any import pyarrow as pa -from pyarrow.csv import write_csv import pyarrow.parquet as pq import pytest - -from datafusion import functions as f from datafusion import ( DataFrame, SessionContext, @@ -30,7 +27,9 @@ column, literal, ) +from datafusion import functions as f from datafusion.expr import Window +from pyarrow.csv import write_csv @pytest.fixture diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index add170c17..ad6aa7c0a 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,15 +15,13 @@ # specific language governing permissions and limitations # under the License. 
import math +from datetime import datetime import numpy as np import pyarrow as pa import pytest -from datetime import datetime - -from datafusion import SessionContext, column +from datafusion import SessionContext, column, literal, string_literal from datafusion import functions as f -from datafusion import literal, string_literal np.seterr(invalid="ignore") diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 6ea77b15f..0c155cbde 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -15,72 +15,69 @@ # specific language governing permissions and limitations # under the License. -import pytest - import datafusion +import pytest from datafusion import ( AggregateUDF, DataFrame, - SessionContext, ScalarUDF, + SessionContext, functions, ) - from datafusion.common import ( DFSchema, ) - from datafusion.expr import ( - Expr, - Column, - Literal, - BinaryExpr, - AggregateFunction, - Projection, - TableScan, - Filter, - Limit, Aggregate, - Sort, - Analyze, - Join, - JoinType, - JoinConstraint, - Union, - Like, - ILike, - SimilarTo, - ScalarVariable, + AggregateFunction, Alias, - Not, - IsNotNull, - IsTrue, - IsFalse, - IsUnknown, - IsNotTrue, - IsNotFalse, - IsNotUnknown, - Negative, - InList, - Exists, - Subquery, - InSubquery, - ScalarSubquery, - GroupingSet, - Placeholder, + Analyze, + Between, + BinaryExpr, Case, Cast, - TryCast, - SubqueryAlias, - Between, - Explain, - Extension, + Column, CreateMemoryTable, CreateView, Distinct, DropTable, - Repartition, + Exists, + Explain, + Expr, + Extension, + Filter, + GroupingSet, + ILike, + InList, + InSubquery, + IsFalse, + IsNotFalse, + IsNotNull, + IsNotTrue, + IsNotUnknown, + IsTrue, + IsUnknown, + Join, + JoinConstraint, + JoinType, + Like, + Limit, + Literal, + Negative, + Not, Partitioning, + Placeholder, + Projection, + Repartition, + ScalarSubquery, + ScalarVariable, + SimilarTo, + Sort, + Subquery, + SubqueryAlias, + TableScan, + TryCast, + Union, ) diff --git 
a/python/tests/test_indexing.py b/python/tests/test_indexing.py index 8ca3eab19..5b0d08610 100644 --- a/python/tests/test_indexing.py +++ b/python/tests/test_indexing.py @@ -17,7 +17,6 @@ import pyarrow as pa import pytest - from datafusion import SessionContext diff --git a/python/tests/test_input.py b/python/tests/test_input.py index fb53d86e5..806471357 100644 --- a/python/tests/test_input.py +++ b/python/tests/test_input.py @@ -16,6 +16,7 @@ # under the License. import os + from datafusion.input.location import LocationInputPlugin diff --git a/python/tests/test_plans.py b/python/tests/test_plans.py index 0283a4e6a..396acbe97 100644 --- a/python/tests/test_plans.py +++ b/python/tests/test_plans.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from datafusion import SessionContext, LogicalPlan, ExecutionPlan import pytest +from datafusion import ExecutionPlan, LogicalPlan, SessionContext # Note: We must use CSV because memory tables are currently not supported for diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index a2521dd09..862f745bf 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -19,12 +19,11 @@ import numpy as np import pyarrow as pa -from pyarrow.csv import write_csv import pyarrow.dataset as ds import pytest +from datafusion import col, udf from datafusion.object_store import Http - -from datafusion import udf, col +from pyarrow.csv import write_csv from . import generic as helpers diff --git a/python/tests/test_store.py b/python/tests/test_store.py index f85b28311..53ffc3acf 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -18,7 +18,6 @@ import os import pytest - from datafusion import SessionContext diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index 2071c8f3b..feada7cde 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -16,10 +16,9 @@ # under the License. 
import pyarrow as pa - +import pytest from datafusion import SessionContext from datafusion import substrait as ss -import pytest @pytest.fixture diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 8f31748e0..0005a3da8 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -20,7 +20,6 @@ import pyarrow as pa import pyarrow.compute as pc import pytest - from datafusion import Accumulator, column, udaf diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index 568a66dbb..3a5dce6d6 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -from datafusion import udf, column import pyarrow as pa import pytest +from datafusion import column, udf @pytest.fixture diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 2099ac9bc..0ffa04179 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -17,10 +17,10 @@ import pyarrow as pa import pytest - -from datafusion import SessionContext, column, udwf, lit, functions as f -from datafusion.udf import WindowEvaluator +from datafusion import SessionContext, column, lit, udwf +from datafusion import functions as f from datafusion.expr import WindowFrame +from datafusion.udf import WindowEvaluator class ExponentialSmoothDefault(WindowEvaluator): From 8b513906315a0749b9f5cd6f34bf259ab4dd1add Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 1 Feb 2025 08:29:48 -0500 Subject: [PATCH 004/206] feat: remove DataFusion pyarrow feat (#1000) * Add developer instructions to speed up build processes * Remove pyarrow dep from datafusion. Add in PyScalarValue wrapper and rename DataFusionError to PyDataFusionError to be less confusing * Removed unnecessary cloning of scalar value when going from rust to python. 
Also removed the rust unit tests copied over from upstream repo that were failing due to #941 in pyo3 * Change return types to PyDataFusionError to simplify code * Update exception handling to fix build errors in recent rust toolchains --- Cargo.lock | 145 +++++++++++------- Cargo.toml | 2 +- .../source/contributor-guide/introduction.rst | 53 +++++++ python/tests/test_indexing.py | 3 +- src/catalog.rs | 8 +- src/common/data_type.rs | 14 ++ src/config.rs | 11 +- src/context.rs | 136 ++++++++-------- src/dataframe.rs | 119 +++++++------- src/dataset_exec.rs | 6 +- src/errors.rs | 42 ++--- src/expr.rs | 38 ++--- src/expr/conditional_expr.rs | 6 +- src/expr/literal.rs | 4 +- src/expr/window.rs | 13 +- src/functions.rs | 55 ++++--- src/lib.rs | 1 + src/physical_plan.rs | 13 +- src/pyarrow_filter_expression.rs | 24 +-- src/pyarrow_util.rs | 61 ++++++++ src/record_batch.rs | 3 +- src/sql/exceptions.rs | 16 +- src/sql/logical.rs | 14 +- src/substrait.rs | 54 ++++--- src/udaf.rs | 21 ++- src/udwf.rs | 4 +- src/utils.rs | 6 +- 27 files changed, 524 insertions(+), 348 deletions(-) create mode 100644 src/pyarrow_util.rs diff --git a/Cargo.lock b/Cargo.lock index 5a74a4839..c6590fd21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,7 +79,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", - "getrandom", + "getrandom 0.2.15", "once_cell", "version_check", "zerocopy", @@ -449,9 +449,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.85" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", @@ -576,9 +576,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.1" +version = "4.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -586,9 +586,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "byteorder" @@ -635,9 +635,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.10" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" +checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" dependencies = [ "jobserver", "libc", @@ -692,9 +692,9 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.52" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" +checksum = "e24a03c8b52922d68a1589ad61032f2c1aa5a8158d2aa0d93c6e9534944bbad6" dependencies = [ "cc", ] @@ -725,7 +725,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom", + "getrandom 0.2.15", "once_cell", "tiny-keccak", ] @@ -784,9 +784,9 @@ checksum = "69f3b219d28b6e3b4ac87bc1fc522e0803ab22e055da177bff0068c4150c61a6" [[package]] name = "cpufeatures" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" 
dependencies = [ "libc", ] @@ -817,9 +817,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -961,7 +961,6 @@ dependencies = [ "object_store", "parquet", "paste", - "pyo3", "recursive", "sqlparser", "tokio", @@ -1411,9 +1410,9 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35" [[package]] name = "either" @@ -1607,10 +1606,22 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets", +] + [[package]] name = "gimli" version = "0.31.1" @@ -1722,9 +1733,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" [[package]] name = "humantime" @@ -1734,9 +1745,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.5.2" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ "bytes", "futures-channel", @@ -1953,9 +1964,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1975,9 +1986,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.10.1" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "itertools" @@ -2243,7 +2254,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -2377,9 +2388,9 @@ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "ordered-float" @@ -2661,9 +2672,9 @@ dependencies = [ [[package]] name = "protobuf-src" -version = "2.1.0+27.1" +version = "2.1.1+27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7edafa3bcc668fa93efafcbdf58d7821bbda0f4b458ac7fae3d57ec0fec8167" +checksum = 
"6217c3504da19b85a3a4b2e9a5183d635822d83507ba0986624b5c05b83bfc40" dependencies = [ "cmake", ] @@ -2794,7 +2805,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", - "getrandom", + "getrandom 0.2.15", "rand", "ring", "rustc-hash", @@ -2857,7 +2868,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -2926,9 +2937,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f56e622c2378013c6c61e2bd776604c46dc1087b2dc5293275a0c20a44f0771" +checksum = "78ef7fa9ed0256d64a688a3747d0fef7a88851c18a5e1d57f115f38ec2e09366" dependencies = [ "hashbrown 0.15.2", "memchr", @@ -2997,7 +3008,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", "cfg-if", - "getrandom", + "getrandom 0.2.15", "libc", "spin", "untrusted", @@ -3033,9 +3044,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags 2.8.0", "errno", @@ -3046,9 +3057,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.21" +version = "0.23.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" +checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" dependencies = [ "once_cell", "ring", @@ -3081,9 
+3092,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" dependencies = [ "web-time", ] @@ -3107,9 +3118,9 @@ checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "same-file" @@ -3184,9 +3195,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" dependencies = [ "serde", ] @@ -3239,9 +3250,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.136" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "336a0c23cf42a38d9eaa7cd22c7040d04e1228a19a933890805ffd00a16437d2" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ "itoa", "memchr", @@ -3514,13 +3525,13 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.15.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" +checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" dependencies = [ "cfg-if", "fastrand", - "getrandom", + "getrandom 0.3.1", "once_cell", 
"rustix", "windows-sys 0.59.0", @@ -3831,9 +3842,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-segmentation" @@ -3890,11 +3901,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ - "getrandom", + "getrandom 0.2.15", "serde", ] @@ -3929,6 +3940,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -4185,6 +4205,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/Cargo.toml b/Cargo.toml index 10cffccb1..003ba36e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ tokio = { version = 
"1.42", features = ["macros", "rt", "rt-multi-thread", "sync pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "44.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion = { version = "44.0.0", features = ["avro", "unicode_expressions"] } datafusion-substrait = { version = "44.0.0", optional = true } datafusion-proto = { version = "44.0.0" } datafusion-ffi = { version = "44.0.0" } diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index fb98cfd1d..25f2c21a4 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -95,3 +95,56 @@ To update dependencies, run .. code-block:: shell uv sync --dev --no-install-package datafusion + +Improving Build Speed +--------------------- + +The `pyo3 `_ dependency of this project contains a ``build.rs`` file which +can cause it to rebuild frequently. You can prevent this from happening by defining a ``PYO3_CONFIG_FILE`` +environment variable that points to a file with your build configuration. Whenever your build configuration +changes, such as during some major version updates, you will need to regenerate this file. This variable +should point to a fully resolved path on your build machine. + +To generate this file, use the following command: + +.. code-block:: shell + + PYO3_PRINT_CONFIG=1 cargo build + +This will generate some output that looks like the following. You will want to copy these contents intro +a file. If you place this file in your project directory with filename ``.pyo3_build_config`` it will +be ignored by ``git``. + +.. 
code-block:: + + implementation=CPython + version=3.8 + shared=true + abi3=true + lib_name=python3.12 + lib_dir=/opt/homebrew/opt/python@3.12/Frameworks/Python.framework/Versions/3.12/lib + executable=/Users/myusername/src/datafusion-python/.venv/bin/python + pointer_width=64 + build_flags= + suppress_build_script_link_lines=false + +Add the environment variable to your system. + +.. code-block:: shell + + export PYO3_CONFIG_FILE="/Users/myusername/src/datafusion-python/.pyo3_build_config" + +If you are on a Mac and you use VS Code for your IDE, you will want to add these variables +to your settings. You can find the appropriate rust flags by looking in the +``.cargo/config.toml`` file. + +.. code-block:: + + "rust-analyzer.cargo.extraEnv": { + "RUSTFLAGS": "-C link-arg=-undefined -C link-arg=dynamic_lookup", + "PYO3_CONFIG_FILE": "/Users/myusername/src/datafusion-python/.pyo3_build_config" + }, + "rust-analyzer.runnables.extraEnv": { + "RUSTFLAGS": "-C link-arg=-undefined -C link-arg=dynamic_lookup", + "PYO3_CONFIG_FILE": "/Users/myusername/src/datafusion-python/.pyo3_build_config" + } diff --git a/python/tests/test_indexing.py b/python/tests/test_indexing.py index 5b0d08610..327decd2f 100644 --- a/python/tests/test_indexing.py +++ b/python/tests/test_indexing.py @@ -43,7 +43,8 @@ def test_err(df): with pytest.raises(Exception) as e_info: df["c"] - assert "Schema error: No field named c."
in e_info.value.args[0] + for e in ["SchemaError", "FieldNotFound", 'name: "c"']: + assert e in e_info.value.args[0] with pytest.raises(Exception) as e_info: df[1] diff --git a/src/catalog.rs b/src/catalog.rs index 1ce66a4dc..1e189a5aa 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use pyo3::exceptions::PyKeyError; use pyo3::prelude::*; -use crate::errors::DataFusionError; +use crate::errors::{PyDataFusionError, PyDataFusionResult}; use crate::utils::wait_for_future; use datafusion::{ arrow::pyarrow::ToPyArrow, @@ -96,11 +96,13 @@ impl PyDatabase { self.database.table_names().into_iter().collect() } - fn table(&self, name: &str, py: Python) -> PyResult { + fn table(&self, name: &str, py: Python) -> PyDataFusionResult { if let Some(table) = wait_for_future(py, self.database.table(name))? { Ok(PyTable::new(table)) } else { - Err(DataFusionError::Common(format!("Table not found: {name}")).into()) + Err(PyDataFusionError::Common(format!( + "Table not found: {name}" + ))) } } diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 7f9c75bfd..f5f8a6b06 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -23,6 +23,20 @@ use pyo3::{exceptions::PyValueError, prelude::*}; use crate::errors::py_datafusion_err; +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd)] +pub struct PyScalarValue(pub ScalarValue); + +impl From for PyScalarValue { + fn from(value: ScalarValue) -> Self { + Self(value) + } +} +impl From for ScalarValue { + fn from(value: PyScalarValue) -> Self { + value.0 + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[pyclass(eq, eq_int, name = "RexType", module = "datafusion.common")] pub enum RexType { diff --git a/src/config.rs b/src/config.rs index 3f2a05580..cc725b9a3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -21,6 +21,8 @@ use pyo3::types::*; use datafusion::common::ScalarValue; use datafusion::config::ConfigOptions; +use crate::errors::PyDataFusionResult; 
+ #[pyclass(name = "Config", module = "datafusion", subclass)] #[derive(Clone)] pub(crate) struct PyConfig { @@ -38,7 +40,7 @@ impl PyConfig { /// Get configurations from environment variables #[staticmethod] - pub fn from_env() -> PyResult { + pub fn from_env() -> PyDataFusionResult { Ok(Self { config: ConfigOptions::from_env()?, }) @@ -56,11 +58,10 @@ impl PyConfig { } /// Set a configuration option - pub fn set(&mut self, key: &str, value: PyObject, py: Python) -> PyResult<()> { + pub fn set(&mut self, key: &str, value: PyObject, py: Python) -> PyDataFusionResult<()> { let scalar_value = py_obj_to_scalar_value(py, value); - self.config - .set(key, scalar_value.to_string().as_str()) - .map_err(|e| e.into()) + self.config.set(key, scalar_value.to_string().as_str())?; + Ok(()) } /// Get all configuration options diff --git a/src/context.rs b/src/context.rs index bab7fd42a..f53b15576 100644 --- a/src/context.rs +++ b/src/context.rs @@ -28,16 +28,17 @@ use object_store::ObjectStore; use url::Url; use uuid::Uuid; -use pyo3::exceptions::{PyKeyError, PyNotImplementedError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; use crate::catalog::{PyCatalog, PyTable}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; -use crate::errors::{py_datafusion_err, DataFusionError}; +use crate::errors::{py_datafusion_err, PyDataFusionResult}; use crate::expr::sort_expr::PySortExpr; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; +use crate::sql::exceptions::py_value_err; use crate::sql::logical::PyLogicalPlan; use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; @@ -277,7 +278,7 @@ impl PySessionContext { pub fn new( config: Option, runtime: Option, - ) -> PyResult { + ) -> PyDataFusionResult { let config = if let Some(c) = config { c.config } else { @@ -348,7 +349,7 @@ impl PySessionContext { schema: Option>, file_sort_order: Option>>, py: Python, - ) -> 
PyResult<()> { + ) -> PyDataFusionResult<()> { let options = ListingOptions::new(Arc::new(ParquetFormat::new())) .with_file_extension(file_extension) .with_table_partition_cols(convert_table_partition_cols(table_partition_cols)?) @@ -365,7 +366,7 @@ impl PySessionContext { None => { let state = self.ctx.state(); let schema = options.infer_schema(&state, &table_path); - wait_for_future(py, schema).map_err(DataFusionError::from)? + wait_for_future(py, schema)? } }; let config = ListingTableConfig::new(table_path) @@ -382,9 +383,9 @@ impl PySessionContext { } /// Returns a PyDataFrame whose plan corresponds to the SQL statement. - pub fn sql(&mut self, query: &str, py: Python) -> PyResult { + pub fn sql(&mut self, query: &str, py: Python) -> PyDataFusionResult { let result = self.ctx.sql(query); - let df = wait_for_future(py, result).map_err(DataFusionError::from)?; + let df = wait_for_future(py, result)?; Ok(PyDataFrame::new(df)) } @@ -394,14 +395,14 @@ impl PySessionContext { query: &str, options: Option, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let options = if let Some(options) = options { options.options } else { SQLOptions::new() }; let result = self.ctx.sql_with_options(query, options); - let df = wait_for_future(py, result).map_err(DataFusionError::from)?; + let df = wait_for_future(py, result)?; Ok(PyDataFrame::new(df)) } @@ -412,14 +413,14 @@ impl PySessionContext { name: Option<&str>, schema: Option>, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let schema = if let Some(schema) = schema { SchemaRef::from(schema.0) } else { partitions.0[0][0].schema() }; - let table = MemTable::try_new(schema, partitions.0).map_err(DataFusionError::from)?; + let table = MemTable::try_new(schema, partitions.0)?; // generate a random (unique) name for this table if none is provided // table name cannot start with numeric digit @@ -433,11 +434,9 @@ impl PySessionContext { } }; - self.ctx - .register_table(&*table_name, Arc::new(table)) - 
.map_err(DataFusionError::from)?; + self.ctx.register_table(&*table_name, Arc::new(table))?; - let table = wait_for_future(py, self._table(&table_name)).map_err(DataFusionError::from)?; + let table = wait_for_future(py, self._table(&table_name))?; let df = PyDataFrame::new(table); Ok(df) @@ -495,15 +494,14 @@ impl PySessionContext { data: Bound<'_, PyAny>, name: Option<&str>, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let (schema, batches) = if let Ok(stream_reader) = ArrowArrayStreamReader::from_pyarrow_bound(&data) { // Works for any object that implements __arrow_c_stream__ in pycapsule. let schema = stream_reader.schema().as_ref().to_owned(); let batches = stream_reader - .collect::, arrow::error::ArrowError>>() - .map_err(DataFusionError::from)?; + .collect::, arrow::error::ArrowError>>()?; (schema, batches) } else if let Ok(array) = RecordBatch::from_pyarrow_bound(&data) { @@ -512,8 +510,8 @@ impl PySessionContext { (array.schema().as_ref().to_owned(), vec![array]) } else { - return Err(PyTypeError::new_err( - "Expected either a Arrow Array or Arrow Stream in from_arrow().", + return Err(crate::errors::PyDataFusionError::Common( + "Expected either a Arrow Array or Arrow Stream in from_arrow().".to_string(), )); }; @@ -559,17 +557,13 @@ impl PySessionContext { Ok(df) } - pub fn register_table(&mut self, name: &str, table: &PyTable) -> PyResult<()> { - self.ctx - .register_table(name, table.table()) - .map_err(DataFusionError::from)?; + pub fn register_table(&mut self, name: &str, table: &PyTable) -> PyDataFusionResult<()> { + self.ctx.register_table(name, table.table())?; Ok(()) } - pub fn deregister_table(&mut self, name: &str) -> PyResult<()> { - self.ctx - .deregister_table(name) - .map_err(DataFusionError::from)?; + pub fn deregister_table(&mut self, name: &str) -> PyDataFusionResult<()> { + self.ctx.deregister_table(name)?; Ok(()) } @@ -578,10 +572,10 @@ impl PySessionContext { &mut self, name: &str, provider: Bound<'_, PyAny>, - ) -> 
PyResult<()> { + ) -> PyDataFusionResult<()> { if provider.hasattr("__datafusion_table_provider__")? { let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; - let capsule = capsule.downcast::()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; validate_pycapsule(capsule, "datafusion_table_provider")?; let provider = unsafe { capsule.reference::() }; @@ -591,8 +585,9 @@ impl PySessionContext { Ok(()) } else { - Err(PyNotImplementedError::new_err( - "__datafusion_table_provider__ does not exist on Table Provider object.", + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_table_provider__ does not exist on Table Provider object." + .to_string(), )) } } @@ -601,12 +596,10 @@ impl PySessionContext { &mut self, name: &str, partitions: PyArrowType>>, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let schema = partitions.0[0][0].schema(); let table = MemTable::try_new(schema, partitions.0)?; - self.ctx - .register_table(name, Arc::new(table)) - .map_err(DataFusionError::from)?; + self.ctx.register_table(name, Arc::new(table))?; Ok(()) } @@ -628,7 +621,7 @@ impl PySessionContext { schema: Option>, file_sort_order: Option>>, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let mut options = ParquetReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
.parquet_pruning(parquet_pruning) @@ -642,7 +635,7 @@ impl PySessionContext { .collect(); let result = self.ctx.register_parquet(name, path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; Ok(()) } @@ -666,12 +659,12 @@ impl PySessionContext { file_extension: &str, file_compression_type: Option, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let delimiter = delimiter.as_bytes(); if delimiter.len() != 1 { - return Err(PyValueError::new_err( + return Err(crate::errors::PyDataFusionError::PythonError(py_value_err( "Delimiter must be a single character", - )); + ))); } let mut options = CsvReadOptions::new() @@ -685,11 +678,11 @@ impl PySessionContext { if path.is_instance_of::() { let paths = path.extract::>()?; let result = self.register_csv_from_multiple_paths(name, paths, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; } else { let path = path.extract::()?; let result = self.ctx.register_csv(name, &path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; } Ok(()) @@ -713,7 +706,7 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_compression_type: Option, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; @@ -726,7 +719,7 @@ impl PySessionContext { options.schema = schema.as_ref().map(|x| &x.0); let result = self.ctx.register_json(name, path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; Ok(()) } @@ -745,7 +738,7 @@ impl PySessionContext { file_extension: &str, table_partition_cols: Vec<(String, String)>, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; @@ -756,7 
+749,7 @@ impl PySessionContext { options.schema = schema.as_ref().map(|x| &x.0); let result = self.ctx.register_avro(name, path, options); - wait_for_future(py, result).map_err(DataFusionError::from)?; + wait_for_future(py, result)?; Ok(()) } @@ -767,12 +760,10 @@ impl PySessionContext { name: &str, dataset: &Bound<'_, PyAny>, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { let table: Arc = Arc::new(Dataset::new(dataset, py)?); - self.ctx - .register_table(name, table) - .map_err(DataFusionError::from)?; + self.ctx.register_table(name, table)?; Ok(()) } @@ -824,11 +815,11 @@ impl PySessionContext { Ok(PyDataFrame::new(x)) } - pub fn table_exist(&self, name: &str) -> PyResult { + pub fn table_exist(&self, name: &str) -> PyDataFusionResult { Ok(self.ctx.table_exist(name)?) } - pub fn empty_table(&self) -> PyResult { + pub fn empty_table(&self) -> PyDataFusionResult { Ok(PyDataFrame::new(self.ctx.read_empty()?)) } @@ -847,7 +838,7 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_compression_type: Option, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let path = path .to_str() .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; @@ -859,10 +850,10 @@ impl PySessionContext { let df = if let Some(schema) = schema { options.schema = Some(&schema.0); let result = self.ctx.read_json(path, options); - wait_for_future(py, result).map_err(DataFusionError::from)? + wait_for_future(py, result)? } else { let result = self.ctx.read_json(path, options); - wait_for_future(py, result).map_err(DataFusionError::from)? + wait_for_future(py, result)? 
}; Ok(PyDataFrame::new(df)) } @@ -888,12 +879,12 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_compression_type: Option, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let delimiter = delimiter.as_bytes(); if delimiter.len() != 1 { - return Err(PyValueError::new_err( + return Err(crate::errors::PyDataFusionError::PythonError(py_value_err( "Delimiter must be a single character", - )); + ))); }; let mut options = CsvReadOptions::new() @@ -909,12 +900,12 @@ impl PySessionContext { let paths = path.extract::>()?; let paths = paths.iter().map(|p| p as &str).collect::>(); let result = self.ctx.read_csv(paths, options); - let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?); + let df = PyDataFrame::new(wait_for_future(py, result)?); Ok(df) } else { let path = path.extract::()?; let result = self.ctx.read_csv(path, options); - let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?); + let df = PyDataFrame::new(wait_for_future(py, result)?); Ok(df) } } @@ -938,7 +929,7 @@ impl PySessionContext { schema: Option>, file_sort_order: Option>>, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let mut options = ParquetReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?) 
.parquet_pruning(parquet_pruning) @@ -952,7 +943,7 @@ impl PySessionContext { .collect(); let result = self.ctx.read_parquet(path, options); - let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?); + let df = PyDataFrame::new(wait_for_future(py, result)?); Ok(df) } @@ -965,26 +956,23 @@ impl PySessionContext { table_partition_cols: Vec<(String, String)>, file_extension: &str, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let mut options = AvroReadOptions::default() .table_partition_cols(convert_table_partition_cols(table_partition_cols)?); options.file_extension = file_extension; let df = if let Some(schema) = schema { options.schema = Some(&schema.0); let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future).map_err(DataFusionError::from)? + wait_for_future(py, read_future)? } else { let read_future = self.ctx.read_avro(path, options); - wait_for_future(py, read_future).map_err(DataFusionError::from)? + wait_for_future(py, read_future)? 
}; Ok(PyDataFrame::new(df)) } - pub fn read_table(&self, table: &PyTable) -> PyResult { - let df = self - .ctx - .read_table(table.table()) - .map_err(DataFusionError::from)?; + pub fn read_table(&self, table: &PyTable) -> PyDataFusionResult { + let df = self.ctx.read_table(table.table())?; Ok(PyDataFrame::new(df)) } @@ -1011,7 +999,7 @@ impl PySessionContext { plan: PyExecutionPlan, part: usize, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let ctx: TaskContext = TaskContext::from(&self.ctx.state()); // create a Tokio runtime to run the async code let rt = &get_tokio_runtime().0; @@ -1071,13 +1059,13 @@ impl PySessionContext { pub fn convert_table_partition_cols( table_partition_cols: Vec<(String, String)>, -) -> Result, DataFusionError> { +) -> PyDataFusionResult> { table_partition_cols .into_iter() .map(|(name, ty)| match ty.as_str() { "string" => Ok((name, DataType::Utf8)), "int" => Ok((name, DataType::Int32)), - _ => Err(DataFusionError::Common(format!( + _ => Err(crate::errors::PyDataFusionError::Common(format!( "Unsupported data type '{ty}' for partition column. 
Supported types are 'string' and 'int'" ))), }) diff --git a/src/dataframe.rs b/src/dataframe.rs index b875480a7..6fb08ba25 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -33,20 +33,20 @@ use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; -use pyo3::exceptions::{PyTypeError, PyValueError}; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; use pyo3::types::{PyCapsule, PyTuple, PyTupleMethods}; use tokio::task::JoinHandle; -use crate::errors::py_datafusion_err; +use crate::errors::{py_datafusion_err, PyDataFusionError}; use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; use crate::{ - errors::DataFusionError, + errors::PyDataFusionResult, expr::{sort_expr::PySortExpr, PyExpr}, }; @@ -69,7 +69,7 @@ impl PyDataFrame { #[pymethods] impl PyDataFrame { /// Enable selection for `df[col]`, `df[col1, col2, col3]`, and `df[[col1, col2, col3]]` - fn __getitem__(&self, key: Bound<'_, PyAny>) -> PyResult { + fn __getitem__(&self, key: Bound<'_, PyAny>) -> PyDataFusionResult { if let Ok(key) = key.extract::() { // df[col] self.select_columns(vec![key]) @@ -84,12 +84,12 @@ impl PyDataFrame { // df[[col1, col2, col3]] self.select_columns(keys) } else { - let message = "DataFrame can only be indexed by string index or indices"; - Err(PyTypeError::new_err(message)) + let message = "DataFrame can only be indexed by string index or indices".to_string(); + Err(PyDataFusionError::Common(message)) } } - fn __repr__(&self, py: Python) -> PyResult { + fn __repr__(&self, py: Python) -> PyDataFusionResult { let df = self.df.as_ref().clone().limit(0, Some(10))?; let 
batches = wait_for_future(py, df.collect())?; let batches_as_string = pretty::pretty_format_batches(&batches); @@ -99,7 +99,7 @@ impl PyDataFrame { } } - fn _repr_html_(&self, py: Python) -> PyResult { + fn _repr_html_(&self, py: Python) -> PyDataFusionResult { let mut html_str = "\n".to_string(); let df = self.df.as_ref().clone().limit(0, Some(10))?; @@ -145,7 +145,7 @@ impl PyDataFrame { } /// Calculate summary statistics for a DataFrame - fn describe(&self, py: Python) -> PyResult { + fn describe(&self, py: Python) -> PyDataFusionResult { let df = self.df.as_ref().clone(); let stat_df = wait_for_future(py, df.describe())?; Ok(Self::new(stat_df)) @@ -157,37 +157,37 @@ impl PyDataFrame { } #[pyo3(signature = (*args))] - fn select_columns(&self, args: Vec) -> PyResult { + fn select_columns(&self, args: Vec) -> PyDataFusionResult { let args = args.iter().map(|s| s.as_ref()).collect::>(); let df = self.df.as_ref().clone().select_columns(&args)?; Ok(Self::new(df)) } #[pyo3(signature = (*args))] - fn select(&self, args: Vec) -> PyResult { + fn select(&self, args: Vec) -> PyDataFusionResult { let expr = args.into_iter().map(|e| e.into()).collect(); let df = self.df.as_ref().clone().select(expr)?; Ok(Self::new(df)) } #[pyo3(signature = (*args))] - fn drop(&self, args: Vec) -> PyResult { + fn drop(&self, args: Vec) -> PyDataFusionResult { let cols = args.iter().map(|s| s.as_ref()).collect::>(); let df = self.df.as_ref().clone().drop_columns(&cols)?; Ok(Self::new(df)) } - fn filter(&self, predicate: PyExpr) -> PyResult { + fn filter(&self, predicate: PyExpr) -> PyDataFusionResult { let df = self.df.as_ref().clone().filter(predicate.into())?; Ok(Self::new(df)) } - fn with_column(&self, name: &str, expr: PyExpr) -> PyResult { + fn with_column(&self, name: &str, expr: PyExpr) -> PyDataFusionResult { let df = self.df.as_ref().clone().with_column(name, expr.into())?; Ok(Self::new(df)) } - fn with_columns(&self, exprs: Vec) -> PyResult { + fn with_columns(&self, exprs: Vec) -> 
PyDataFusionResult { let mut df = self.df.as_ref().clone(); for expr in exprs { let expr: Expr = expr.into(); @@ -199,7 +199,7 @@ impl PyDataFrame { /// Rename one column by applying a new projection. This is a no-op if the column to be /// renamed does not exist. - fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyResult { + fn with_column_renamed(&self, old_name: &str, new_name: &str) -> PyDataFusionResult { let df = self .df .as_ref() @@ -208,7 +208,7 @@ impl PyDataFrame { Ok(Self::new(df)) } - fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyResult { + fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyDataFusionResult { let group_by = group_by.into_iter().map(|e| e.into()).collect(); let aggs = aggs.into_iter().map(|e| e.into()).collect(); let df = self.df.as_ref().clone().aggregate(group_by, aggs)?; @@ -216,14 +216,14 @@ impl PyDataFrame { } #[pyo3(signature = (*exprs))] - fn sort(&self, exprs: Vec) -> PyResult { + fn sort(&self, exprs: Vec) -> PyDataFusionResult { let exprs = to_sort_expressions(exprs); let df = self.df.as_ref().clone().sort(exprs)?; Ok(Self::new(df)) } #[pyo3(signature = (count, offset=0))] - fn limit(&self, count: usize, offset: usize) -> PyResult { + fn limit(&self, count: usize, offset: usize) -> PyDataFusionResult { let df = self.df.as_ref().clone().limit(offset, Some(count))?; Ok(Self::new(df)) } @@ -232,14 +232,15 @@ impl PyDataFrame { /// Unless some order is specified in the plan, there is no /// guarantee of the order of the result. fn collect(&self, py: Python) -> PyResult> { - let batches = wait_for_future(py, self.df.as_ref().clone().collect())?; + let batches = wait_for_future(py, self.df.as_ref().clone().collect()) + .map_err(PyDataFusionError::from)?; // cannot use PyResult> return type due to // https://github.com/PyO3/pyo3/issues/1813 batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect() } /// Cache DataFrame. 
- fn cache(&self, py: Python) -> PyResult { + fn cache(&self, py: Python) -> PyDataFusionResult { let df = wait_for_future(py, self.df.as_ref().clone().cache())?; Ok(Self::new(df)) } @@ -247,7 +248,8 @@ impl PyDataFrame { /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch /// maintaining the input partitioning. fn collect_partitioned(&self, py: Python) -> PyResult>> { - let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned())?; + let batches = wait_for_future(py, self.df.as_ref().clone().collect_partitioned()) + .map_err(PyDataFusionError::from)?; batches .into_iter() @@ -257,13 +259,13 @@ impl PyDataFrame { /// Print the result, 20 lines by default #[pyo3(signature = (num=20))] - fn show(&self, py: Python, num: usize) -> PyResult<()> { + fn show(&self, py: Python, num: usize) -> PyDataFusionResult<()> { let df = self.df.as_ref().clone().limit(0, Some(num))?; print_dataframe(py, df) } /// Filter out duplicate rows - fn distinct(&self) -> PyResult { + fn distinct(&self) -> PyDataFusionResult { let df = self.df.as_ref().clone().distinct()?; Ok(Self::new(df)) } @@ -274,7 +276,7 @@ impl PyDataFrame { how: &str, left_on: Vec, right_on: Vec, - ) -> PyResult { + ) -> PyDataFusionResult { let join_type = match how { "inner" => JoinType::Inner, "left" => JoinType::Left, @@ -283,10 +285,9 @@ impl PyDataFrame { "semi" => JoinType::LeftSemi, "anti" => JoinType::LeftAnti, how => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "The join type {how} does not exist or is not implemented" - )) - .into()); + ))); } }; @@ -303,7 +304,12 @@ impl PyDataFrame { Ok(Self::new(df)) } - fn join_on(&self, right: PyDataFrame, on_exprs: Vec, how: &str) -> PyResult { + fn join_on( + &self, + right: PyDataFrame, + on_exprs: Vec, + how: &str, + ) -> PyDataFusionResult { let join_type = match how { "inner" => JoinType::Inner, "left" => JoinType::Left, @@ -312,10 +318,9 @@ impl 
PyDataFrame { "semi" => JoinType::LeftSemi, "anti" => JoinType::LeftAnti, how => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "The join type {how} does not exist or is not implemented" - )) - .into()); + ))); } }; let exprs: Vec = on_exprs.into_iter().map(|e| e.into()).collect(); @@ -330,7 +335,7 @@ impl PyDataFrame { /// Print the query plan #[pyo3(signature = (verbose=false, analyze=false))] - fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { + fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyDataFusionResult<()> { let df = self.df.as_ref().clone().explain(verbose, analyze)?; print_dataframe(py, df) } @@ -341,18 +346,18 @@ impl PyDataFrame { } /// Get the optimized logical plan for this `DataFrame` - fn optimized_logical_plan(&self) -> PyResult { + fn optimized_logical_plan(&self) -> PyDataFusionResult { Ok(self.df.as_ref().clone().into_optimized_plan()?.into()) } /// Get the execution plan for this `DataFrame` - fn execution_plan(&self, py: Python) -> PyResult { + fn execution_plan(&self, py: Python) -> PyDataFusionResult { let plan = wait_for_future(py, self.df.as_ref().clone().create_physical_plan())?; Ok(plan.into()) } /// Repartition a `DataFrame` based on a logical partitioning scheme. - fn repartition(&self, num: usize) -> PyResult { + fn repartition(&self, num: usize) -> PyDataFusionResult { let new_df = self .df .as_ref() @@ -363,7 +368,7 @@ impl PyDataFrame { /// Repartition a `DataFrame` based on a logical partitioning scheme. 
#[pyo3(signature = (*args, num))] - fn repartition_by_hash(&self, args: Vec, num: usize) -> PyResult { + fn repartition_by_hash(&self, args: Vec, num: usize) -> PyDataFusionResult { let expr = args.into_iter().map(|py_expr| py_expr.into()).collect(); let new_df = self .df @@ -376,7 +381,7 @@ impl PyDataFrame { /// Calculate the union of two `DataFrame`s, preserving duplicate rows.The /// two `DataFrame`s must have exactly the same schema #[pyo3(signature = (py_df, distinct=false))] - fn union(&self, py_df: PyDataFrame, distinct: bool) -> PyResult { + fn union(&self, py_df: PyDataFrame, distinct: bool) -> PyDataFusionResult { let new_df = if distinct { self.df .as_ref() @@ -391,7 +396,7 @@ impl PyDataFrame { /// Calculate the distinct union of two `DataFrame`s. The /// two `DataFrame`s must have exactly the same schema - fn union_distinct(&self, py_df: PyDataFrame) -> PyResult { + fn union_distinct(&self, py_df: PyDataFrame) -> PyDataFusionResult { let new_df = self .df .as_ref() @@ -401,7 +406,7 @@ impl PyDataFrame { } #[pyo3(signature = (column, preserve_nulls=true))] - fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyResult { + fn unnest_column(&self, column: &str, preserve_nulls: bool) -> PyDataFusionResult { // TODO: expose RecursionUnnestOptions // REF: https://github.com/apache/datafusion/pull/11577 let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); @@ -414,7 +419,11 @@ impl PyDataFrame { } #[pyo3(signature = (columns, preserve_nulls=true))] - fn unnest_columns(&self, columns: Vec, preserve_nulls: bool) -> PyResult { + fn unnest_columns( + &self, + columns: Vec, + preserve_nulls: bool, + ) -> PyDataFusionResult { // TODO: expose RecursionUnnestOptions // REF: https://github.com/apache/datafusion/pull/11577 let unnest_options = UnnestOptions::default().with_preserve_nulls(preserve_nulls); @@ -428,7 +437,7 @@ impl PyDataFrame { } /// Calculate the intersection of two `DataFrame`s. 
The two `DataFrame`s must have exactly the same schema - fn intersect(&self, py_df: PyDataFrame) -> PyResult { + fn intersect(&self, py_df: PyDataFrame) -> PyDataFusionResult { let new_df = self .df .as_ref() @@ -438,13 +447,13 @@ impl PyDataFrame { } /// Calculate the exception of two `DataFrame`s. The two `DataFrame`s must have exactly the same schema - fn except_all(&self, py_df: PyDataFrame) -> PyResult { + fn except_all(&self, py_df: PyDataFrame) -> PyDataFusionResult { let new_df = self.df.as_ref().clone().except(py_df.df.as_ref().clone())?; Ok(Self::new(new_df)) } /// Write a `DataFrame` to a CSV file. - fn write_csv(&self, path: &str, with_header: bool, py: Python) -> PyResult<()> { + fn write_csv(&self, path: &str, with_header: bool, py: Python) -> PyDataFusionResult<()> { let csv_options = CsvOptions { has_header: Some(with_header), ..Default::default() @@ -472,7 +481,7 @@ impl PyDataFrame { compression: &str, compression_level: Option, py: Python, - ) -> PyResult<()> { + ) -> PyDataFusionResult<()> { fn verify_compression_level(cl: Option) -> Result { cl.ok_or(PyValueError::new_err("compression_level is not defined")) } @@ -496,7 +505,7 @@ impl PyDataFrame { "lz4_raw" => Compression::LZ4_RAW, "uncompressed" => Compression::UNCOMPRESSED, _ => { - return Err(PyValueError::new_err(format!( + return Err(PyDataFusionError::Common(format!( "Unrecognized compression type {compression}" ))); } @@ -522,7 +531,7 @@ impl PyDataFrame { } /// Executes a query and writes the results to a partitioned JSON file. 
- fn write_json(&self, path: &str, py: Python) -> PyResult<()> { + fn write_json(&self, path: &str, py: Python) -> PyDataFusionResult<()> { wait_for_future( py, self.df @@ -551,7 +560,7 @@ impl PyDataFrame { &'py mut self, py: Python<'py>, requested_schema: Option>, - ) -> PyResult> { + ) -> PyDataFusionResult> { let mut batches = wait_for_future(py, self.df.as_ref().clone().collect())?; let mut schema: Schema = self.df.schema().to_owned().into(); @@ -559,15 +568,14 @@ impl PyDataFrame { validate_pycapsule(&schema_capsule, "arrow_schema")?; let schema_ptr = unsafe { schema_capsule.reference::() }; - let desired_schema = Schema::try_from(schema_ptr).map_err(DataFusionError::from)?; + let desired_schema = Schema::try_from(schema_ptr)?; - schema = project_schema(schema, desired_schema).map_err(DataFusionError::ArrowError)?; + schema = project_schema(schema, desired_schema)?; batches = batches .into_iter() .map(|record_batch| record_batch_into_schema(record_batch, &schema)) - .collect::, ArrowError>>() - .map_err(DataFusionError::ArrowError)?; + .collect::, ArrowError>>()?; } let batches_wrapped = batches.into_iter().map(Ok); @@ -578,9 +586,10 @@ impl PyDataFrame { let ffi_stream = FFI_ArrowArrayStream::new(reader); let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name)) + .map_err(PyDataFusionError::from) } - fn execute_stream(&self, py: Python) -> PyResult { + fn execute_stream(&self, py: Python) -> PyDataFusionResult { // create a Tokio runtime to run the async code let rt = &get_tokio_runtime().0; let df = self.df.as_ref().clone(); @@ -647,13 +656,13 @@ impl PyDataFrame { } // Executes this DataFrame to get the total number of rows. - fn count(&self, py: Python) -> PyResult { + fn count(&self, py: Python) -> PyDataFusionResult { Ok(wait_for_future(py, self.df.as_ref().clone().count())?) 
} } /// Print DataFrame -fn print_dataframe(py: Python, df: DataFrame) -> PyResult<()> { +fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { // Get string representation of record batches let batches = wait_for_future(py, df.collect())?; let batches_as_string = pretty::pretty_format_batches(&batches); diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 9d2559429..ace42115b 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -42,7 +42,7 @@ use datafusion::physical_plan::{ SendableRecordBatchStream, Statistics, }; -use crate::errors::DataFusionError; +use crate::errors::PyDataFusionResult; use crate::pyarrow_filter_expression::PyArrowFilterExpression; struct PyArrowBatchesAdapter { @@ -83,8 +83,8 @@ impl DatasetExec { dataset: &Bound<'_, PyAny>, projection: Option>, filters: &[Expr], - ) -> Result { - let columns: Option, DataFusionError>> = projection.map(|p| { + ) -> PyDataFusionResult { + let columns: Option>> = projection.map(|p| { p.iter() .map(|index| { let name: String = dataset diff --git a/src/errors.rs b/src/errors.rs index d12b6ade1..b02b754a2 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -24,10 +24,10 @@ use datafusion::error::DataFusionError as InnerDataFusionError; use prost::EncodeError; use pyo3::{exceptions::PyException, PyErr}; -pub type Result = std::result::Result; +pub type PyDataFusionResult = std::result::Result; #[derive(Debug)] -pub enum DataFusionError { +pub enum PyDataFusionError { ExecutionError(InnerDataFusionError), ArrowError(ArrowError), Common(String), @@ -35,46 +35,46 @@ pub enum DataFusionError { EncodeError(EncodeError), } -impl fmt::Display for DataFusionError { +impl fmt::Display for PyDataFusionError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e:?}"), - DataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), - DataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), 
- DataFusionError::Common(e) => write!(f, "{e}"), - DataFusionError::EncodeError(e) => write!(f, "Failed to encode substrait plan: {e}"), + PyDataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e:?}"), + PyDataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), + PyDataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), + PyDataFusionError::Common(e) => write!(f, "{e}"), + PyDataFusionError::EncodeError(e) => write!(f, "Failed to encode substrait plan: {e}"), } } } -impl From for DataFusionError { - fn from(err: ArrowError) -> DataFusionError { - DataFusionError::ArrowError(err) +impl From for PyDataFusionError { + fn from(err: ArrowError) -> PyDataFusionError { + PyDataFusionError::ArrowError(err) } } -impl From for DataFusionError { - fn from(err: InnerDataFusionError) -> DataFusionError { - DataFusionError::ExecutionError(err) +impl From for PyDataFusionError { + fn from(err: InnerDataFusionError) -> PyDataFusionError { + PyDataFusionError::ExecutionError(err) } } -impl From for DataFusionError { - fn from(err: PyErr) -> DataFusionError { - DataFusionError::PythonError(err) +impl From for PyDataFusionError { + fn from(err: PyErr) -> PyDataFusionError { + PyDataFusionError::PythonError(err) } } -impl From for PyErr { - fn from(err: DataFusionError) -> PyErr { +impl From for PyErr { + fn from(err: PyDataFusionError) -> PyErr { match err { - DataFusionError::PythonError(py_err) => py_err, + PyDataFusionError::PythonError(py_err) => py_err, _ => PyException::new_err(err.to_string()), } } } -impl Error for DataFusionError {} +impl Error for PyDataFusionError {} pub fn py_type_err(e: impl Debug) -> PyErr { PyErr::new::(format!("{e:?}")) diff --git a/src/expr.rs b/src/expr.rs index bca0cd3fa..1e9983d42 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -24,7 +24,6 @@ use std::convert::{From, Into}; use std::sync::Arc; use window::PyWindowFrame; -use arrow::pyarrow::ToPyArrow; use datafusion::arrow::datatypes::{DataType, Field}; use 
datafusion::arrow::pyarrow::PyArrowType; use datafusion::functions::core::expr_ext::FieldAccessor; @@ -33,15 +32,17 @@ use datafusion::logical_expr::{ expr::{AggregateFunction, InList, InSubquery, ScalarFunction, WindowFunction}, lit, Between, BinaryExpr, Case, Cast, Expr, Like, Operator, TryCast, }; -use datafusion::scalar::ScalarValue; -use crate::common::data_type::{DataTypeMap, NullTreatment, RexType}; -use crate::errors::{py_runtime_err, py_type_err, py_unsupported_variant_err, DataFusionError}; +use crate::common::data_type::{DataTypeMap, NullTreatment, PyScalarValue, RexType}; +use crate::errors::{ + py_runtime_err, py_type_err, py_unsupported_variant_err, PyDataFusionError, PyDataFusionResult, +}; use crate::expr::aggregate_expr::PyAggregateFunction; use crate::expr::binary_expr::PyBinaryExpr; use crate::expr::column::PyColumn; use crate::expr::literal::PyLiteral; use crate::functions::add_builder_fns_to_window; +use crate::pyarrow_util::scalar_to_pyarrow; use crate::sql::logical::PyLogicalPlan; use self::alias::PyAlias; @@ -261,8 +262,8 @@ impl PyExpr { } #[staticmethod] - pub fn literal(value: ScalarValue) -> PyExpr { - lit(value).into() + pub fn literal(value: PyScalarValue) -> PyExpr { + lit(value.0).into() } #[staticmethod] @@ -356,7 +357,7 @@ impl PyExpr { /// Extracts the Expr value into a PyObject that can be shared with Python pub fn python_value(&self, py: Python) -> PyResult { match &self.expr { - Expr::Literal(scalar_value) => Ok(scalar_value.to_pyarrow(py)?), + Expr::Literal(scalar_value) => scalar_to_pyarrow(scalar_value, py), _ => Err(py_type_err(format!( "Non Expr::Literal encountered in types: {:?}", &self.expr @@ -568,7 +569,7 @@ impl PyExpr { window_frame: Option, order_by: Option>, null_treatment: Option, - ) -> PyResult { + ) -> PyDataFusionResult { match &self.expr { Expr::AggregateFunction(agg_fn) => { let window_fn = Expr::WindowFunction(WindowFunction::new( @@ -592,10 +593,9 @@ impl PyExpr { null_treatment, ), _ => Err( - 
DataFusionError::ExecutionError(datafusion::error::DataFusionError::Plan( + PyDataFusionError::ExecutionError(datafusion::error::DataFusionError::Plan( format!("Using {} with `over` is not allowed. Must use an aggregate or window function.", self.expr.variant_name()), )) - .into(), ), } } @@ -649,34 +649,26 @@ impl PyExprFuncBuilder { .into() } - pub fn build(&self) -> PyResult { - self.builder - .clone() - .build() - .map(|expr| expr.into()) - .map_err(|err| err.into()) + pub fn build(&self) -> PyDataFusionResult { + Ok(self.builder.clone().build().map(|expr| expr.into())?) } } impl PyExpr { - pub fn _column_name(&self, plan: &LogicalPlan) -> Result { + pub fn _column_name(&self, plan: &LogicalPlan) -> PyDataFusionResult { let field = Self::expr_to_field(&self.expr, plan)?; Ok(field.name().to_owned()) } /// Create a [Field] representing an [Expr], given an input [LogicalPlan] to resolve against - pub fn expr_to_field( - expr: &Expr, - input_plan: &LogicalPlan, - ) -> Result, DataFusionError> { + pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> PyDataFusionResult> { match expr { Expr::Wildcard { .. } => { // Since * could be any of the valid column names just return the first one Ok(Arc::new(input_plan.schema().field(0).clone())) } _ => { - let fields = - exprlist_to_fields(&[expr.clone()], input_plan).map_err(PyErr::from)?; + let fields = exprlist_to_fields(&[expr.clone()], input_plan)?; Ok(fields[0].1.clone()) } } diff --git a/src/expr/conditional_expr.rs b/src/expr/conditional_expr.rs index a8a885c54..fe3af2e25 100644 --- a/src/expr/conditional_expr.rs +++ b/src/expr/conditional_expr.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::expr::PyExpr; +use crate::{errors::PyDataFusionResult, expr::PyExpr}; use datafusion::logical_expr::conditional_expressions::CaseBuilder; use pyo3::prelude::*; @@ -44,11 +44,11 @@ impl PyCaseBuilder { } } - fn otherwise(&mut self, else_expr: PyExpr) -> PyResult { + fn otherwise(&mut self, else_expr: PyExpr) -> PyDataFusionResult { Ok(self.case_builder.otherwise(else_expr.expr)?.clone().into()) } - fn end(&mut self) -> PyResult { + fn end(&mut self) -> PyDataFusionResult { Ok(self.case_builder.end()?.clone().into()) } } diff --git a/src/expr/literal.rs b/src/expr/literal.rs index 43084ba4b..2cb2079f1 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::errors::DataFusionError; +use crate::errors::PyDataFusionError; use datafusion::common::ScalarValue; use pyo3::prelude::*; @@ -154,5 +154,5 @@ impl PyLiteral { } fn unexpected_literal_value(value: &ScalarValue) -> PyErr { - DataFusionError::Common(format!("getValue() - Unexpected value: {value}")).into() + PyDataFusionError::Common(format!("getValue() - Unexpected value: {value}")).into() } diff --git a/src/expr/window.rs b/src/expr/window.rs index 6486dbb32..4dc6cb9c9 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -21,8 +21,9 @@ use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, Wind use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; +use crate::common::data_type::PyScalarValue; use crate::common::df_schema::PyDFSchema; -use crate::errors::py_type_err; +use crate::errors::{py_type_err, PyDataFusionResult}; use crate::expr::logical_node::LogicalNode; use crate::expr::sort_expr::{py_sort_expr_list, PySortExpr}; use crate::expr::PyExpr; @@ -171,8 +172,8 @@ impl PyWindowFrame { #[pyo3(signature=(unit, start_bound, end_bound))] pub fn new( unit: &str, - start_bound: Option, - end_bound: Option, + start_bound: Option, + end_bound: Option, ) -> 
PyResult { let units = unit.to_ascii_lowercase(); let units = match units.as_str() { @@ -187,7 +188,7 @@ impl PyWindowFrame { } }; let start_bound = match start_bound { - Some(start_bound) => WindowFrameBound::Preceding(start_bound), + Some(start_bound) => WindowFrameBound::Preceding(start_bound.0), None => match units { WindowFrameUnits::Range => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), @@ -200,7 +201,7 @@ impl PyWindowFrame { }, }; let end_bound = match end_bound { - Some(end_bound) => WindowFrameBound::Following(end_bound), + Some(end_bound) => WindowFrameBound::Following(end_bound.0), None => match units { WindowFrameUnits::Rows => WindowFrameBound::Following(ScalarValue::UInt64(None)), WindowFrameUnits::Range => WindowFrameBound::Following(ScalarValue::UInt64(None)), @@ -253,7 +254,7 @@ impl PyWindowFrameBound { matches!(self.frame_bound, WindowFrameBound::Following(_)) } /// Returns the offset of the window frame - pub fn get_offset(&self) -> PyResult> { + pub fn get_offset(&self) -> PyDataFusionResult> { match &self.frame_bound { WindowFrameBound::Preceding(val) | WindowFrameBound::Following(val) => match val { x if x.is_null() => Ok(None), diff --git a/src/functions.rs b/src/functions.rs index ae032d702..46c748cf8 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -22,8 +22,10 @@ use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; use crate::common::data_type::NullTreatment; +use crate::common::data_type::PyScalarValue; use crate::context::PySessionContext; -use crate::errors::DataFusionError; +use crate::errors::PyDataFusionError; +use crate::errors::PyDataFusionResult; use crate::expr::conditional_expr::PyCaseBuilder; use crate::expr::sort_expr::to_sort_expressions; use crate::expr::sort_expr::PySortExpr; @@ -44,7 +46,7 @@ fn add_builder_fns_to_aggregate( filter: Option, order_by: Option>, null_treatment: Option, -) -> 
PyResult { +) -> PyDataFusionResult { // Since ExprFuncBuilder::new() is private, we can guarantee initializing // a builder with an `null_treatment` with option None let mut builder = agg_fn.null_treatment(None); @@ -228,7 +230,10 @@ fn when(when: PyExpr, then: PyExpr) -> PyResult { /// 1) If no function has been found, search default aggregate functions. /// /// NOTE: we search the built-ins first because the `UDAF` versions currently do not have the same behavior. -fn find_window_fn(name: &str, ctx: Option) -> PyResult { +fn find_window_fn( + name: &str, + ctx: Option, +) -> PyDataFusionResult { if let Some(ctx) = ctx { // search UDAFs let udaf = ctx @@ -284,7 +289,9 @@ fn find_window_fn(name: &str, ctx: Option) -> PyResult, order_by: Option>, null_treatment: Option - ) -> PyResult { + ) -> PyDataFusionResult { let agg_fn = functions_aggregate::expr_fn::$NAME($($arg.into()),*); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) @@ -362,7 +369,7 @@ macro_rules! 
aggregate_function_vec_args { filter: Option, order_by: Option>, null_treatment: Option - ) -> PyResult { + ) -> PyDataFusionResult { let agg_fn = functions_aggregate::expr_fn::$NAME(vec![$($arg.into()),*]); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) @@ -642,7 +649,7 @@ pub fn approx_percentile_cont( percentile: f64, num_centroids: Option, // enforces optional arguments at the end, currently filter: Option, -) -> PyResult { +) -> PyDataFusionResult { let args = if let Some(num_centroids) = num_centroids { vec![expression.expr, lit(percentile), lit(num_centroids)] } else { @@ -661,7 +668,7 @@ pub fn approx_percentile_cont_with_weight( weight: PyExpr, percentile: f64, filter: Option, -) -> PyResult { +) -> PyDataFusionResult { let agg_fn = functions_aggregate::expr_fn::approx_percentile_cont_with_weight( expression.expr, weight.expr, @@ -683,7 +690,7 @@ pub fn first_value( filter: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { // If we initialize the UDAF with order_by directly, then it gets over-written by the builder let agg_fn = functions_aggregate::expr_fn::first_value(expr.expr, None); @@ -700,7 +707,7 @@ pub fn nth_value( filter: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { let agg_fn = datafusion::functions_aggregate::nth_value::nth_value(expr.expr, n, vec![]); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) } @@ -715,7 +722,7 @@ pub fn string_agg( filter: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { let agg_fn = datafusion::functions_aggregate::string_agg::string_agg(expr.expr, lit(delimiter)); add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) } @@ -726,7 +733,7 @@ pub(crate) fn add_builder_fns_to_window( window_frame: Option, order_by: Option>, null_treatment: Option, -) -> PyResult { +) -> PyDataFusionResult { 
let null_treatment = null_treatment.map(|n| n.into()); let mut builder = window_fn.null_treatment(null_treatment); @@ -748,7 +755,7 @@ pub(crate) fn add_builder_fns_to_window( builder = builder.window_frame(window_frame.into()); } - builder.build().map(|e| e.into()).map_err(|err| err.into()) + Ok(builder.build().map(|e| e.into())?) } #[pyfunction] @@ -756,10 +763,11 @@ pub(crate) fn add_builder_fns_to_window( pub fn lead( arg: PyExpr, shift_offset: i64, - default_value: Option, + default_value: Option, partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { + let default_value = default_value.map(|v| v.into()); let window_fn = functions_window::expr_fn::lead(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -770,10 +778,11 @@ pub fn lead( pub fn lag( arg: PyExpr, shift_offset: i64, - default_value: Option, + default_value: Option, partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { + let default_value = default_value.map(|v| v.into()); let window_fn = functions_window::expr_fn::lag(arg.expr, Some(shift_offset), default_value); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -784,7 +793,7 @@ pub fn lag( pub fn row_number( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::row_number(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -795,7 +804,7 @@ pub fn row_number( pub fn rank( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -806,7 +815,7 @@ pub fn rank( pub fn dense_rank( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::dense_rank(); 
add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -817,7 +826,7 @@ pub fn dense_rank( pub fn percent_rank( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::percent_rank(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -828,7 +837,7 @@ pub fn percent_rank( pub fn cume_dist( partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::cume_dist(); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) @@ -840,7 +849,7 @@ pub fn ntile( arg: PyExpr, partition_by: Option>, order_by: Option>, -) -> PyResult { +) -> PyDataFusionResult { let window_fn = functions_window::expr_fn::ntile(arg.into()); add_builder_fns_to_window(window_fn, partition_by, None, order_by, None) diff --git a/src/lib.rs b/src/lib.rs index 1111d5d06..317c3a49a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,6 +48,7 @@ pub mod expr; mod functions; pub mod physical_plan; mod pyarrow_filter_expression; +pub mod pyarrow_util; mod record_batch; pub mod sql; pub mod store; diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 9ef2f0ebb..295908dc7 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyBytes}; -use crate::{context::PySessionContext, errors::DataFusionError}; +use crate::{context::PySessionContext, errors::PyDataFusionResult}; #[pyclass(name = "ExecutionPlan", module = "datafusion", subclass)] #[derive(Debug, Clone)] @@ -58,7 +58,7 @@ impl PyExecutionPlan { format!("{}", d.indent(false)) } - pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyResult> { + pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyDataFusionResult> { let codec = DefaultPhysicalExtensionCodec {}; let proto = datafusion_proto::protobuf::PhysicalPlanNode::try_from_physical_plan( 
self.plan.clone(), @@ -70,7 +70,10 @@ impl PyExecutionPlan { } #[staticmethod] - pub fn from_proto(ctx: PySessionContext, proto_msg: Bound<'_, PyBytes>) -> PyResult { + pub fn from_proto( + ctx: PySessionContext, + proto_msg: Bound<'_, PyBytes>, + ) -> PyDataFusionResult { let bytes: &[u8] = proto_msg.extract()?; let proto_plan = datafusion_proto::protobuf::PhysicalPlanNode::decode(bytes).map_err(|e| { @@ -81,9 +84,7 @@ impl PyExecutionPlan { })?; let codec = DefaultPhysicalExtensionCodec {}; - let plan = proto_plan - .try_into_physical_plan(&ctx.ctx, &ctx.ctx.runtime_env(), &codec) - .map_err(DataFusionError::from)?; + let plan = proto_plan.try_into_physical_plan(&ctx.ctx, &ctx.ctx.runtime_env(), &codec)?; Ok(Self::new(plan)) } diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 0f97ea442..314eebf4f 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -21,11 +21,11 @@ use pyo3::prelude::*; use std::convert::TryFrom; use std::result::Result; -use arrow::pyarrow::ToPyArrow; use datafusion::common::{Column, ScalarValue}; use datafusion::logical_expr::{expr::InList, Between, BinaryExpr, Expr, Operator}; -use crate::errors::DataFusionError; +use crate::errors::{PyDataFusionError, PyDataFusionResult}; +use crate::pyarrow_util::scalar_to_pyarrow; #[derive(Debug)] #[repr(transparent)] @@ -34,7 +34,7 @@ pub(crate) struct PyArrowFilterExpression(PyObject); fn operator_to_py<'py>( operator: &Operator, op: &Bound<'py, PyModule>, -) -> Result, DataFusionError> { +) -> PyDataFusionResult> { let py_op: Bound<'_, PyAny> = match operator { Operator::Eq => op.getattr("eq")?, Operator::NotEq => op.getattr("ne")?, @@ -45,7 +45,7 @@ fn operator_to_py<'py>( Operator::And => op.getattr("and_")?, Operator::Or => op.getattr("or_")?, _ => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "Unsupported operator {operator:?}" ))) } @@ -53,8 +53,8 @@ fn operator_to_py<'py>( 
Ok(py_op) } -fn extract_scalar_list(exprs: &[Expr], py: Python) -> Result, DataFusionError> { - let ret: Result, DataFusionError> = exprs +fn extract_scalar_list(exprs: &[Expr], py: Python) -> PyDataFusionResult> { + let ret = exprs .iter() .map(|expr| match expr { // TODO: should we also leverage `ScalarValue::to_pyarrow` here? @@ -71,11 +71,11 @@ fn extract_scalar_list(exprs: &[Expr], py: Python) -> Result, Data ScalarValue::Float32(Some(f)) => Ok(f.into_py(py)), ScalarValue::Float64(Some(f)) => Ok(f.into_py(py)), ScalarValue::Utf8(Some(s)) => Ok(s.into_py(py)), - _ => Err(DataFusionError::Common(format!( + _ => Err(PyDataFusionError::Common(format!( "PyArrow can't handle ScalarValue: {v:?}" ))), }, - _ => Err(DataFusionError::Common(format!( + _ => Err(PyDataFusionError::Common(format!( "Only a list of Literals are supported got {expr:?}" ))), }) @@ -90,7 +90,7 @@ impl PyArrowFilterExpression { } impl TryFrom<&Expr> for PyArrowFilterExpression { - type Error = DataFusionError; + type Error = PyDataFusionError; // Converts a Datafusion filter Expr into an expression string that can be evaluated by Python // Note that pyarrow.compute.{field,scalar} are put into Python globals() when evaluated @@ -100,9 +100,9 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { Python::with_gil(|py| { let pc = Python::import_bound(py, "pyarrow.compute")?; let op_module = Python::import_bound(py, "operator")?; - let pc_expr: Result, DataFusionError> = match expr { + let pc_expr: PyDataFusionResult> = match expr { Expr::Column(Column { name, .. 
}) => Ok(pc.getattr("field")?.call1((name,))?), - Expr::Literal(scalar) => Ok(scalar.to_pyarrow(py)?.into_bound(py)), + Expr::Literal(scalar) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), Expr::BinaryExpr(BinaryExpr { left, op, right }) => { let operator = operator_to_py(op, &op_module)?; let left = PyArrowFilterExpression::try_from(left.as_ref())?.0; @@ -167,7 +167,7 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { Ok(if *negated { invert.call1((ret,))? } else { ret }) } - _ => Err(DataFusionError::Common(format!( + _ => Err(PyDataFusionError::Common(format!( "Unsupported Datafusion expression {expr:?}" ))), }; diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs new file mode 100644 index 000000000..2b31467f8 --- /dev/null +++ b/src/pyarrow_util.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Conversions between PyArrow and DataFusion types + +use arrow::array::{Array, ArrayData}; +use arrow::pyarrow::{FromPyArrow, ToPyArrow}; +use datafusion::scalar::ScalarValue; +use pyo3::types::{PyAnyMethods, PyList}; +use pyo3::{Bound, FromPyObject, PyAny, PyObject, PyResult, Python}; + +use crate::common::data_type::PyScalarValue; +use crate::errors::PyDataFusionError; + +impl FromPyArrow for PyScalarValue { + fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> PyResult { + let py = value.py(); + let typ = value.getattr("type")?; + let val = value.call_method0("as_py")?; + + // construct pyarrow array from the python value and pyarrow type + let factory = py.import_bound("pyarrow")?.getattr("array")?; + let args = PyList::new_bound(py, [val]); + let array = factory.call1((args, typ))?; + + // convert the pyarrow array to rust array using C data interface + let array = arrow::array::make_array(ArrayData::from_pyarrow_bound(&array)?); + let scalar = ScalarValue::try_from_array(&array, 0).map_err(PyDataFusionError::from)?; + + Ok(PyScalarValue(scalar)) + } +} + +impl<'source> FromPyObject<'source> for PyScalarValue { + fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult { + Self::from_pyarrow_bound(value) + } +} + +pub fn scalar_to_pyarrow(scalar: &ScalarValue, py: Python) -> PyResult { + let array = scalar.to_array().map_err(PyDataFusionError::from)?; + // convert to pyarrow array using C data interface + let pyarray = array.to_data().to_pyarrow(py)?; + let pyscalar = pyarray.call_method1(py, "__getitem__", (0,))?; + + Ok(pyscalar) +} diff --git a/src/record_batch.rs b/src/record_batch.rs index eacdb5867..ec61c263f 100644 --- a/src/record_batch.rs +++ b/src/record_batch.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::errors::PyDataFusionError; use crate::utils::wait_for_future; use datafusion::arrow::pyarrow::ToPyArrow; use datafusion::arrow::record_batch::RecordBatch; @@ -90,7 +91,7 @@ async fn next_stream( let mut stream = stream.lock().await; match 
stream.next().await { Some(Ok(batch)) => Ok(batch.into()), - Some(Err(e)) => Err(e.into()), + Some(Err(e)) => Err(PyDataFusionError::from(e))?, None => { // Depending on whether the iteration is sync or not, we raise either a // StopIteration or a StopAsyncIteration diff --git a/src/sql/exceptions.rs b/src/sql/exceptions.rs index c458402a0..cfb02274b 100644 --- a/src/sql/exceptions.rs +++ b/src/sql/exceptions.rs @@ -17,13 +17,7 @@ use std::fmt::{Debug, Display}; -use pyo3::{create_exception, PyErr}; - -// Identifies exceptions that occur while attempting to generate a `LogicalPlan` from a SQL string -create_exception!(rust, ParsingException, pyo3::exceptions::PyException); - -// Identifies exceptions that occur during attempts to optimization an existing `LogicalPlan` -create_exception!(rust, OptimizationException, pyo3::exceptions::PyException); +use pyo3::PyErr; pub fn py_type_err(e: impl Debug + Display) -> PyErr { PyErr::new::(format!("{e}")) @@ -33,10 +27,6 @@ pub fn py_runtime_err(e: impl Debug + Display) -> PyErr { PyErr::new::(format!("{e}")) } -pub fn py_parsing_exp(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) -} - -pub fn py_optimization_exp(e: impl Debug + Display) -> PyErr { - PyErr::new::(format!("{e}")) +pub fn py_value_err(e: impl Debug + Display) -> PyErr { + PyErr::new::(format!("{e}")) } diff --git a/src/sql/logical.rs b/src/sql/logical.rs index a541889c7..1be33b75f 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::errors::PyDataFusionResult; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; use crate::expr::distinct::PyDistinct; @@ -34,7 +35,7 @@ use crate::expr::table_scan::PyTableScan; use crate::expr::unnest::PyUnnest; use crate::expr::window::PyWindowExpr; use crate::{context::PySessionContext, errors::py_unsupported_variant_err}; -use datafusion::{error::DataFusionError, logical_expr::LogicalPlan}; +use 
datafusion::logical_expr::LogicalPlan; use datafusion_proto::logical_plan::{AsLogicalPlan, DefaultLogicalExtensionCodec}; use prost::Message; use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyBytes}; @@ -125,7 +126,7 @@ impl PyLogicalPlan { format!("{}", self.plan.display_graphviz()) } - pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyResult> { + pub fn to_proto<'py>(&'py self, py: Python<'py>) -> PyDataFusionResult> { let codec = DefaultLogicalExtensionCodec {}; let proto = datafusion_proto::protobuf::LogicalPlanNode::try_from_logical_plan(&self.plan, &codec)?; @@ -135,7 +136,10 @@ impl PyLogicalPlan { } #[staticmethod] - pub fn from_proto(ctx: PySessionContext, proto_msg: Bound<'_, PyBytes>) -> PyResult { + pub fn from_proto( + ctx: PySessionContext, + proto_msg: Bound<'_, PyBytes>, + ) -> PyDataFusionResult { let bytes: &[u8] = proto_msg.extract()?; let proto_plan = datafusion_proto::protobuf::LogicalPlanNode::decode(bytes).map_err(|e| { @@ -146,9 +150,7 @@ impl PyLogicalPlan { })?; let codec = DefaultLogicalExtensionCodec {}; - let plan = proto_plan - .try_into_logical_plan(&ctx.ctx, &codec) - .map_err(DataFusionError::from)?; + let plan = proto_plan.try_into_logical_plan(&ctx.ctx, &codec)?; Ok(Self::new(plan)) } } diff --git a/src/substrait.rs b/src/substrait.rs index 16e8c9507..8dcf3e8a7 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -18,7 +18,7 @@ use pyo3::{prelude::*, types::PyBytes}; use crate::context::PySessionContext; -use crate::errors::{py_datafusion_err, DataFusionError}; +use crate::errors::{py_datafusion_err, PyDataFusionError, PyDataFusionResult}; use crate::sql::logical::PyLogicalPlan; use crate::utils::wait_for_future; @@ -39,7 +39,7 @@ impl PyPlan { let mut proto_bytes = Vec::::new(); self.plan .encode(&mut proto_bytes) - .map_err(DataFusionError::EncodeError)?; + .map_err(PyDataFusionError::EncodeError)?; Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) } } @@ -66,41 +66,47 @@ pub struct PySubstraitSerializer; 
#[pymethods] impl PySubstraitSerializer { #[staticmethod] - pub fn serialize(sql: &str, ctx: PySessionContext, path: &str, py: Python) -> PyResult<()> { - wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path)) - .map_err(DataFusionError::from)?; + pub fn serialize( + sql: &str, + ctx: PySessionContext, + path: &str, + py: Python, + ) -> PyDataFusionResult<()> { + wait_for_future(py, serializer::serialize(sql, &ctx.ctx, path))?; Ok(()) } #[staticmethod] - pub fn serialize_to_plan(sql: &str, ctx: PySessionContext, py: Python) -> PyResult { - match PySubstraitSerializer::serialize_bytes(sql, ctx, py) { - Ok(proto_bytes) => { - let proto_bytes = proto_bytes.bind(py).downcast::().unwrap(); - PySubstraitSerializer::deserialize_bytes(proto_bytes.as_bytes().to_vec(), py) - } - Err(e) => Err(py_datafusion_err(e)), - } + pub fn serialize_to_plan( + sql: &str, + ctx: PySessionContext, + py: Python, + ) -> PyDataFusionResult { + PySubstraitSerializer::serialize_bytes(sql, ctx, py).and_then(|proto_bytes| { + let proto_bytes = proto_bytes.bind(py).downcast::().unwrap(); + PySubstraitSerializer::deserialize_bytes(proto_bytes.as_bytes().to_vec(), py) + }) } #[staticmethod] - pub fn serialize_bytes(sql: &str, ctx: PySessionContext, py: Python) -> PyResult { - let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx)) - .map_err(DataFusionError::from)?; + pub fn serialize_bytes( + sql: &str, + ctx: PySessionContext, + py: Python, + ) -> PyDataFusionResult { + let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?; Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) } #[staticmethod] - pub fn deserialize(path: &str, py: Python) -> PyResult { - let plan = - wait_for_future(py, serializer::deserialize(path)).map_err(DataFusionError::from)?; + pub fn deserialize(path: &str, py: Python) -> PyDataFusionResult { + let plan = wait_for_future(py, serializer::deserialize(path))?; Ok(PyPlan { plan: *plan }) } 
#[staticmethod] - pub fn deserialize_bytes(proto_bytes: Vec, py: Python) -> PyResult { - let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes)) - .map_err(DataFusionError::from)?; + pub fn deserialize_bytes(proto_bytes: Vec, py: Python) -> PyDataFusionResult { + let plan = wait_for_future(py, serializer::deserialize_bytes(proto_bytes))?; Ok(PyPlan { plan: *plan }) } } @@ -134,10 +140,10 @@ impl PySubstraitConsumer { ctx: &mut PySessionContext, plan: PyPlan, py: Python, - ) -> PyResult { + ) -> PyDataFusionResult { let session_state = ctx.ctx.state(); let result = consumer::from_substrait_plan(&session_state, &plan.plan); - let logical_plan = wait_for_future(py, result).map_err(DataFusionError::from)?; + let logical_plan = wait_for_future(py, result)?; Ok(PyLogicalPlan::new(logical_plan)) } } diff --git a/src/udaf.rs b/src/udaf.rs index a6aa59ac3..5f21533e0 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -28,6 +28,7 @@ use datafusion::logical_expr::{ create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, }; +use crate::common::data_type::PyScalarValue; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -44,13 +45,25 @@ impl RustAccumulator { impl Accumulator for RustAccumulator { fn state(&mut self) -> Result> { - Python::with_gil(|py| self.accum.bind(py).call_method0("state")?.extract()) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) + Python::with_gil(|py| { + self.accum + .bind(py) + .call_method0("state")? + .extract::>() + }) + .map(|v| v.into_iter().map(|x| x.0).collect()) + .map_err(|e| DataFusionError::Execution(format!("{e}"))) } fn evaluate(&mut self) -> Result { - Python::with_gil(|py| self.accum.bind(py).call_method0("evaluate")?.extract()) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) + Python::with_gil(|py| { + self.accum + .bind(py) + .call_method0("evaluate")? 
+ .extract::() + }) + .map(|v| v.0) + .map_err(|e| DataFusionError::Execution(format!("{e}"))) } fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { diff --git a/src/udwf.rs b/src/udwf.rs index 689eb79e3..04a4a1640 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -26,6 +26,7 @@ use datafusion::scalar::ScalarValue; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use crate::common::data_type::PyScalarValue; use crate::expr::PyExpr; use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; @@ -133,7 +134,8 @@ impl PartitionEvaluator for RustPartitionEvaluator { self.evaluator .bind(py) .call_method1("evaluate", py_args) - .and_then(|v| v.extract()) + .and_then(|v| v.extract::()) + .map(|v| v.0) .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) } diff --git a/src/utils.rs b/src/utils.rs index 795589752..ed224b364 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::errors::DataFusionError; +use crate::errors::{PyDataFusionError, PyDataFusionResult}; use crate::TokioRuntime; use datafusion::logical_expr::Volatility; use pyo3::exceptions::PyValueError; @@ -47,13 +47,13 @@ where py.allow_threads(|| runtime.block_on(f)) } -pub(crate) fn parse_volatility(value: &str) -> Result { +pub(crate) fn parse_volatility(value: &str) -> PyDataFusionResult { Ok(match value { "immutable" => Volatility::Immutable, "stable" => Volatility::Stable, "volatile" => Volatility::Volatile, value => { - return Err(DataFusionError::Common(format!( + return Err(PyDataFusionError::Common(format!( "Unsupportad volatility type: `{value}`, supported \ values are: immutable, stable and volatile." 
))) From d3c4dabe3c24d419911106bdde3dfe1244e1224c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 09:42:03 -0500 Subject: [PATCH 005/206] Fix verify-release-candidate script by removing reference to requirements-310.txt (#1012) * Fix verify-release-candidate script by removing requirements.txt * Update dev/release/verify-release-candidate.sh Co-authored-by: Kevin Liu --------- Co-authored-by: Kevin Liu --- dev/release/verify-release-candidate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 1a9104b55..2bfce0e2d 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -128,7 +128,7 @@ test_source_distribution() { python3 -m venv .venv source .venv/bin/activate python3 -m pip install -U pip - python3 -m pip install -r requirements-310.txt + python3 -m pip install -U maturin maturin develop #TODO: we should really run tests here as well From 93ac6a820353b3ddea014be1eddad8bd004b0fce Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 7 Feb 2025 10:39:51 -0500 Subject: [PATCH 006/206] Prepare release 44.0.0 (#1009) --- Cargo.lock | 2 +- Cargo.toml | 2 +- dev/changelog/44.0.0.md | 58 ++++++++ pyproject.toml | 1 + uv.lock | 301 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 dev/changelog/44.0.0.md diff --git a/Cargo.lock b/Cargo.lock index c6590fd21..50809696b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1327,7 +1327,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "43.0.0" +version = "44.0.0" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 003ba36e5..44e6e2244 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "43.0.0" +version = "44.0.0" homepage = "https://datafusion.apache.org/python" repository = 
"https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/44.0.0.md b/dev/changelog/44.0.0.md new file mode 100644 index 000000000..c5ed4bdb0 --- /dev/null +++ b/dev/changelog/44.0.0.md @@ -0,0 +1,58 @@ + + +# Apache DataFusion Python 44.0.0 Changelog + +This release consists of 12 commits from 5 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: support enable_url_table config [#980](https://github.com/apache/datafusion-python/pull/980) (chenkovsky) +- feat: remove DataFusion pyarrow feat [#1000](https://github.com/apache/datafusion-python/pull/1000) (timsaucer) + +**Fixed bugs:** + +- fix: correct LZ0 to LZO in compression options [#995](https://github.com/apache/datafusion-python/pull/995) (kosiew) + +**Other:** + +- Add arrow cast [#962](https://github.com/apache/datafusion-python/pull/962) (kosiew) +- Fix small issues in pyproject.toml [#976](https://github.com/apache/datafusion-python/pull/976) (kylebarron) +- chore: set validation and type hint for ffi tableprovider [#983](https://github.com/apache/datafusion-python/pull/983) (ion-elgreco) +- Support async iteration of RecordBatchStream [#975](https://github.com/apache/datafusion-python/pull/975) (kylebarron) +- Chore/upgrade datafusion 44 [#973](https://github.com/apache/datafusion-python/pull/973) (timsaucer) +- Default to ZSTD compression when writing Parquet [#981](https://github.com/apache/datafusion-python/pull/981) (kosiew) +- Feat/use uv python management [#994](https://github.com/apache/datafusion-python/pull/994) (timsaucer) +- minor: Update dependencies prior to release [#999](https://github.com/apache/datafusion-python/pull/999) (timsaucer) +- Apply import ordering in ruff check [#1001](https://github.com/apache/datafusion-python/pull/1001) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. 
Here is a breakdown of commits (PRs merged) per contributor. + +``` + 5 Tim Saucer + 3 kosiew + 2 Kyle Barron + 1 Chongchen Chen + 1 Ion Koutsouris +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/pyproject.toml b/pyproject.toml index 32bb28d21..f416e02a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,7 @@ dev = [ "pytest>=7.4.4", "ruff>=0.9.1", "toml>=0.10.2", + "pygithub==2.5.0", ] docs = [ "sphinx>=7.1.2", diff --git a/uv.lock b/uv.lock index 75d9ed018..587ddc8b7 100644 --- a/uv.lock +++ b/uv.lock @@ -139,6 +139,83 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a5/32/8f6669fc4798494966bf446c8c4a162e0b5d893dff088afddf76414f70e1/certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56", size = 164927 }, ] +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/07/f44ca684db4e4f08a3fdc6eeb9a0d15dc6883efc7b8c90357fdbf74e186c/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", size = 182191 }, + { url = "https://files.pythonhosted.org/packages/08/fd/cc2fedbd887223f9f5d170c96e57cbf655df9831a6546c1727ae13fa977a/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", size = 178592 }, + { url = 
"https://files.pythonhosted.org/packages/de/cc/4635c320081c78d6ffc2cab0a76025b691a91204f4aa317d568ff9280a2d/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", size = 426024 }, + { url = "https://files.pythonhosted.org/packages/b6/7b/3b2b250f3aab91abe5f8a51ada1b717935fdaec53f790ad4100fe2ec64d1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", size = 448188 }, + { url = "https://files.pythonhosted.org/packages/d3/48/1b9283ebbf0ec065148d8de05d647a986c5f22586b18120020452fff8f5d/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", size = 455571 }, + { url = "https://files.pythonhosted.org/packages/40/87/3b8452525437b40f39ca7ff70276679772ee7e8b394934ff60e63b7b090c/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", size = 436687 }, + { url = "https://files.pythonhosted.org/packages/8d/fb/4da72871d177d63649ac449aec2e8a29efe0274035880c7af59101ca2232/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", size = 446211 }, + { url = "https://files.pythonhosted.org/packages/ab/a0/62f00bcb411332106c02b663b26f3545a9ef136f80d5df746c05878f8c4b/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", size = 461325 }, + { url = "https://files.pythonhosted.org/packages/36/83/76127035ed2e7e27b0787604d99da630ac3123bfb02d8e80c633f218a11d/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", size 
= 438784 }, + { url = "https://files.pythonhosted.org/packages/21/81/a6cd025db2f08ac88b901b745c163d884641909641f9b826e8cb87645942/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", size = 461564 }, + { url = "https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804 }, + { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299 }, + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264 }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651 }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259 }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200 }, + { url = 
"https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235 }, + { url = "https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721 }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242 }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999 }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242 }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604 }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727 }, + { url = 
"https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400 }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 }, + { url = 
"https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424 }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736 }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, + { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989 }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802 }, + { url = 
"https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792 }, + { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893 }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810 }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200 }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447 }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358 }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", 
size = 488469 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, + { url = "https://files.pythonhosted.org/packages/48/08/15bf6b43ae9bd06f6b00ad8a91f5a8fe1069d4c9fab550a866755402724e/cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", size = 182457 }, + { url = "https://files.pythonhosted.org/packages/c2/5b/f1523dd545f92f7df468e5f653ffa4df30ac222f3c884e51e139878f1cb5/cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", size = 425932 }, + { url = "https://files.pythonhosted.org/packages/53/93/7e547ab4105969cc8c93b38a667b82a835dd2cc78f3a7dad6130cfd41e1d/cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", size = 448585 }, + { url = "https://files.pythonhosted.org/packages/56/c4/a308f2c332006206bb511de219efeff090e9d63529ba0a77aae72e82248b/cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", size = 456268 }, + { url = "https://files.pythonhosted.org/packages/ca/5b/b63681518265f2f4060d2b60755c1c77ec89e5e045fc3773b72735ddaad5/cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", size = 436592 }, + { url = 
"https://files.pythonhosted.org/packages/bb/19/b51af9f4a4faa4a8ac5a0e5d5c2522dcd9703d07fac69da34a36c4d960d3/cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", size = 446512 }, + { url = "https://files.pythonhosted.org/packages/e2/63/2bed8323890cb613bbecda807688a31ed11a7fe7afe31f8faaae0206a9a3/cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", size = 171576 }, + { url = "https://files.pythonhosted.org/packages/2f/70/80c33b044ebc79527447fd4fbc5455d514c3bb840dede4455de97da39b4d/cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", size = 181229 }, + { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220 }, + { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605 }, + { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910 }, + { url = "https://files.pythonhosted.org/packages/42/7a/9d086fab7c66bd7c4d0f27c57a1b6b068ced810afc498cc8c49e0088661c/cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", size = 447200 }, + { url = 
"https://files.pythonhosted.org/packages/da/63/1785ced118ce92a993b0ec9e0d0ac8dc3e5dbfbcaa81135be56c69cabbb6/cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", size = 454565 }, + { url = "https://files.pythonhosted.org/packages/74/06/90b8a44abf3556599cdec107f7290277ae8901a58f75e6fe8f970cd72418/cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", size = 435635 }, + { url = "https://files.pythonhosted.org/packages/bd/62/a1f468e5708a70b1d86ead5bab5520861d9c7eacce4a885ded9faa7729c3/cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", size = 445218 }, + { url = "https://files.pythonhosted.org/packages/5b/95/b34462f3ccb09c2594aa782d90a90b045de4ff1f70148ee79c69d37a0a5a/cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", size = 460486 }, + { url = "https://files.pythonhosted.org/packages/fc/fc/a1e4bebd8d680febd29cf6c8a40067182b64f00c7d105f8f26b5bc54317b/cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", size = 437911 }, + { url = "https://files.pythonhosted.org/packages/e6/c3/21cab7a6154b6a5ea330ae80de386e7665254835b9e98ecc1340b3a7de9a/cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", size = 460632 }, + { url = "https://files.pythonhosted.org/packages/cb/b5/fd9f8b5a84010ca169ee49f4e4ad6f8c05f4e3545b72ee041dbbcb159882/cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", size = 171820 }, + { url = 
"https://files.pythonhosted.org/packages/8c/52/b08750ce0bce45c143e1b5d7357ee8c55341b52bdef4b0f081af1eb248c2/cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", size = 181290 }, +] + [[package]] name = "charset-normalizer" version = "3.4.1" @@ -235,9 +312,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "cryptography" +version = "44.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/4c/45dfa6829acffa344e3967d6006ee4ae8be57af746ae2eba1c431949b32c/cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02", size = 710657 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/09/8cc67f9b84730ad330b3b72cf867150744bf07ff113cda21a15a1c6d2c7c/cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123", size = 6541833 }, + { url = "https://files.pythonhosted.org/packages/7e/5b/3759e30a103144e29632e7cb72aec28cedc79e514b2ea8896bb17163c19b/cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092", size = 3922710 }, + { url = "https://files.pythonhosted.org/packages/5f/58/3b14bf39f1a0cfd679e753e8647ada56cddbf5acebffe7db90e184c76168/cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f", size = 4137546 }, + { url = 
"https://files.pythonhosted.org/packages/98/65/13d9e76ca19b0ba5603d71ac8424b5694415b348e719db277b5edc985ff5/cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb", size = 3915420 }, + { url = "https://files.pythonhosted.org/packages/b1/07/40fe09ce96b91fc9276a9ad272832ead0fddedcba87f1190372af8e3039c/cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b", size = 4154498 }, + { url = "https://files.pythonhosted.org/packages/75/ea/af65619c800ec0a7e4034207aec543acdf248d9bffba0533342d1bd435e1/cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543", size = 3932569 }, + { url = "https://files.pythonhosted.org/packages/c7/af/d1deb0c04d59612e3d5e54203159e284d3e7a6921e565bb0eeb6269bdd8a/cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e", size = 4016721 }, + { url = "https://files.pythonhosted.org/packages/bd/69/7ca326c55698d0688db867795134bdfac87136b80ef373aaa42b225d6dd5/cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e", size = 4240915 }, + { url = "https://files.pythonhosted.org/packages/ef/d4/cae11bf68c0f981e0413906c6dd03ae7fa864347ed5fac40021df1ef467c/cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053", size = 2757925 }, + { url = "https://files.pythonhosted.org/packages/64/b1/50d7739254d2002acae64eed4fc43b24ac0cc44bf0a0d388d1ca06ec5bb1/cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd", size = 3202055 }, + { url = 
"https://files.pythonhosted.org/packages/11/18/61e52a3d28fc1514a43b0ac291177acd1b4de00e9301aaf7ef867076ff8a/cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591", size = 6542801 }, + { url = "https://files.pythonhosted.org/packages/1a/07/5f165b6c65696ef75601b781a280fc3b33f1e0cd6aa5a92d9fb96c410e97/cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7", size = 3922613 }, + { url = "https://files.pythonhosted.org/packages/28/34/6b3ac1d80fc174812486561cf25194338151780f27e438526f9c64e16869/cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc", size = 4137925 }, + { url = "https://files.pythonhosted.org/packages/d0/c7/c656eb08fd22255d21bc3129625ed9cd5ee305f33752ef2278711b3fa98b/cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289", size = 3915417 }, + { url = "https://files.pythonhosted.org/packages/ef/82/72403624f197af0db6bac4e58153bc9ac0e6020e57234115db9596eee85d/cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7", size = 4155160 }, + { url = "https://files.pythonhosted.org/packages/a2/cd/2f3c440913d4329ade49b146d74f2e9766422e1732613f57097fea61f344/cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c", size = 3932331 }, + { url = "https://files.pythonhosted.org/packages/7f/df/8be88797f0a1cca6e255189a57bb49237402b1880d6e8721690c5603ac23/cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64", size = 4017372 }, + { url = 
"https://files.pythonhosted.org/packages/af/36/5ccc376f025a834e72b8e52e18746b927f34e4520487098e283a719c205e/cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285", size = 4239657 }, + { url = "https://files.pythonhosted.org/packages/46/b0/f4f7d0d0bcfbc8dd6296c1449be326d04217c57afb8b2594f017eed95533/cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417", size = 2758672 }, + { url = "https://files.pythonhosted.org/packages/97/9b/443270b9210f13f6ef240eff73fd32e02d381e7103969dc66ce8e89ee901/cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede", size = 3202071 }, + { url = "https://files.pythonhosted.org/packages/77/d4/fea74422326388bbac0c37b7489a0fcb1681a698c3b875959430ba550daa/cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731", size = 3338857 }, + { url = "https://files.pythonhosted.org/packages/1a/aa/ba8a7467c206cb7b62f09b4168da541b5109838627f582843bbbe0235e8e/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4", size = 3850615 }, + { url = "https://files.pythonhosted.org/packages/89/fa/b160e10a64cc395d090105be14f399b94e617c879efd401188ce0fea39ee/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756", size = 4081622 }, + { url = "https://files.pythonhosted.org/packages/47/8f/20ff0656bb0cf7af26ec1d01f780c5cfbaa7666736063378c5f48558b515/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c", size = 3867546 }, + { url = 
"https://files.pythonhosted.org/packages/38/d9/28edf32ee2fcdca587146bcde90102a7319b2f2c690edfa627e46d586050/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa", size = 4090937 }, + { url = "https://files.pythonhosted.org/packages/cc/9d/37e5da7519de7b0b070a3fedd4230fe76d50d2a21403e0f2153d70ac4163/cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c", size = 3128774 }, +] + [[package]] name = "datafusion" -version = "43.0.0" +version = "44.0.0" source = { editable = "." } dependencies = [ { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, @@ -249,6 +363,7 @@ dependencies = [ dev = [ { name = "maturin" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pygithub" }, { name = "pytest" }, { name = "ruff" }, { name = "toml" }, @@ -282,6 +397,7 @@ requires-dist = [ dev = [ { name = "maturin", specifier = ">=1.8.1" }, { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "pygithub", specifier = "==2.5.0" }, { name = "pytest", specifier = ">=7.4.4" }, { name = "ruff", specifier = ">=0.9.1" }, { name = "toml", specifier = ">=0.10.2" }, @@ -307,6 +423,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, ] +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, +] + [[package]] name = "docutils" version = "0.20.1" @@ -1189,6 +1317,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/d8/94161a7ca5c55199484e926165e9e33f318ea1d1b0d7cdbcbc3652b933ec/pyarrow-18.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052", size = 25301373 }, ] +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.8.0" @@ -1206,6 +1343,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/26/0694318d46c7d90ab602ae27b24431e939f1600f9a4c69d1e727ec57289f/pydata_sphinx_theme-0.8.0-py3-none-any.whl", hash = "sha256:fbcbb833a07d3ad8dd997dd40dc94da18d98b41c68123ab0182b58fe92271204", size = 3284997 }, ] +[[package]] +name = "pygithub" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "pyjwt", version = 
"2.9.0", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version < '3.9'" }, + { name = "pyjwt", version = "2.10.1", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version >= '3.9'" }, + { name = "pynacl" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/05/bfbdbbc5d8aafd8dae9b3b6877edca561fccd8528ef5edc4e7b6d23721b5/PyGithub-2.5.0-py3-none-any.whl", hash = "sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2", size = 375935 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -1215,6 +1371,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pyjwt" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/68/ce067f09fca4abeca8771fe667d89cc347d1e99da3e093112ac329c6020e/pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c", size = 78825 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/79/84/0fdf9b18ba31d69877bd39c9cd6052b47f3761e9910c15de788e519f079f/PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850", size = 22344 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography", marker = "python_full_version < '3.9'" }, +] + +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", + "python_full_version == '3.9.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography", marker = "python_full_version >= '3.9'" }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = 
"https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = 
"https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + [[package]] name = "pytest" version = "8.3.4" @@ -1817,6 +2030,92 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486 }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777 }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314 }, + { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947 }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778 }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716 }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548 }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334 }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427 }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774 }, + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488 }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776 }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420 }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199 }, + { url = 
"https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307 }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025 }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879 }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419 }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773 }, + { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799 }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821 }, + { url = 
"https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919 }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721 }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899 }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222 }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707 }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685 }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567 }, 
+ { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672 }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865 }, + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = 
"https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = 
"https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size 
= 100476 }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/0c/66/95b9e90e6e1274999b183c9c3f984996d870e933ca9560115bd1cd1d6f77/wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9", size = 53234 }, + { url = "https://files.pythonhosted.org/packages/a4/b6/6eced5e2db5924bf6d9223d2bb96b62e00395aae77058e6a9e11bf16b3bd/wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119", size = 38462 }, + { url = "https://files.pythonhosted.org/packages/5d/a4/c8472fe2568978b5532df84273c53ddf713f689d408a4335717ab89547e0/wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6", size = 38730 }, + { url = "https://files.pythonhosted.org/packages/3c/70/1d259c6b1ad164eb23ff70e3e452dd1950f96e6473f72b7207891d0fd1f0/wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9", size = 86225 }, + { url = 
"https://files.pythonhosted.org/packages/a9/68/6b83367e1afb8de91cbea4ef8e85b58acdf62f034f05d78c7b82afaa23d8/wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a", size = 78055 }, + { url = "https://files.pythonhosted.org/packages/0d/21/09573d2443916705c57fdab85d508f592c0a58d57becc53e15755d67fba2/wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2", size = 85592 }, + { url = "https://files.pythonhosted.org/packages/45/ce/700e17a852dd5dec894e241c72973ea82363486bcc1fb05d47b4fbd1d683/wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a", size = 83906 }, + { url = "https://files.pythonhosted.org/packages/37/14/bd210faf0a66faeb8529d42b6b45a25d6aa6ce25ddfc19168e4161aed227/wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04", size = 76763 }, + { url = "https://files.pythonhosted.org/packages/34/0c/85af70d291f44659c422416f0272046109e785bf6db8c081cfeeae5715c5/wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f", size = 83573 }, + { url = "https://files.pythonhosted.org/packages/f8/1e/b215068e824878f69ea945804fa26c176f7c2735a3ad5367d78930bd076a/wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7", size = 36408 }, + { url = "https://files.pythonhosted.org/packages/52/27/3dd9ad5f1097b33c95d05929e409cc86d7c765cb5437b86694dc8f8e9af0/wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3", size = 38737 }, + { url = 
"https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, + { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/cf/cb/7a07b51762dcd59bdbe07aa97f87b3169766cadf240f48d1cbe70a1be9db/wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9", size = 83050 }, + { url = "https://files.pythonhosted.org/packages/a5/51/a42757dd41032afd6d8037617aa3bc6803ba971850733b24dfb7d5c627c4/wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f", size = 74718 }, + { url = "https://files.pythonhosted.org/packages/bf/bb/d552bfe47db02fcfc950fc563073a33500f8108efa5f7b41db2f83a59028/wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b", size = 82590 }, + { url = "https://files.pythonhosted.org/packages/77/99/77b06b3c3c410dbae411105bf22496facf03a5496bfaca8fbcf9da381889/wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f", size = 81462 }, + { url = 
"https://files.pythonhosted.org/packages/2d/21/cf0bd85ae66f92600829ea1de8e1da778e5e9f6e574ccbe74b66db0d95db/wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8", size = 74309 }, + { url = "https://files.pythonhosted.org/packages/6d/16/112d25e9092398a0dd6fec50ab7ac1b775a0c19b428f049785096067ada9/wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9", size = 81081 }, + { url = "https://files.pythonhosted.org/packages/2b/49/364a615a0cc0872685646c495c7172e4fc7bf1959e3b12a1807a03014e05/wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb", size = 36423 }, + { url = "https://files.pythonhosted.org/packages/00/ad/5d2c1b34ba3202cd833d9221833e74d6500ce66730974993a8dc9a94fb8c/wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb", size = 38772 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +] + [[package]] name = "zipp" version = "3.20.2" From d635d56ecdc0cf2667c01cfcc51f26733ec796dc Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 7 Feb 2025 14:36:54 -0500 Subject: [PATCH 007/206] Chore/upgrade datafusion 45 (#1010) * upgrade dep * resolve errors * match new pyo3 version * upgrade dep * back to 43 * use released v45 * remove unnecessary pyarrow feature * Update unit test return type * fix test_relational_expr --------- Co-authored-by: Tim Saucer --- Cargo.lock | 400 +++++++++++--------- Cargo.toml | 16 +- examples/ffi-table-provider/Cargo.lock | 493 ++++++++++++++----------- examples/ffi-table-provider/Cargo.toml | 14 +- examples/ffi-table-provider/src/lib.rs | 2 +- python/tests/test_expr.py | 3 
+- python/tests/test_functions.py | 2 +- src/context.rs | 2 +- src/dataframe.rs | 2 +- 9 files changed, 514 insertions(+), 420 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50809696b..f1b1ed50a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf3437355979f1e93ba84ba108c38be5767713051f3c8ffbf07c094e2e61f9f" +checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" dependencies = [ "arrow-arith", "arrow-array", @@ -201,24 +201,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31dce77d2985522288edae7206bffd5fc4996491841dda01a13a58415867e681" +checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half", "num", ] [[package]] name = "arrow-array" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d45fe6d3faed0435b7313e59a02583b14c6c6339fa7729e94c32a20af319a79" +checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" dependencies = [ "ahash", "arrow-buffer", @@ -233,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b02656a35cc103f28084bc80a0159668e0a680d919cef127bd7e0aaccb06ec1" +checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" dependencies = [ "bytes", "half", @@ -244,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.4.0" +version = "54.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c73c6233c5b5d635a56f6010e6eb1ab9e30e94707db21cea03da317f67d84cf3" +checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,28 +264,25 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec222848d70fea5a32af9c3602b08f5d740d5e2d33fbd76bf6fd88759b5b13a7" +checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" dependencies = [ "arrow-array", - "arrow-buffer", "arrow-cast", - "arrow-data", "arrow-schema", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", "regex", ] [[package]] name = "arrow-data" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f2861ffa86f107b8ab577d86cff7c7a490243eabe961ba1e1af4f27542bb79" +checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" dependencies = [ "arrow-buffer", "arrow-schema", @@ -296,13 +292,12 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0270dc511f11bb5fa98a25020ad51a99ca5b08d8a8dfbd17503bb9dba0388f0b" +checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" dependencies = [ "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-schema", "flatbuffers", @@ -311,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eff38eeb8a971ad3a4caf62c5d57f0cff8a48b64a55e3207c4fd696a9234aad" +checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" dependencies = [ "arrow-array", "arrow-buffer", @@ -331,26 +326,23 @@ dependencies = [ [[package]] name = "arrow-ord" 
-version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6f202a879d287099139ff0d121e7f55ae5e0efe634b8cf2106ebc27a8715dee" +checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half", - "num", ] [[package]] name = "arrow-row" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f936954991c360ba762dff23f5dda16300774fafd722353d9683abd97630ae" +checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" dependencies = [ - "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -360,18 +352,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e" +checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7471ba126d0b0aaa24b50a36bc6c25e4e74869a1fd1a5553357027a0b1c8d1f1" +checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" dependencies = [ "ahash", "arrow-array", @@ -383,9 +375,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72993b01cb62507b06f1fb49648d7286c8989ecfabdb7b77a750fcb54410731b" +checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" dependencies = [ "arrow-array", "arrow-buffer", @@ -444,7 +436,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 
2.0.98", ] [[package]] @@ -455,7 +447,7 @@ checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -598,9 +590,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "bzip2" @@ -635,9 +627,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.11" +version = "1.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" +checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" dependencies = [ "jobserver", "libc", @@ -874,9 +866,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ "apache-avro", "arrow", @@ -888,7 +880,6 @@ dependencies = [ "bytes", "bzip2 0.5.0", "chrono", - "dashmap", "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", @@ -908,7 +899,7 @@ dependencies = [ "flate2", "futures", "glob", - "itertools", + "itertools 0.14.0", "log", "num-traits", "object_store", @@ -928,31 +919,39 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" 
dependencies = [ - "arrow-schema", + "arrow", "async-trait", + "dashmap", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", "parking_lot", + "sqlparser", ] [[package]] name = "datafusion-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b" +checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ "ahash", "apache-avro", "arrow", "arrow-array", "arrow-buffer", + "arrow-ipc", "arrow-schema", + "base64 0.22.1", "half", "hashbrown 0.14.5", "indexmap", @@ -969,9 +968,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72fbf14d4079f7ce5306393084fe5057dddfdc2113577e0049310afa12e94281" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" dependencies = [ "log", "tokio", @@ -979,15 +978,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" [[package]] name = "datafusion-execution" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" dependencies = [ "arrow", "dashmap", @@ -1004,9 +1003,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" dependencies = [ "arrow", "chrono", @@ -1025,23 +1024,26 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" dependencies = [ "arrow", "datafusion-common", - "itertools", + "itertools 0.14.0", + "paste", ] [[package]] name = "datafusion-ffi" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "114e944790756b84c2cc5971eae24f5430980149345601939ac222885d4db5f7" +checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" dependencies = [ "abi_stable", "arrow", + "arrow-array", + "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1049,13 +1051,15 @@ dependencies = [ "futures", "log", "prost", + "semver", + "tokio", ] [[package]] name = "datafusion-functions" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" dependencies = [ "arrow", "arrow-buffer", @@ -1071,7 +1075,7 @@ dependencies = [ "datafusion-macros", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.14.0", "log", "md-5", "rand", @@ -1083,12 +1087,13 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" dependencies 
= [ "ahash", "arrow", + "arrow-buffer", "arrow-schema", "datafusion-common", "datafusion-doc", @@ -1105,9 +1110,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" dependencies = [ "ahash", "arrow", @@ -1118,9 +1123,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5" +checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" dependencies = [ "arrow", "arrow-array", @@ -1128,21 +1133,23 @@ dependencies = [ "arrow-ord", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-macros", "datafusion-physical-expr-common", - "itertools", + "itertools 0.14.0", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" dependencies = [ "arrow", "async-trait", @@ -1156,9 +1163,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1173,9 +1180,9 @@ 
dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1183,19 +1190,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5de3c8f386ea991696553afe241a326ecbc3c98a12c562867e4be754d3a060c" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" dependencies = [ + "datafusion-expr", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "datafusion-optimizer" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" dependencies = [ "arrow", "chrono", @@ -1203,7 +1211,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "indexmap", - "itertools", + "itertools 0.14.0", "log", "recursive", "regex", @@ -1212,9 +1220,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" dependencies = [ "ahash", "arrow", @@ -1229,48 +1237,54 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.7.1", ] [[package]] name = "datafusion-physical-expr-common" -version = "44.0.0" +version = "45.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" dependencies = [ "ahash", "arrow", + "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "itertools", + "itertools 0.14.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" dependencies = [ "arrow", + "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools", + "futures", + "itertools 0.14.0", "log", "recursive", + "url", ] [[package]] name = "datafusion-physical-plan" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" dependencies = [ "ahash", "arrow", @@ -1291,7 +1305,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", @@ -1300,9 +1314,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e23b0998195e495bfa7b37cdceb317129a6c40522219f6872d2e0c9ae9f4fcb" +checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" dependencies = [ "arrow", "chrono", @@ -1316,9 +1330,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "44.0.0" +version = "45.0.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfc59992a29eed2d2c1dd779deac99083b217774ebcf90ee121840607a4d866f" +checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" dependencies = [ "arrow", "datafusion-common", @@ -1350,9 +1364,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" dependencies = [ "arrow", "arrow-array", @@ -1369,16 +1383,16 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "44.0.0" +version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ec36dd38512b1ecc7a3bb92e72046b944611b2f0d709445c1e51b0143bffd4" +checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944" dependencies = [ "arrow-buffer", "async-recursion", "async-trait", "chrono", "datafusion", - "itertools", + "itertools 0.14.0", "object_store", "pbjson-types", "prost", @@ -1405,7 +1419,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1448,6 +1462,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flatbuffers" version = "24.12.23" @@ -1545,7 +1565,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1938,7 +1958,7 @@ 
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1999,6 +2019,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.14" @@ -2362,7 +2391,7 @@ dependencies = [ "httparse", "humantime", "hyper", - "itertools", + "itertools 0.13.0", "md-5", "parking_lot", "percent-encoding", @@ -2382,9 +2411,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" [[package]] name = "openssl-probe" @@ -2426,9 +2455,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.4.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8957c0c95a6a1804f3e51a18f69df29be53856a8c5768cc9b6d00fcafcd2917c" +checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" dependencies = [ "ahash", "arrow-array", @@ -2452,6 +2481,7 @@ dependencies = [ "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", @@ -2492,7 +2522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ "heck", - "itertools", + "itertools 0.13.0", "prost", "prost-types", ] @@ -2524,7 +2554,17 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset", + "fixedbitset 0.4.2", 
+ "indexmap", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset 0.5.7", "indexmap", ] @@ -2606,7 +2646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2635,16 +2675,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" dependencies = [ "heck", - "itertools", + "itertools 0.13.0", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.6.5", "prettyplease", "prost", "prost-types", "regex", - "syn 2.0.96", + "syn 2.0.98", "tempfile", ] @@ -2655,10 +2695,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2690,9 +2730,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" dependencies = [ "cfg-if", "indoc", @@ -2708,9 +2748,9 @@ dependencies = [ [[package]] name = "pyo3-async-runtimes" -version = "0.22.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2529f0be73ffd2be0cc43c013a640796558aa12d7ca0aab5cc14f375b4733031" +checksum = "977dc837525cfd22919ba6a831413854beb7c99a256c03bf8624ad707e45810e" dependencies = [ "futures", "once_cell", @@ -2721,9 +2761,9 @@ dependencies = [ 
[[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" dependencies = [ "once_cell", "target-lexicon", @@ -2731,9 +2771,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" dependencies = [ "libc", "pyo3-build-config", @@ -2741,27 +2781,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2888,7 +2928,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3029,9 +3069,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" -version = "2.1.0" +version = "2.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustc_version" @@ -3161,7 +3201,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3234,7 +3274,7 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3245,7 +3285,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3269,7 +3309,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3314,6 +3354,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "1.0.1" @@ -3353,7 +3399,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3396,7 +3442,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3440,14 +3486,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "substrait" -version = "0.50.4" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1772d041c37cc7e6477733c76b2acf4ee36bd52b2ae4d9ea0ec9c87d003db32" +checksum = 
"5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756" dependencies = [ "heck", "pbjson", @@ -3464,7 +3510,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.96", + "syn 2.0.98", "typify", "walkdir", ] @@ -3488,9 +3534,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.96" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -3514,7 +3560,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3563,7 +3609,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3574,7 +3620,7 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3646,7 +3692,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3718,7 +3764,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3784,7 +3830,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3795,9 +3841,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typify" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b4c644dda9862f0fef3a570d8ddb3c2cfb1d5ac824a1f2ddfa7bc8f071a5ad8a" +checksum = "e03ba3643450cfd95a1aca2e1938fef63c1c1994489337998aff4ad771f21ef8" dependencies = [ "typify-impl", "typify-macro", @@ -3805,9 +3851,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ab345b6c0d8ae9500b9ff334a4c7c0d316c1c628dc55726b95887eb8dbd11" +checksum = "bce48219a2f3154aaa2c56cbf027728b24a3c8fe0a47ed6399781de2b3f3eeaf" dependencies = [ "heck", "log", @@ -3818,16 +3864,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.96", - "thiserror 1.0.69", + "syn 2.0.98", + "thiserror 2.0.11", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "785e2cdcef0df8160fdd762ed548a637aaec1e83704fdbc14da0df66013ee8d0" +checksum = "68b5780d745920ed73c5b7447496a9b5c42ed2681a9b70859377aec423ecf02b" dependencies = [ "proc-macro2", "quote", @@ -3836,7 +3882,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.96", + "syn 2.0.98", "typify-impl", ] @@ -3901,11 +3947,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.12.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" +checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.3.1", "serde", ] @@ -3971,7 +4017,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "wasm-bindgen-shared", ] @@ -4006,7 +4052,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "wasm-bindgen-backend", 
"wasm-bindgen-shared", ] @@ -4255,7 +4301,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "synstructure", ] @@ -4277,7 +4323,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4297,7 +4343,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "synstructure", ] @@ -4326,7 +4372,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 44e6e2244..d18e0e8f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,13 +35,13 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.22", features = ["extension-module", "abi3", "abi3-py38"] } -pyo3-async-runtimes = { version = "0.22", features = ["tokio-runtime"]} -arrow = { version = "53", features = ["pyarrow"] } -datafusion = { version = "44.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "44.0.0", optional = true } -datafusion-proto = { version = "44.0.0" } -datafusion-ffi = { version = "44.0.0" } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} +arrow = { version = "54", features = ["pyarrow"] } +datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "45.0.0", optional = true } +datafusion-proto = { version = "45.0.0" } +datafusion-ffi = { version = "45.0.0" } prost = "0.13" # keep in line with `datafusion-substrait` uuid = { version = 
"1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } @@ -52,7 +52,7 @@ url = "2" [build-dependencies] prost-types = "0.13" # keep in line with `datafusion-substrait` -pyo3-build-config = "0.22" +pyo3-build-config = "0.23" [lib] name = "datafusion_python" diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock index 3b57cac75..32af85180 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/ffi-table-provider/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "abi_stable" @@ -144,9 +144,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caf25cdc4a985f91df42ed9e9308e1adbcd341a31a72605c697033fcef163e3" +checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" dependencies = [ "arrow-arith", "arrow-array", @@ -165,24 +165,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91f2dfd1a7ec0aca967dfaa616096aec49779adc8eccec005e2f5e4111b1192a" +checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "half", "num", ] [[package]] name = "arrow-array" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" +checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" dependencies = [ "ahash", "arrow-buffer", @@ -191,15 +190,15 @@ dependencies = [ "chrono", "chrono-tz", 
"half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "num", ] [[package]] name = "arrow-buffer" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" +checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" dependencies = [ "bytes", "half", @@ -208,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" +checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" dependencies = [ "arrow-array", "arrow-buffer", @@ -229,28 +228,25 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07b5232be87d115fde73e32f2ca7f1b353bff1b44ac422d3c6fc6ae38f11f0d" +checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" dependencies = [ "arrow-array", - "arrow-buffer", "arrow-cast", - "arrow-data", "arrow-schema", "chrono", "csv", "csv-core", "lazy_static", - "lexical-core", "regex", ] [[package]] name = "arrow-data" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" +checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" dependencies = [ "arrow-buffer", "arrow-schema", @@ -260,13 +256,12 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" +checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" dependencies = [ "arrow-array", "arrow-buffer", 
- "arrow-cast", "arrow-data", "arrow-schema", "flatbuffers", @@ -275,9 +270,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0471f51260a5309307e5d409c9dc70aede1cd9cf1d4ff0f0a1e8e1a2dd0e0d3c" +checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" dependencies = [ "arrow-array", "arrow-buffer", @@ -295,26 +290,23 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2883d7035e0b600fb4c30ce1e50e66e53d8656aa729f2bfa4b51d359cf3ded52" +checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "half", - "num", ] [[package]] name = "arrow-row" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552907e8e587a6fde4f8843fd7a27a576a260f65dab6c065741ea79f633fc5be" +checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" dependencies = [ - "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -324,18 +316,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" +checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" +checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" dependencies = [ "ahash", "arrow-array", @@ -347,9 +339,9 @@ dependencies 
= [ [[package]] name = "arrow-string" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3179ccbd18ebf04277a095ba7321b93fd1f774f18816bd5f6b3ce2f594edb6c" +checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" dependencies = [ "arrow-array", "arrow-buffer", @@ -380,10 +372,9 @@ version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ - "bzip2", + "bzip2 0.4.4", "flate2", "futures-core", - "futures-io", "memchr", "pin-project-lite", "tokio", @@ -448,6 +439,19 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bigdecimal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -540,6 +544,16 @@ dependencies = [ "libc", ] +[[package]] +name = "bzip2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -751,11 +765,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", @@ -763,9 +775,8 @@ dependencies = [ 
"async-compression", "async-trait", "bytes", - "bzip2", + "bzip2 0.5.0", "chrono", - "dashmap", "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", @@ -774,6 +785,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -784,18 +796,13 @@ dependencies = [ "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", - "itertools", + "itertools 0.14.0", "log", - "num_cpus", "object_store", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", + "regex", "sqlparser", "tempfile", "tokio", @@ -808,67 +815,74 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ - "arrow-schema", + "arrow", "async-trait", + "dashmap", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", "parking_lot", + "sqlparser", ] [[package]] name = "datafusion-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", + "arrow-ipc", "arrow-schema", - "chrono", + "base64", "half", "hashbrown 0.14.5", "indexmap", - "instant", "libc", - "num_cpus", + "log", "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = 
"datafusion-common-runtime" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" + [[package]] name = "datafusion-execution" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store", "parking_lot", @@ -879,63 +893,59 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", - "strum", - "strum_macros", ] [[package]] name = "datafusion-expr-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +version = "45.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "datafusion-common", - "itertools", + "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e923c459b53a26d92a8806d1f6a37fdf48bde51507a39eaed6f42a60f2bfd160" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "abi_stable", "arrow", + "arrow-array", + "arrow-schema", "async-ffi", "async-trait", "datafusion", "datafusion-proto", - "doc-comment", "futures", "log", "prost", + "semver", + "tokio", ] [[package]] name = "datafusion-functions" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-buffer", @@ -944,11 +954,14 @@ dependencies = [ "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.14.0", "log", "md-5", "rand", @@ -960,44 +973,42 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", + "arrow-buffer", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", 
"datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap", "log", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-array", @@ -1005,26 +1016,43 @@ dependencies = [ "arrow-ord", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-macros", "datafusion-physical-expr-common", - "itertools", + "itertools 0.14.0", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +version = "45.0.0" +source = 
"git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", "datafusion-functions-window-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "log", @@ -1033,48 +1061,51 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.87", +] + [[package]] name = "datafusion-optimizer" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", - "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", "arrow-array", 
"arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1083,7 +1114,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "paste", "petgraph", @@ -1091,39 +1122,43 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", + "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.14.0", ] [[package]] name = "datafusion-physical-optimizer" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", + "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools", + "futures", + "itertools 0.14.0", + "log", + "recursive", + "url", ] [[package]] name = "datafusion-physical-plan" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "ahash", "arrow", @@ -1137,7 +1172,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - 
"datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1145,20 +1179,17 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-proto" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f730f7fc5a20134d4e5ecdf7bbf392002ac58163d58423ea28a702dc077b06e1" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "chrono", @@ -1172,33 +1203,30 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12c225fe49e4f943e35446b263613ada7a9e9f8d647544e6b07037b9803567df" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", - "chrono", "datafusion-common", - "object_store", "prost", ] [[package]] name = "datafusion-sql" -version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +version = "45.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" dependencies = [ "arrow", "arrow-array", "arrow-schema", + "bigdecimal", "datafusion-common", "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", - "strum", ] [[package]] @@ -1223,12 +1251,6 @@ dependencies = [ "syn 2.0.87", ] -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "either" version = 
"1.13.0" @@ -1272,15 +1294,15 @@ dependencies = [ [[package]] name = "fixedbitset" -version = "0.4.2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "24.3.25" +version = "24.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1469,12 +1491,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hex" version = "0.4.3" @@ -1651,9 +1667,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.1", @@ -1665,18 +1681,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" 
version = "3.0.4" @@ -1692,6 +1696,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -1964,16 +1977,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.36.5" @@ -1994,7 +1997,7 @@ dependencies = [ "chrono", "futures", "humantime", - "itertools", + "itertools 0.13.0", "parking_lot", "percent-encoding", "snafu", @@ -2044,9 +2047,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.2.0" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" +checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" dependencies = [ "ahash", "arrow-array", @@ -2063,13 +2066,14 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "lz4_flex", "num", "num-bigint", "object_store", "paste", "seq-macro", + "simdutf8", "snap", "thrift", "tokio", @@ -2101,9 +2105,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.6.5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", "indexmap", @@ -2206,17 +2210,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.87", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "pyo3" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" dependencies = [ "cfg-if", "indoc", @@ -2232,9 +2245,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" dependencies = [ "once_cell", "target-lexicon", @@ -2242,9 +2255,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" dependencies = [ "libc", "pyo3-build-config", @@ -2252,9 +2265,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2264,9 +2277,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.23.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" dependencies = [ "heck", "proc-macro2", @@ -2314,6 +2327,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.87", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -2418,9 +2451,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.23" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] name = "seq-macro" @@ -2477,6 +2510,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "0.3.11" @@ -2527,9 +2566,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.51.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" dependencies = [ "log", "sqlparser_derive", @@ -2537,9 +2576,9 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", @@ -2552,6 +2591,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -2563,9 +2615,6 @@ name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros", -] [[package]] name = "strum_macros" @@ -2798,9 +2847,9 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.5.3" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -2906,10 +2955,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] -name = "web-sys" -version 
= "0.3.72" +name = "web-time" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 4e6f91f33..0e558fdd0 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -21,15 +21,15 @@ version = "0.1.0" edition = "2021" [dependencies] -datafusion = { version = "44.0.0" } -datafusion-ffi = { version = "44.0.0" } -pyo3 = { version = "0.22.6", features = ["extension-module", "abi3", "abi3-py38"] } -arrow = { version = "53.2.0" } -arrow-array = { version = "53.2.0" } -arrow-schema = { version = "53.2.0" } +datafusion = { version = "45.0.0" } +datafusion-ffi = { version = "45.0.0" } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +arrow = { version = "54" } +arrow-array = { version = "54" } +arrow-schema = { version = "54" } [build-dependencies] -pyo3-build-config = "0.22.6" +pyo3-build-config = "0.23" [lib] name = "ffi_table_provider" diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/ffi-table-provider/src/lib.rs index 473244d88..88deeece2 100644 --- a/examples/ffi-table-provider/src/lib.rs +++ b/examples/ffi-table-provider/src/lib.rs @@ -102,7 +102,7 @@ impl MyTableProvider { let provider = self .create_table() .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; - let provider = FFI_TableProvider::new(Arc::new(provider), false); + let provider = FFI_TableProvider::new(Arc::new(provider), false, None); PyCapsule::new_bound(py, provider, Some(name.clone())) } diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 77f88aa44..354c7e180 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -148,8 +148,7 @@ def 
test_relational_expr(test_ctx): assert df.filter(col("b") == "beta").count() == 1 assert df.filter(col("b") != "beta").count() == 2 - with pytest.raises(Exception): - df.filter(col("a") == "beta").count() + assert df.filter(col("a") == "beta").count() == 0 def test_expr_to_variant(): diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index ad6aa7c0a..796b1f76e 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -732,7 +732,7 @@ def test_array_function_obj_tests(stmt, py_expr): ), ( f.regexp_match(column("a"), literal("(ell|orl)")), - pa.array([["ell"], ["orl"], None]), + pa.array([["ell"], ["orl"], None], type=pa.list_(pa.string_view())), ), ( f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), diff --git a/src/context.rs b/src/context.rs index f53b15576..ebe7db230 100644 --- a/src/context.rs +++ b/src/context.rs @@ -48,7 +48,7 @@ use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog_common::TableReference; +use datafusion::common::TableReference; use datafusion::common::{exec_err, ScalarValue}; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::file_format::parquet::ParquetFormat; diff --git a/src/dataframe.rs b/src/dataframe.rs index 6fb08ba25..13d7ae838 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -546,7 +546,7 @@ impl PyDataFrame { /// Collect the batches and pass to Arrow Table fn to_arrow_table(&self, py: Python<'_>) -> PyResult { let batches = self.collect(py)?.to_object(py); - let schema: PyObject = self.schema().into_py(py); + let schema: PyObject = self.schema().into_pyobject(py)?.to_object(py); // Instantiate pyarrow Table object and use its from_batches method let table_class = 
py.import_bound("pyarrow")?.getattr("Table")?; From 40a61c150adee6beb9961302fece81c33639082e Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 16 Feb 2025 02:31:00 +0800 Subject: [PATCH 008/206] add to_timestamp_nanos (#1020) --- python/datafusion/functions.py | 1 + python/tests/test_functions.py | 4 ++++ src/functions.rs | 2 ++ 3 files changed, 7 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 7c2fa9a8f..5c260aade 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -252,6 +252,7 @@ "to_hex", "to_timestamp", "to_timestamp_micros", + "to_timestamp_nanos", "to_timestamp_millis", "to_timestamp_seconds", "to_unixtime", diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 796b1f76e..b1a739b49 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -871,6 +871,7 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), ) result = df.collect() assert len(result) == 1 @@ -909,6 +910,9 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) + assert result.column(11) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): diff --git a/src/functions.rs b/src/functions.rs index 46c748cf8..6a8abb18d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -553,6 +553,7 @@ expr_fn!( expr_fn!(now); expr_fn_vec!(to_timestamp); expr_fn_vec!(to_timestamp_millis); +expr_fn_vec!(to_timestamp_nanos); expr_fn_vec!(to_timestamp_micros); expr_fn_vec!(to_timestamp_seconds); expr_fn_vec!(to_unixtime); @@ -977,6 +978,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) 
-> PyResult<()> { m.add_wrapped(wrap_pyfunction!(to_hex))?; m.add_wrapped(wrap_pyfunction!(to_timestamp))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; + m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_micros))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_seconds))?; m.add_wrapped(wrap_pyfunction!(to_unixtime))?; From 3584bec8900bcfb33bcae4b85a3c47a46b82c72e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Wed, 19 Feb 2025 20:50:31 -0500 Subject: [PATCH 009/206] [infra] Fail Clippy on rust build warnings (#1029) * pyo3 update required changes to deprecated interfaces * Substrait feature clippy updates * PyTuple was called twice * add -D warnings option --------- Co-authored-by: Tim Saucer --- .github/workflows/test.yaml | 2 +- .pre-commit-config.yaml | 2 +- src/config.rs | 10 +++--- src/context.rs | 12 +++---- src/dataframe.rs | 17 +++++---- src/dataset.rs | 2 +- src/dataset_exec.rs | 8 ++--- src/errors.rs | 4 +++ src/expr.rs | 61 ++++++++++++++++---------------- src/expr/aggregate.rs | 6 ++-- src/expr/analyze.rs | 6 ++-- src/expr/create_memory_table.rs | 6 ++-- src/expr/create_view.rs | 6 ++-- src/expr/distinct.rs | 6 ++-- src/expr/drop_table.rs | 6 ++-- src/expr/empty_relation.rs | 6 ++-- src/expr/explain.rs | 6 ++-- src/expr/extension.rs | 6 ++-- src/expr/filter.rs | 6 ++-- src/expr/join.rs | 6 ++-- src/expr/limit.rs | 6 ++-- src/expr/literal.rs | 6 ++-- src/expr/logical_node.rs | 4 +-- src/expr/projection.rs | 6 ++-- src/expr/repartition.rs | 6 ++-- src/expr/sort.rs | 6 ++-- src/expr/subquery.rs | 6 ++-- src/expr/subquery_alias.rs | 6 ++-- src/expr/table_scan.rs | 6 ++-- src/expr/union.rs | 6 ++-- src/expr/unnest.rs | 6 ++-- src/expr/window.rs | 6 ++-- src/lib.rs | 10 +++--- src/physical_plan.rs | 2 +- src/pyarrow_filter_expression.rs | 36 ++++++++++--------- src/pyarrow_util.rs | 4 +-- src/sql/logical.rs | 4 +-- src/substrait.rs | 4 +-- src/udaf.rs | 5 +-- src/udf.rs | 5 +-- src/udwf.rs | 44 
+++++++++++------------ 41 files changed, 188 insertions(+), 180 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c93d4c06f..c1d9ac838 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -71,7 +71,7 @@ jobs: - name: Run Clippy if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - run: cargo clippy --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure + run: cargo clippy --all-targets --all-features -- -D clippy::all -D warnings -A clippy::redundant_closure - name: Install dependencies and build uses: astral-sh/setup-uv@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e20fedf5c..b548ff18f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: - id: rust-clippy name: Rust clippy description: Run cargo clippy on files included in the commit. clippy should be installed before-hand. - entry: cargo clippy --all-targets --all-features -- -Dclippy::all -Aclippy::redundant_closure + entry: cargo clippy --all-targets --all-features -- -Dclippy::all -D warnings -Aclippy::redundant_closure pass_filenames: false types: [file, rust] language: system diff --git a/src/config.rs b/src/config.rs index cc725b9a3..667d5c590 100644 --- a/src/config.rs +++ b/src/config.rs @@ -47,14 +47,14 @@ impl PyConfig { } /// Get a configuration option - pub fn get(&mut self, key: &str, py: Python) -> PyResult { + pub fn get<'py>(&mut self, key: &str, py: Python<'py>) -> PyResult> { let options = self.config.to_owned(); for entry in options.entries() { if entry.key == key { - return Ok(entry.value.into_py(py)); + return Ok(entry.value.into_pyobject(py)?); } } - Ok(None::.into_py(py)) + Ok(None::.into_pyobject(py)?) 
} /// Set a configuration option @@ -66,10 +66,10 @@ impl PyConfig { /// Get all configuration options pub fn get_all(&mut self, py: Python) -> PyResult { - let dict = PyDict::new_bound(py); + let dict = PyDict::new(py); let options = self.config.to_owned(); for entry in options.entries() { - dict.set_item(entry.key, entry.value.clone().into_py(py))?; + dict.set_item(entry.key, entry.value.clone().into_pyobject(py)?)?; } Ok(dict.into()) } diff --git a/src/context.rs b/src/context.rs index ebe7db230..0f962638e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -458,8 +458,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pylist", args)?; // Convert Arrow Table to datafusion DataFrame @@ -478,8 +478,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pydict", args)?; // Convert Arrow Table to datafusion DataFrame @@ -533,8 +533,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pandas", args)?; // Convert Arrow Table to datafusion DataFrame diff --git a/src/dataframe.rs b/src/dataframe.rs index 13d7ae838..ed9578a71 100644 --- 
a/src/dataframe.rs +++ b/src/dataframe.rs @@ -545,12 +545,12 @@ impl PyDataFrame { /// Convert to Arrow Table /// Collect the batches and pass to Arrow Table fn to_arrow_table(&self, py: Python<'_>) -> PyResult { - let batches = self.collect(py)?.to_object(py); - let schema: PyObject = self.schema().into_pyobject(py)?.to_object(py); + let batches = self.collect(py)?.into_pyobject(py)?; + let schema = self.schema().into_pyobject(py)?; // Instantiate pyarrow Table object and use its from_batches method - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[batches, schema]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[batches, schema])?; let table: PyObject = table_class.call_method1("from_batches", args)?.into(); Ok(table) } @@ -585,8 +585,7 @@ impl PyDataFrame { let ffi_stream = FFI_ArrowArrayStream::new(reader); let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); - PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name)) - .map_err(PyDataFusionError::from) + PyCapsule::new(py, ffi_stream, Some(stream_capsule_name)).map_err(PyDataFusionError::from) } fn execute_stream(&self, py: Python) -> PyDataFusionResult { @@ -649,8 +648,8 @@ impl PyDataFrame { /// Collect the batches, pass to Arrow Table & then convert to polars DataFrame fn to_polars(&self, py: Python<'_>) -> PyResult { let table = self.to_arrow_table(py)?; - let dataframe = py.import_bound("polars")?.getattr("DataFrame")?; - let args = PyTuple::new_bound(py, &[table]); + let dataframe = py.import("polars")?.getattr("DataFrame")?; + let args = PyTuple::new(py, &[table])?; let result: PyObject = dataframe.call1(args)?.into(); Ok(result) } @@ -673,7 +672,7 @@ fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { // Import the Python 'builtins' module to access the print function // Note that println! 
does not print to the Python debug console and is not visible in notebooks for instance - let print = py.import_bound("builtins")?.getattr("print")?; + let print = py.import("builtins")?.getattr("print")?; print.call1((result,))?; Ok(()) } diff --git a/src/dataset.rs b/src/dataset.rs index a8fa21ec5..0baf4da2a 100644 --- a/src/dataset.rs +++ b/src/dataset.rs @@ -48,7 +48,7 @@ impl Dataset { // Creates a Python PyArrow.Dataset pub fn new(dataset: &Bound<'_, PyAny>, py: Python) -> PyResult { // Ensure that we were passed an instance of pyarrow.dataset.Dataset - let ds = PyModule::import_bound(py, "pyarrow.dataset")?; + let ds = PyModule::import(py, "pyarrow.dataset")?; let ds_attr = ds.getattr("Dataset")?; let ds_type = ds_attr.downcast::()?; if dataset.is_instance(ds_type)? { diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index ace42115b..445e4fe74 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -104,7 +104,7 @@ impl DatasetExec { }) .transpose()?; - let kwargs = PyDict::new_bound(py); + let kwargs = PyDict::new(py); kwargs.set_item("columns", columns.clone())?; kwargs.set_item( @@ -121,7 +121,7 @@ impl DatasetExec { .0, ); - let builtins = Python::import_bound(py, "builtins")?; + let builtins = Python::import(py, "builtins")?; let pylist = builtins.getattr("list")?; // Get the fragments or partitions of the dataset @@ -198,7 +198,7 @@ impl ExecutionPlan for DatasetExec { let dataset_schema = dataset .getattr("schema") .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - let kwargs = PyDict::new_bound(py); + let kwargs = PyDict::new(py); kwargs .set_item("columns", self.columns.clone()) .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; @@ -223,7 +223,7 @@ impl ExecutionPlan for DatasetExec { let record_batches: Bound<'_, PyIterator> = scanner .call_method0("to_batches") .map_err(|err| InnerDataFusionError::External(Box::new(err)))? 
- .iter() + .try_iter() .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; let record_batches = PyArrowBatchesAdapter { diff --git a/src/errors.rs b/src/errors.rs index b02b754a2..f1d5aeb23 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -91,3 +91,7 @@ pub fn py_datafusion_err(e: impl Debug) -> PyErr { pub fn py_unsupported_variant_err(e: impl Debug) -> PyErr { PyErr::new::(format!("{e:?}")) } + +pub fn to_datafusion_err(e: impl Debug) -> InnerDataFusionError { + InnerDataFusionError::Execution(format!("{e:?}")) +} diff --git a/src/expr.rs b/src/expr.rs index 1e9983d42..e750be6a4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -19,6 +19,7 @@ use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, }; +use pyo3::IntoPyObjectExt; use pyo3::{basic::CompareOp, prelude::*}; use std::convert::{From, Into}; use std::sync::Arc; @@ -126,35 +127,35 @@ pub fn py_expr_list(expr: &[Expr]) -> PyResult> { #[pymethods] impl PyExpr { /// Return the specific expression - fn to_variant(&self, py: Python) -> PyResult { + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { Python::with_gil(|_| { match &self.expr { - Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_py(py)), - Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_py(py)), + Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_bound_py_any(py)?), + Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_bound_py_any(py)?), Expr::ScalarVariable(data_type, variables) => { - Ok(PyScalarVariable::new(data_type, variables).into_py(py)) + Ok(PyScalarVariable::new(data_type, variables).into_bound_py_any(py)?) 
} - Expr::Like(value) => Ok(PyLike::from(value.clone()).into_py(py)), - Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_py(py)), - Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_py(py)), - Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_py(py)), - Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_py(py)), - Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_py(py)), - Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_py(py)), - Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_py(py)), - Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_py(py)), - Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_py(py)), - Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_py(py)), - Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_py(py)), - Expr::Negative(expr) => Ok(PyNegative::new(*expr.clone()).into_py(py)), + Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?), + Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_bound_py_any(py)?), + Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_bound_py_any(py)?), + Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_bound_py_any(py)?), + Expr::Negative(expr) 
=> Ok(PyNegative::new(*expr.clone()).into_bound_py_any(py)?), Expr::AggregateFunction(expr) => { - Ok(PyAggregateFunction::from(expr.clone()).into_py(py)) + Ok(PyAggregateFunction::from(expr.clone()).into_bound_py_any(py)?) } - Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_py(py)), - Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_py(py)), - Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_py(py)), - Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), - Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), + Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_bound_py_any(py)?), + Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_bound_py_any(py)?), + Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_bound_py_any(py)?), + Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?), + Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?), Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", value @@ -163,29 +164,29 @@ impl PyExpr { "Converting Expr::WindowFunction to a Python object is not implemented: {:?}", value ))), - Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), - Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), + Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?), + Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?), Expr::InSubquery(value) => { - Ok(in_subquery::PyInSubquery::from(value.clone()).into_py(py)) + Ok(in_subquery::PyInSubquery::from(value.clone()).into_bound_py_any(py)?) 
} Expr::ScalarSubquery(value) => { - Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) + Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) } Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", qualifier, options ))), Expr::GroupingSet(value) => { - Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) + Ok(grouping_set::PyGroupingSet::from(value.clone()).into_bound_py_any(py)?) } Expr::Placeholder(value) => { - Ok(placeholder::PyPlaceholder::from(value.clone()).into_py(py)) + Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?) } Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!( "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {:?} - {:?}", data_type, column ))), - Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_py(py)), + Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?), } }) } diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 389bfb332..8fc9da5b0 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -19,7 +19,7 @@ use datafusion::common::DataFusionError; use datafusion::logical_expr::expr::{AggregateFunction, Alias}; use datafusion::logical_expr::logical_plan::Aggregate; use datafusion::logical_expr::Expr; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -151,7 +151,7 @@ impl LogicalNode for PyAggregate { vec![PyLogicalPlan::from((*self.aggregate.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 
084513971..62f93cd26 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Analyze; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -78,7 +78,7 @@ impl LogicalNode for PyAnalyze { vec![PyLogicalPlan::from((*self.analyze.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs index 01ebb66b0..8872b2d47 100644 --- a/src/expr/create_memory_table.rs +++ b/src/expr/create_memory_table.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::CreateMemoryTable; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -91,7 +91,7 @@ impl LogicalNode for PyCreateMemoryTable { vec![PyLogicalPlan::from((*self.create.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index d119f5c21..87bb76876 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{CreateView, DdlStatement, LogicalPlan}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -88,8 +88,8 @@ impl LogicalNode for PyCreateView { vec![PyLogicalPlan::from((*self.create.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + 
self.clone().into_bound_py_any(py) } } diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs index 061ab4824..b62b776f8 100644 --- a/src/expr/distinct.rs +++ b/src/expr/distinct.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::Distinct; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -89,7 +89,7 @@ impl LogicalNode for PyDistinct { } } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/drop_table.rs b/src/expr/drop_table.rs index 330156abe..96983c1cf 100644 --- a/src/expr/drop_table.rs +++ b/src/expr/drop_table.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::logical_plan::DropTable; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -83,7 +83,7 @@ impl LogicalNode for PyDropTable { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index ce7163466..a1534ac15 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -17,7 +17,7 @@ use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; use datafusion::logical_expr::EmptyRelation; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -79,7 +79,7 @@ impl LogicalNode for PyEmptyRelation { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/explain.rs b/src/expr/explain.rs index 
8e7fb8843..fc02fe2b5 100644 --- a/src/expr/explain.rs +++ b/src/expr/explain.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{logical_plan::Explain, LogicalPlan}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{common::df_schema::PyDFSchema, errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -104,7 +104,7 @@ impl LogicalNode for PyExplain { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/extension.rs b/src/expr/extension.rs index a29802b0b..1e3fbb199 100644 --- a/src/expr/extension.rs +++ b/src/expr/extension.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::Extension; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -46,7 +46,7 @@ impl LogicalNode for PyExtension { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/filter.rs b/src/expr/filter.rs index a6d8aa7ee..9bdb667cd 100644 --- a/src/expr/filter.rs +++ b/src/expr/filter.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Filter; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -81,7 +81,7 @@ impl LogicalNode for PyFilter { vec![PyLogicalPlan::from((*self.filter.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/join.rs b/src/expr/join.rs index 66e677f8a..76ec532e7 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::{Join, JoinConstraint, JoinType}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -193,7 +193,7 @@ impl LogicalNode for PyJoin { ] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 84ad7d68b..c2a33ff89 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Limit; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -90,7 +90,7 @@ impl LogicalNode for PyLimit { vec![PyLogicalPlan::from((*self.limit.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/literal.rs b/src/expr/literal.rs index 2cb2079f1..a660ac914 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -17,7 +17,7 @@ use crate::errors::PyDataFusionError; use datafusion::common::ScalarValue; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; #[pyclass(name = "Literal", module = "datafusion.expr", subclass)] #[derive(Clone)] @@ -144,8 +144,8 @@ impl PyLiteral { } #[allow(clippy::wrong_self_convention)] - fn into_type(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn into_type<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } fn __repr__(&self) -> PyResult { diff --git a/src/expr/logical_node.rs b/src/expr/logical_node.rs index 757e4f94b..5aff70059 100644 --- a/src/expr/logical_node.rs +++ b/src/expr/logical_node.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use pyo3::{PyObject, PyResult, Python}; +use pyo3::{Bound, PyAny, PyResult, Python}; use crate::sql::logical::PyLogicalPlan; @@ -25,5 +25,5 @@ pub trait LogicalNode { /// The input plan to the current logical node instance. 
fn inputs(&self) -> Vec; - fn to_variant(&self, py: Python) -> PyResult; + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult>; } diff --git a/src/expr/projection.rs b/src/expr/projection.rs index 36534fdb2..dc7e5e3c1 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -17,7 +17,7 @@ use datafusion::logical_expr::logical_plan::Projection; use datafusion::logical_expr::Expr; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -113,7 +113,7 @@ impl LogicalNode for PyProjection { vec![PyLogicalPlan::from((*self.projection.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/repartition.rs b/src/expr/repartition.rs index 4e680e181..3e782d6af 100644 --- a/src/expr/repartition.rs +++ b/src/expr/repartition.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{logical_plan::Repartition, Expr, Partitioning}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -121,7 +121,7 @@ impl LogicalNode for PyRepartition { vec![PyLogicalPlan::from((*self.repartition.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/sort.rs b/src/expr/sort.rs index a1803ccaf..ed4947591 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -17,7 +17,7 @@ use datafusion::common::DataFusionError; use datafusion::logical_expr::logical_plan::Sort; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -96,7 +96,7 @@ impl LogicalNode for 
PySort { vec![PyLogicalPlan::from((*self.sort.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index dac8d0a2b..5ebfe6927 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::Subquery; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -75,7 +75,7 @@ impl LogicalNode for PySubquery { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs index a83cff96d..267a4d485 100644 --- a/src/expr/subquery_alias.rs +++ b/src/expr/subquery_alias.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::SubqueryAlias; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; @@ -85,7 +85,7 @@ impl LogicalNode for PySubqueryAlias { vec![PyLogicalPlan::from((*self.subquery_alias.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index f61be7fe4..6a0d53f0f 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -17,7 +17,7 @@ use datafusion::common::TableReference; use datafusion::logical_expr::logical_plan::TableScan; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::expr::logical_node::LogicalNode; @@ -146,7 +146,7 @@ impl 
LogicalNode for PyTableScan { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/union.rs b/src/expr/union.rs index 62488d9a1..5a08ccc13 100644 --- a/src/expr/union.rs +++ b/src/expr/union.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Union; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -83,7 +83,7 @@ impl LogicalNode for PyUnion { .collect() } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/unnest.rs b/src/expr/unnest.rs index adc705035..8e70e0990 100644 --- a/src/expr/unnest.rs +++ b/src/expr/unnest.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Unnest; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -79,7 +79,7 @@ impl LogicalNode for PyUnnest { vec![PyLogicalPlan::from((*self.unnest_.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/window.rs b/src/expr/window.rs index 4dc6cb9c9..13deaec25 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -18,7 +18,7 @@ use datafusion::common::{DataFusionError, ScalarValue}; use datafusion::logical_expr::expr::WindowFunction; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::data_type::PyScalarValue; @@ -289,7 +289,7 @@ impl LogicalNode for PyWindowExpr { vec![self.window.input.as_ref().clone().into()] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/lib.rs b/src/lib.rs index 317c3a49a..ce93ff0c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,21 +94,21 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; // Register `common` as a submodule. Matching `datafusion-common` https://docs.rs/datafusion-common/latest/datafusion_common/ - let common = PyModule::new_bound(py, "common")?; + let common = PyModule::new(py, "common")?; common::init_module(&common)?; m.add_submodule(&common)?; // Register `expr` as a submodule. 
Matching `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ - let expr = PyModule::new_bound(py, "expr")?; + let expr = PyModule::new(py, "expr")?; expr::init_module(&expr)?; m.add_submodule(&expr)?; // Register the functions as a submodule - let funcs = PyModule::new_bound(py, "functions")?; + let funcs = PyModule::new(py, "functions")?; functions::init_module(&funcs)?; m.add_submodule(&funcs)?; - let store = PyModule::new_bound(py, "object_store")?; + let store = PyModule::new(py, "object_store")?; store::init_module(&store)?; m.add_submodule(&store)?; @@ -121,7 +121,7 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { #[cfg(feature = "substrait")] fn setup_substrait_module(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { - let substrait = PyModule::new_bound(py, "substrait")?; + let substrait = PyModule::new(py, "substrait")?; substrait::init_module(&substrait)?; m.add_submodule(&substrait)?; Ok(()) diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 295908dc7..f0be45c6a 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -66,7 +66,7 @@ impl PyExecutionPlan { )?; let bytes = proto.encode_to_vec(); - Ok(PyBytes::new_bound(py, &bytes)) + Ok(PyBytes::new(py, &bytes)) } #[staticmethod] diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 314eebf4f..4b4c86597 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -16,7 +16,7 @@ // under the License. 
/// Converts a Datafusion logical plan expression (Expr) into a PyArrow compute expression -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::convert::TryFrom; use std::result::Result; @@ -53,24 +53,28 @@ fn operator_to_py<'py>( Ok(py_op) } -fn extract_scalar_list(exprs: &[Expr], py: Python) -> PyDataFusionResult> { +fn extract_scalar_list<'py>( + exprs: &[Expr], + py: Python<'py>, +) -> PyDataFusionResult>> { let ret = exprs .iter() .map(|expr| match expr { // TODO: should we also leverage `ScalarValue::to_pyarrow` here? Expr::Literal(v) => match v { - ScalarValue::Boolean(Some(b)) => Ok(b.into_py(py)), - ScalarValue::Int8(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int16(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int32(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int64(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt8(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt16(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt32(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt64(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Float32(Some(f)) => Ok(f.into_py(py)), - ScalarValue::Float64(Some(f)) => Ok(f.into_py(py)), - ScalarValue::Utf8(Some(s)) => Ok(s.into_py(py)), + // The unwraps here are for infallible conversions + ScalarValue::Boolean(Some(b)) => Ok(b.into_bound_py_any(py)?), + ScalarValue::Int8(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int16(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int32(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int64(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt8(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt16(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt32(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt64(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Float32(Some(f)) => Ok(f.into_bound_py_any(py)?), + ScalarValue::Float64(Some(f)) => Ok(f.into_bound_py_any(py)?), + ScalarValue::Utf8(Some(s)) => 
Ok(s.into_bound_py_any(py)?), _ => Err(PyDataFusionError::Common(format!( "PyArrow can't handle ScalarValue: {v:?}" ))), @@ -98,8 +102,8 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { // https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Expression.html#pyarrow-dataset-expression fn try_from(expr: &Expr) -> Result { Python::with_gil(|py| { - let pc = Python::import_bound(py, "pyarrow.compute")?; - let op_module = Python::import_bound(py, "operator")?; + let pc = Python::import(py, "pyarrow.compute")?; + let op_module = Python::import(py, "operator")?; let pc_expr: PyDataFusionResult> = match expr { Expr::Column(Column { name, .. }) => Ok(pc.getattr("field")?.call1((name,))?), Expr::Literal(scalar) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs index 2b31467f8..cab708458 100644 --- a/src/pyarrow_util.rs +++ b/src/pyarrow_util.rs @@ -33,8 +33,8 @@ impl FromPyArrow for PyScalarValue { let val = value.call_method0("as_py")?; // construct pyarrow array from the python value and pyarrow type - let factory = py.import_bound("pyarrow")?.getattr("array")?; - let args = PyList::new_bound(py, [val]); + let factory = py.import("pyarrow")?.getattr("array")?; + let args = PyList::new(py, [val])?; let array = factory.call1((args, typ))?; // convert the pyarrow array to rust array using C data interface diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 1be33b75f..96561c434 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -64,7 +64,7 @@ impl PyLogicalPlan { #[pymethods] impl PyLogicalPlan { /// Return the specific logical operator - pub fn to_variant(&self, py: Python) -> PyResult { + pub fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), @@ -132,7 +132,7 @@ impl PyLogicalPlan { 
datafusion_proto::protobuf::LogicalPlanNode::try_from_logical_plan(&self.plan, &codec)?; let bytes = proto.encode_to_vec(); - Ok(PyBytes::new_bound(py, &bytes)) + Ok(PyBytes::new(py, &bytes)) } #[staticmethod] diff --git a/src/substrait.rs b/src/substrait.rs index 8dcf3e8a7..1fefc0bbd 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -40,7 +40,7 @@ impl PyPlan { self.plan .encode(&mut proto_bytes) .map_err(PyDataFusionError::EncodeError)?; - Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) + Ok(PyBytes::new(py, &proto_bytes).into()) } } @@ -95,7 +95,7 @@ impl PySubstraitSerializer { py: Python, ) -> PyDataFusionResult { let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?; - Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) + Ok(PyBytes::new(py, &proto_bytes).into()) } #[staticmethod] diff --git a/src/udaf.rs b/src/udaf.rs index 5f21533e0..34a9cd51d 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -29,6 +29,7 @@ use datafusion::logical_expr::{ }; use crate::common::data_type::PyScalarValue; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -73,7 +74,7 @@ impl Accumulator for RustAccumulator { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function self.accum @@ -119,7 +120,7 @@ impl Accumulator for RustAccumulator { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. 
call function self.accum diff --git a/src/udf.rs b/src/udf.rs index 4570e77a6..574c9d7b5 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -28,6 +28,7 @@ use datafusion::logical_expr::function::ScalarFunctionImplementation; use datafusion::logical_expr::ScalarUDF; use datafusion::logical_expr::{create_udf, ColumnarValue}; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -46,11 +47,11 @@ fn pyarrow_function_to_rust( .map_err(|e| DataFusionError::Execution(format!("{e:?}"))) }) .collect::, _>>()?; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function let value = func - .call_bound(py, py_args, None) + .call(py, py_args, None) .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; // 3. cast to arrow::array::Array diff --git a/src/udwf.rs b/src/udwf.rs index 04a4a1640..defd9c522 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -27,6 +27,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use crate::common::data_type::PyScalarValue; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; @@ -56,8 +57,8 @@ impl PartitionEvaluator for RustPartitionEvaluator { fn get_range(&self, idx: usize, n_rows: usize) -> Result> { Python::with_gil(|py| { - let py_args = vec![idx.to_object(py), n_rows.to_object(py)]; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = vec![idx.into_pyobject(py)?, n_rows.into_pyobject(py)?]; + let py_args = PyTuple::new(py, py_args)?; self.evaluator .bind(py) @@ -93,17 +94,14 @@ impl PartitionEvaluator for RustPartitionEvaluator { fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { println!("evaluate all called with number of values {}", values.len()); Python::with_gil(|py| { - let py_values = PyList::new_bound( + let py_values = PyList::new( py, values .iter() .map(|arg| 
arg.into_data().to_pyarrow(py).unwrap()), - ); - let py_num_rows = num_rows.to_object(py).into_bound(py); - let py_args = PyTuple::new_bound( - py, - PyTuple::new_bound(py, vec![py_values.as_any(), &py_num_rows]), - ); + )?; + let py_num_rows = num_rows.into_pyobject(py)?; + let py_args = PyTuple::new(py, vec![py_values.as_any(), &py_num_rows])?; self.evaluator .bind(py) @@ -112,32 +110,28 @@ impl PartitionEvaluator for RustPartitionEvaluator { let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); make_array(array_data) }) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) + .map_err(to_datafusion_err) } fn evaluate(&mut self, values: &[ArrayRef], range: &Range) -> Result { Python::with_gil(|py| { - let py_values = PyList::new_bound( + let py_values = PyList::new( py, values .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()), - ); - let range_tuple = - PyTuple::new_bound(py, vec![range.start.to_object(py), range.end.to_object(py)]); - let py_args = PyTuple::new_bound( - py, - PyTuple::new_bound(py, vec![py_values.as_any(), range_tuple.as_any()]), - ); + )?; + let range_tuple = PyTuple::new(py, vec![range.start, range.end])?; + let py_args = PyTuple::new(py, vec![py_values.as_any(), range_tuple.as_any()])?; self.evaluator .bind(py) .call_method1("evaluate", py_args) .and_then(|v| v.extract::()) .map(|v| v.0) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) + .map_err(to_datafusion_err) } fn evaluate_all_with_rank( @@ -148,23 +142,27 @@ impl PartitionEvaluator for RustPartitionEvaluator { Python::with_gil(|py| { let ranks = ranks_in_partition .iter() - .map(|r| PyTuple::new_bound(py, vec![r.start, r.end])); + .map(|r| PyTuple::new(py, vec![r.start, r.end])) + .collect::>>()?; // 1. 
cast args to Pyarrow array - let py_args = vec![num_rows.to_object(py), PyList::new_bound(py, ranks).into()]; + let py_args = vec![ + num_rows.into_pyobject(py)?.into_any(), + PyList::new(py, ranks)?.into_any(), + ]; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args)?; // 2. call function self.evaluator .bind(py) .call_method1("evaluate_all_with_rank", py_args) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) .map(|v| { let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); make_array(array_data) }) }) + .map_err(to_datafusion_err) } fn supports_bounded_execution(&self) -> bool { From e6f6e66c1d180246ad933f8bcc0d40faa8426dfa Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 21 Feb 2025 16:03:36 -0500 Subject: [PATCH 010/206] Add user documentation for the FFI approach (#1031) * Initial commit for FFI user documentation * Update readme to point to the online documentation. Fix a small typo. * Small text adjustments for clarity and formatting --- README.md | 11 +- docs/source/contributor-guide/ffi.rst | 212 ++++++++++++++++++++++++++ docs/source/index.rst | 1 + 3 files changed, 220 insertions(+), 4 deletions(-) create mode 100644 docs/source/contributor-guide/ffi.rst diff --git a/README.md b/README.md index 5aaf7f5f3..9c56b62dd 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,8 @@ DataFusion's Python bindings can be used as a foundation for building new data s planning, and logical plan optimizations, and then transpiles the logical plan to Dask operations for execution. - [DataFusion Ballista](https://github.com/apache/datafusion-ballista) is a distributed SQL query engine that extends DataFusion's Python bindings for distributed use cases. 
- -It is also possible to use these Python bindings directly for DataFrame and SQL operations, but you may find that -[Polars](http://pola.rs/) and [DuckDB](http://www.duckdb.org/) are more suitable for this use case, since they have -more of an end-user focus and are more actively maintained than these Python bindings. +- [DataFusion Ray](https://github.com/apache/datafusion-ray) is another distributed query engine that uses + DataFusion's Python bindings. ## Features @@ -114,6 +112,11 @@ Printing the context will show the current configuration settings. print(ctx) ``` +## Extensions + +For information about how to extend DataFusion Python, please see the extensions page of the +[online documentation](https://datafusion.apache.org/python/). + ## More Examples See [examples](examples/README.md) for more information. diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst new file mode 100644 index 000000000..c1f9806b3 --- /dev/null +++ b/docs/source/contributor-guide/ffi.rst @@ -0,0 +1,212 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Python Extensions +================= + +The DataFusion in Python project is designed to allow users to extend its functionality in a few core +areas. Ideally many users would like to package their extensions as a Python package and easily +integrate that package with this project. This page serves to describe some of the challenges we face +when doing these integrations and the approach our project uses. + +The Primary Issue +----------------- + +Suppose you wish to use DataFusion and you have a custom data source that can produce tables that +can then be queried against, similar to how you can register a :ref:`CSV ` or +:ref:`Parquet ` file. In DataFusion terminology, you likely want to implement a +:ref:`Custom Table Provider `. In an effort to make your data source +as performant as possible and to utilize the features of DataFusion, you may decide to write +your source in Rust and then expose it through `PyO3 `_ as a Python library. + +At first glance, it may appear the best way to do this is to add the ``datafusion-python`` +crate as a dependency, provide a ``PyTable``, and then to register it with the +``SessionContext``. Unfortunately, this will not work. + +When you produce your code as a Python library and it needs to interact with the DataFusion +library, at the lowest level they communicate through an Application Binary Interface (ABI). +The acronym sounds similar to API (Application Programming Interface), but it is distinctly +different. + +The ABI sets the standard for how these libraries can share data and functions between each +other. One of the key differences between Rust and other programming languages is that Rust +does not have a stable ABI. What this means in practice is that if you compile a Rust library +with one version of the ``rustc`` compiler and I compile another library to interface with it +but I use a different version of the compiler, there is no guarantee the interface will be +the same. 
+ +In practice, this means that a Python library built with ``datafusion-python`` as a Rust +dependency will generally **not** be compatible with the DataFusion Python package, even +if they reference the same version of ``datafusion-python``. If you attempt to do this, it may +work on your local computer if you have built both packages with the same optimizations. +This can sometimes lead to a false expectation that the code will work, but it frequently +breaks the moment you try to use your package against the released packages. + +You can find more information about the Rust ABI in their +`online documentation `_. + +The FFI Approach +---------------- + +Rust supports interacting with other programming languages through its Foreign Function +Interface (FFI). The advantage of using the FFI is that it enables you to write data structures +and functions that have a stable ABI. This allows you to use Rust code with C, Python, and +other languages. In fact, the `PyO3 `_ library uses the FFI to share data +and functions between Python and Rust. + +The approach we are taking in the DataFusion in Python project is to incrementally expose +more portions of the DataFusion project via FFI interfaces. This allows users to write Rust +code that does **not** require the ``datafusion-python`` crate as a dependency, expose their +code in Python via PyO3, and have it interact with the DataFusion Python package. + +Early adopters of this approach include `delta-rs `_ +who has adapted their Table Provider for use in ``datafusion-python`` with only a few lines +of code. Also, the DataFusion Python project uses the existing definitions from +`Apache Arrow CStream Interface `_ +to support importing **and** exporting tables. Any Python package that supports reading +the Arrow C Stream interface can work with DataFusion Python out of the box! You can read +more about working with Arrow sources in the :ref:`Data Sources ` +page. 
+ +To learn more about the Foreign Function Interface in Rust, the +`Rustonomicon `_ is a good resource. + +Inspiration from Arrow +---------------------- + +DataFusion is built upon `Apache Arrow `_. The canonical Python +Arrow implementation, `pyarrow `_, provides +an excellent way to share Arrow data between Python projects without performing any copy +operations on the data. They do this by using a well-defined set of interfaces. You can +find the details about their stream interface +`here `_. The +`Rust Arrow Implementation `_ also supports these +``C`` style definitions via the Foreign Function Interface. + +In addition to using these interfaces to transfer Arrow data between libraries, ``pyarrow`` +goes one step further to make sharing the interfaces easier in Python. They do this +by exposing PyCapsules that contain the expected functionality. + +You can learn more about PyCapsules from the official +`Python online documentation `_. PyCapsules +have excellent support in PyO3 already. The +`PyO3 online documentation `_ is a good source +for more details on using PyCapsules in Rust. + +Two lessons we leverage from the Arrow project in DataFusion Python are: + +- We reuse the existing Arrow FFI functionality wherever possible. +- We expose PyCapsules that contain an FFI-stable struct. + +Implementation Details +---------------------- + +The bulk of the code necessary to perform our FFI operations is in the upstream +`DataFusion `_ core repository. You can review the code and +documentation in the `datafusion-ffi`_ crate. + +Our FFI implementation is narrowly focused on sharing data and functions with Rust-backed +libraries. This allows us to use the `abi_stable crate `_. +This is an excellent crate that allows for easy conversion between Rust native types +and FFI-safe alternatives. For example, if you needed to pass a ``Vec`` via FFI, +you can simply convert it to a ``RVec`` in an intuitive manner. 
It also supports +features like ``RResult`` and ``ROption`` that do not have an obvious translation to a +C equivalent. + +The `datafusion-ffi`_ crate has been designed to make it easy to convert from DataFusion +traits into their FFI counterparts. For example, if you have defined a custom +`TableProvider `_ +and you want to create a sharable FFI counterpart, you could write: + +.. code-block:: rust + + let my_provider = MyTableProvider::default(); + let ffi_provider = FFI_TableProvider::new(Arc::new(my_provider), false, None); + +If you were interfacing with a library that provided the above ``FFI_TableProvider`` and +you needed to turn it back into a ``TableProvider``, you can turn it into a +``ForeignTableProvider`` which implements the ``TableProvider`` trait. + +.. code-block:: rust + + let foreign_provider: ForeignTableProvider = ffi_provider.into(); + +If you review the code in `datafusion-ffi`_ you will find that each of the traits we share +across the boundary has two portions, one with a ``FFI_`` prefix and one with a ``Foreign`` +prefix. This is used to distinguish which side of the FFI boundary that struct is +designed to be used on. The structures with the ``FFI_`` prefix are to be used on the +**provider** of the structure. In the example we're showing, this means the code that has +written the underlying ``TableProvider`` implementation to access your custom data source. +The structures with the ``Foreign`` prefix are to be used by the receiver. In this case, +it is the ``datafusion-python`` library. + +In order to share these FFI structures, we need to wrap them in some kind of Python object +that can be used to interface from one package to another. As described in the above +section on our inspiration from Arrow, we use ``PyCapsule``. We can create a ``PyCapsule`` +for our provider thusly: + +.. 
code-block:: rust + + let name = CString::new("datafusion_table_provider")?; + let my_capsule = PyCapsule::new_bound(py, provider, Some(name))?; + +On the receiving side, turn this pycapsule object into the ``FFI_TableProvider``, which +can then be turned into a ``ForeignTableProvider``. The associated code is: + +.. code-block:: rust + + let capsule = capsule.downcast::<PyCapsule>()?; + let provider = unsafe { capsule.reference::<FFI_TableProvider>() }; + +By convention the ``datafusion-python`` library expects a Python object that has a +``TableProvider`` PyCapsule to have this capsule accessible by calling a function named +``__datafusion_table_provider__``. You can see a complete working example of how to +share a ``TableProvider`` from one python library to DataFusion Python in the +`repository examples folder `_. + +This section has been written using ``TableProvider`` as an example. It is the first +extension that has been written using this approach and the most thoroughly implemented. +As we continue to expose more of the DataFusion features, we intend to follow this same +design pattern. + +Alternative Approach +-------------------- + +Suppose you needed to expose some other features of DataFusion and you could not wait +for the upstream repository to implement the FFI approach we describe. In this case +you decide to create your dependency on the ``datafusion-python`` crate instead. + +As we discussed, this is not guaranteed to work across different compiler versions and +optimization levels. If you wish to go down this route, there are two approaches we +have identified you can use. + +#. Re-export all of ``datafusion-python`` yourself with your extensions built in. +#. Carefully synchronize your software releases with the ``datafusion-python`` CI build + system so that your libraries use the exact same compiler, features, and + optimization level. + +We currently do not recommend either of these approaches as they are difficult to +maintain over a long period. 
Additionally, they require a tight version coupling +between libraries. + +Status of Work +-------------- + +At the time of this writing, the FFI features are under active development. To see +the latest status, we recommend reviewing the code in the `datafusion-ffi`_ crate. + +.. _datafusion-ffi: https://crates.io/crates/datafusion-ffi diff --git a/docs/source/index.rst b/docs/source/index.rst index 34eb23b28..558b2d572 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -85,6 +85,7 @@ Example :caption: CONTRIBUTOR GUIDE contributor-guide/introduction + contributor-guide/ffi .. _toc.api: .. toctree:: From 3f3983cc86ffe267cff97480241e8a588ac38fa3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 23 Feb 2025 08:00:52 -0500 Subject: [PATCH 011/206] build(deps): bump arrow from 54.1.0 to 54.2.0 (#1035) Bumps [arrow](https://github.com/apache/arrow-rs) from 54.1.0 to 54.2.0. - [Release notes](https://github.com/apache/arrow-rs/releases) - [Changelog](https://github.com/apache/arrow-rs/blob/main/CHANGELOG-old.md) - [Commits](https://github.com/apache/arrow-rs/compare/54.1.0...54.2.0) --- updated-dependencies: - dependency-name: arrow dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1b1ed50a..d23ed6169 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" 
+version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" dependencies = [ "arrow-array", "arrow-buffer", @@ -326,9 +326,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +352,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" dependencies = [ "ahash", "arrow-array", @@ -375,9 +375,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" dependencies = [ "arrow-array", "arrow-buffer", From 69ebf70bd821d0ae516d2f61d96058e2252a7a1f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 24 Feb 2025 21:30:52 +0100 Subject: [PATCH 012/206] Chore: Release datafusion-python 45 (#1024) * Bump version number to prepare for release * Add changelog 45.0.0 * Add deprecated marker from either typing or typing_extensions based 
on the python version * Limit pyarrow version per issue # 1023 * Bumping the version number to support new release candidate * There was no guarantee that the record batches would be returned in a single partition, so update the unit test to check all partitions. * Revert "Limit pyarrow version per issue # 1023" This reverts commit b48d5872661017ec21ea71f7dbb9569f2f0bf797. * Correct import for python 3.13 and above * Bump minor version due to pypi requirement * Update cargo lock --- Cargo.lock | 113 +++++++++++++-------------------- Cargo.toml | 2 +- dev/changelog/45.0.0.md | 42 ++++++++++++ python/datafusion/context.py | 5 +- python/datafusion/dataframe.py | 5 +- python/datafusion/expr.py | 6 +- python/datafusion/substrait.py | 5 +- python/tests/test_dataframe.py | 21 ++++-- 8 files changed, 118 insertions(+), 81 deletions(-) create mode 100644 dev/changelog/45.0.0.md diff --git a/Cargo.lock b/Cargo.lock index d23ed6169..5c7f2bf3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -606,19 +606,18 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.12+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" dependencies = [ "cc", "libc", @@ -627,9 +626,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.12" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" +checksum = 
"0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" dependencies = [ "jobserver", "libc", @@ -684,21 +683,20 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.53" +version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e24a03c8b52922d68a1589ad61032f2c1aa5a8158d2aa0d93c6e9534944bbad6" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" dependencies = [ "cc", ] [[package]] name = "comfy-table" -version = "7.1.3" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum", - "strum_macros", + "unicode-segmentation", "unicode-width", ] @@ -837,9 +835,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -878,7 +876,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2 0.5.1", "chrono", "datafusion-catalog", "datafusion-common", @@ -1240,7 +1238,7 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph 0.7.1", + "petgraph", ] [[package]] @@ -1341,7 +1339,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "44.0.0" +version = "45.2.0" dependencies = [ "arrow", "async-trait", @@ -1436,9 +1434,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = 
"877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" @@ -1456,12 +1454,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fixedbitset" version = "0.5.7" @@ -2269,9 +2261,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" dependencies = [ "adler2", ] @@ -2548,23 +2540,13 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset 0.4.2", - "indexmap", -] - [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "indexmap", ] @@ -2660,9 +2642,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = 
[ "bytes", "prost-derive", @@ -2670,16 +2652,16 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", - "petgraph 0.6.5", + "petgraph", "prettyplease", "prost", "prost-types", @@ -2690,12 +2672,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.98", @@ -2703,9 +2685,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] @@ -2721,9 +2703,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" dependencies = [ "cc", ] @@ -2860,9 +2842,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" +checksum = 
"e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" dependencies = [ "cfg_aliases", "libc", @@ -3042,15 +3024,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" dependencies = [ "cc", "cfg-if", "getrandom 0.2.15", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -3097,9 +3078,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.22" +version = "0.23.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" dependencies = [ "once_cell", "ring", @@ -3377,9 +3358,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "snafu" @@ -3418,12 +3399,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "sqlparser" version = "0.53.0" @@ -3453,9 +3428,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +checksum = "1d08feb8f695b465baed819b03c128dc23f57a694510ab1f06c77f763975685e" dependencies = [ "cc", 
"cfg-if", diff --git a/Cargo.toml b/Cargo.toml index d18e0e8f0..5358b1836 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "44.0.0" +version = "45.2.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/45.0.0.md b/dev/changelog/45.0.0.md new file mode 100644 index 000000000..93659b171 --- /dev/null +++ b/dev/changelog/45.0.0.md @@ -0,0 +1,42 @@ + + +# Apache DataFusion Python 45.0.0 Changelog + +This release consists of 2 commits from 2 contributors. See credits at the end of this changelog for more information. + +**Fixed bugs:** + +- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) + +**Other:** + +- Chore/upgrade datafusion 45 [#1010](https://github.com/apache/datafusion-python/pull/1010) (kevinjqliu) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 1 Kevin Liu + 1 Tim Saucer +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 864ef1c8b..21955b6d1 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -21,7 +21,10 @@ from typing import TYPE_CHECKING, Any, Protocol -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 7413a5fa3..23b5d630b 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -33,7 +33,10 @@ overload, ) -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.plan import ExecutionPlan, LogicalPlan from datafusion.record_batch import RecordBatchStream diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 68ddd7c9a..e3d7158eb 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -25,7 +25,11 @@ from typing import TYPE_CHECKING, Any, Optional, Type import pyarrow as pa -from typing_extensions import deprecated + +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.common import DataTypeMap, NullTreatment, RexType diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index 402184d3f..06302fe38 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -26,7 +26,10 @@ import pathlib from typing import TYPE_CHECKING -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.plan import LogicalPlan diff --git 
a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 5bc3fb094..c636e896a 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -755,13 +755,20 @@ def test_execution_plan(aggregate_df): assert "CsvExec:" in indent ctx = SessionContext() - stream = ctx.execute(plan, 0) - # get the one and only batch - batch = stream.next() - assert batch is not None - # there should be no more batches - with pytest.raises(StopIteration): - stream.next() + rows_returned = 0 + for idx in range(0, plan.partition_count): + stream = ctx.execute(plan, idx) + try: + batch = stream.next() + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + except StopIteration: + # This is one of the partitions with no values + pass + with pytest.raises(StopIteration): + stream.next() + + assert rows_returned == 5 def test_repartition(df): From a80a788f69cf46ef002b3c537837548cc103748c Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 8 Mar 2025 21:22:36 +0800 Subject: [PATCH 013/206] Enable Dataframe to be converted into views which can be used in register_table (#1016) * add test_view * feat: add into_view method to register DataFrame as a view * add pytableprovider * feat: add as_table method to PyTableProvider and update into_view to return PyTable * refactor: simplify as_table method and update documentation for into_view * test: improve test_register_filtered_dataframe by removing redundant comments and assertions * test: enhance test_register_filtered_dataframe with additional assertions for DataFrame results * ruff formatted * cleanup: remove unused imports from test_view.py * docs: add example for registering a DataFrame as a view in README.md * docs: update docstring for into_view method to clarify usage as ViewTable * chore: add license header to test_view.py * ruff correction * refactor: rename into_view method to _into_view * ruff lint * refactor: simplify into_view method and update Rust binding convention * docs: add views 
section to user guide with example on registering views * feat: add register_view method to SessionContext for DataFrame registration * docs: update README and user guide to reflect register_view method for DataFrame registration * docs: remove some documentation from PyDataFrame --- README.md | 40 +++++++++++++ .../user-guide/common-operations/index.rst | 1 + .../user-guide/common-operations/views.rst | 58 +++++++++++++++++++ python/datafusion/context.py | 12 ++++ python/datafusion/dataframe.py | 4 ++ python/tests/test_view.py | 49 ++++++++++++++++ src/dataframe.rs | 39 +++++++++++++ 7 files changed, 203 insertions(+) create mode 100644 docs/source/user-guide/common-operations/views.rst create mode 100644 python/tests/test_view.py diff --git a/README.md b/README.md index 9c56b62dd..4f80dbe18 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,46 @@ This produces the following chart: ![Chart](examples/chart.png) +## Registering a DataFrame as a View + +You can use SessionContext's `register_view` method to convert a DataFrame into a view and register it with the context. 
+ +```python +from datafusion import SessionContext, col, literal + +# Create a DataFusion context +ctx = SessionContext() + +# Create sample data +data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + +# Create a DataFrame from the dictionary +df = ctx.from_pydict(data, "my_table") + +# Filter the DataFrame (for example, keep rows where a > 2) +df_filtered = df.filter(col("a") > literal(2)) + +# Register the dataframe as a view with the context +ctx.register_view("view1", df_filtered) + +# Now run a SQL query against the registered view +df_view = ctx.sql("SELECT * FROM view1") + +# Collect the results +results = df_view.collect() + +# Convert results to a list of dictionaries for display +result_dicts = [batch.to_pydict() for batch in results] + +print(result_dicts) +``` + +This will output: + +```python +[{'a': [3, 4, 5], 'b': [30, 40, 50]}] +``` + ## Configuration It is possible to configure runtime (memory and disk settings) and configuration settings when creating a context. diff --git a/docs/source/user-guide/common-operations/index.rst b/docs/source/user-guide/common-operations/index.rst index d7c708c21..7abd1f138 100644 --- a/docs/source/user-guide/common-operations/index.rst +++ b/docs/source/user-guide/common-operations/index.rst @@ -23,6 +23,7 @@ The contents of this section are designed to guide a new user through how to use .. toctree:: :maxdepth: 2 + views basic-info select-and-filter expressions diff --git a/docs/source/user-guide/common-operations/views.rst b/docs/source/user-guide/common-operations/views.rst new file mode 100644 index 000000000..df11e3abe --- /dev/null +++ b/docs/source/user-guide/common-operations/views.rst @@ -0,0 +1,58 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. 
"License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +====================== +Registering Views +====================== + +You can use the context's ``register_view`` method to register a DataFrame as a view + +.. code-block:: python + + from datafusion import SessionContext, col, literal + + # Create a DataFusion context + ctx = SessionContext() + + # Create sample data + data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + + # Create a DataFrame from the dictionary + df = ctx.from_pydict(data, "my_table") + + # Filter the DataFrame (for example, keep rows where a > 2) + df_filtered = df.filter(col("a") > literal(2)) + + # Register the dataframe as a view with the context + ctx.register_view("view1", df_filtered) + + # Now run a SQL query against the registered view + df_view = ctx.sql("SELECT * FROM view1") + + # Collect the results + results = df_view.collect() + + # Convert results to a list of dictionaries for display + result_dicts = [batch.to_pydict() for batch in results] + + print(result_dicts) + +This will output: + +.. 
code-block:: python + + [{'a': [3, 4, 5], 'b': [30, 40, 50]}] diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 21955b6d1..befc4dce6 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -707,6 +707,18 @@ def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFr """ return DataFrame(self.ctx.from_polars(data, name)) + # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 + # is the discussion on how we arrived at adding register_view + def register_view(self, name: str, df: DataFrame): + """Register a :py:class: `~datafusion.detaframe.DataFrame` as a view. + + Args: + name (str): The name to register the view under. + df (DataFrame): The DataFrame to be converted into a view and registered. + """ + view = df.into_view() + self.ctx.register_table(name, view) + def register_table(self, name: str, table: Table) -> None: """Register a :py:class: `~datafusion.catalog.Table` as a table. diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 23b5d630b..85a179ec9 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -124,6 +124,10 @@ def __init__(self, df: DataFrameInternal) -> None: """ self.df = df + def into_view(self) -> pa.Table: + """Convert DataFrame as a ViewTable which can be used in register_table.""" + return self.df.into_view() + def __getitem__(self, key: str | List[str]) -> DataFrame: """Return a new :py:class`DataFrame` with the specified column or columns. diff --git a/python/tests/test_view.py b/python/tests/test_view.py new file mode 100644 index 000000000..1d92cc0d4 --- /dev/null +++ b/python/tests/test_view.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from datafusion import SessionContext, col, literal + + +def test_register_filtered_dataframe(): + ctx = SessionContext() + + data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + + df = ctx.from_pydict(data, "my_table") + + df_filtered = df.filter(col("a") > literal(2)) + + ctx.register_view("view1", df_filtered) + + df_view = ctx.sql("SELECT * FROM view1") + + filtered_results = df_view.collect() + + result_dicts = [batch.to_pydict() for batch in filtered_results] + + expected_results = [{"a": [3, 4, 5], "b": [30, 40, 50]}] + + assert result_dicts == expected_results + + df_results = df.collect() + + df_result_dicts = [batch.to_pydict() for batch in df_results] + + expected_df_results = [{"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}] + + assert df_result_dicts == expected_df_results diff --git a/src/dataframe.rs b/src/dataframe.rs index ed9578a71..243e2e14f 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -30,6 +30,7 @@ use datafusion::arrow::util::pretty; use datafusion::common::UnnestOptions; use datafusion::config::{CsvOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; +use datafusion::datasource::TableProvider; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; @@ -39,6 +40,7 @@ use pyo3::pybacked::PyBackedStr; use 
pyo3::types::{PyCapsule, PyTuple, PyTupleMethods}; use tokio::task::JoinHandle; +use crate::catalog::PyTable; use crate::errors::{py_datafusion_err, PyDataFusionError}; use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; @@ -50,6 +52,25 @@ use crate::{ expr::{sort_expr::PySortExpr, PyExpr}, }; +// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 +// - we have not decided on the table_provider approach yet +// this is an interim implementation +#[pyclass(name = "TableProvider", module = "datafusion")] +pub struct PyTableProvider { + provider: Arc, +} + +impl PyTableProvider { + pub fn new(provider: Arc) -> Self { + Self { provider } + } + + pub fn as_table(&self) -> PyTable { + let table_provider: Arc = self.provider.clone(); + PyTable::new(table_provider) + } +} + /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. /// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. @@ -156,6 +177,24 @@ impl PyDataFrame { PyArrowType(self.df.schema().into()) } + /// Convert this DataFrame into a Table that can be used in register_table + /// By convention, into_... methods consume self and return the new object. + /// Disabling the clippy lint, so we can use &self + /// because we're working with Python bindings + /// where objects are shared + /// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 + /// - we have not decided on the table_provider approach yet + #[allow(clippy::wrong_self_convention)] + fn into_view(&self) -> PyDataFusionResult { + // Call the underlying Rust DataFrame::into_view method. + // Note that the Rust method consumes self; here we clone the inner Arc + // so that we don’t invalidate this PyDataFrame. 
+ let table_provider = self.df.as_ref().clone().into_view(); + let table_provider = PyTableProvider::new(table_provider); + + Ok(table_provider.as_table()) + } + #[pyo3(signature = (*args))] fn select_columns(&self, args: Vec) -> PyDataFusionResult { let args = args.iter().map(|s| s.as_ref()).collect::>(); From 9027b4d79fdd7a41dd9c1f25c2ecebc1fabf50f2 Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Sat, 8 Mar 2025 21:24:02 +0800 Subject: [PATCH 014/206] fix: type checking (#993) * fix: type checking * update license * format * format * update catalog * revert type annotation * format * format * update --- python/datafusion/catalog.py | 5 +++-- python/datafusion/context.py | 19 ++++++++++++------ python/datafusion/dataframe.py | 3 ++- python/datafusion/expr.py | 8 ++++---- python/datafusion/functions.py | 10 +++++++--- python/datafusion/input/location.py | 10 +++++----- python/datafusion/udf.py | 7 ++++--- python/tests/test_functions.py | 30 +++++++++++++++++++++++++++++ 8 files changed, 68 insertions(+), 24 deletions(-) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 703037665..0560f4704 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -66,11 +66,12 @@ def __init__(self, table: df_internal.Table) -> None: """This constructor is not typically called by the end user.""" self.table = table + @property def schema(self) -> pyarrow.Schema: """Returns the schema associated with this table.""" - return self.table.schema() + return self.table.schema @property def kind(self) -> str: """Returns the kind of table.""" - return self.table.kind() + return self.table.kind diff --git a/python/datafusion/context.py b/python/datafusion/context.py index befc4dce6..282b2a477 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -728,7 +728,7 @@ def register_table(self, name: str, table: Table) -> None: name: Name of the resultant table. table: DataFusion table to add to the session context. 
""" - self.ctx.register_table(name, table) + self.ctx.register_table(name, table.table) def deregister_table(self, name: str) -> None: """Remove a table from the session.""" @@ -767,7 +767,7 @@ def register_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -798,7 +798,9 @@ def register_parquet( file_extension, skip_metadata, schema, - file_sort_order, + [sort_list_to_raw_sort_list(exprs) for exprs in file_sort_order] + if file_sort_order is not None + else None, ) def register_csv( @@ -934,7 +936,7 @@ def register_udwf(self, udwf: WindowUDF) -> None: def catalog(self, name: str = "datafusion") -> Catalog: """Retrieve a catalog by name.""" - return self.ctx.catalog(name) + return Catalog(self.ctx.catalog(name)) @deprecated( "Use the catalog provider interface ``SessionContext.Catalog`` to " @@ -1054,7 +1056,7 @@ def read_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -1078,6 +1080,11 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] + file_sort_order = ( + [sort_list_to_raw_sort_list(f) for f in file_sort_order] + if file_sort_order is not None + else None + ) return DataFrame( self.ctx.read_parquet( str(path), @@ -1121,7 +1128,7 @@ def read_table(self, table: Table) -> DataFrame: :py:class:`~datafusion.catalog.ListingTable`, create a :py:class:`~datafusion.dataframe.DataFrame`. 
""" - return DataFrame(self.ctx.read_table(table)) + return DataFrame(self.ctx.read_table(table.table)) def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: """Execute the ``plan`` and return the results.""" diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 85a179ec9..de5d8376e 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -52,6 +52,7 @@ from enum import Enum from datafusion._internal import DataFrame as DataFrameInternal +from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -277,7 +278,7 @@ def with_columns( def _simplify_expression( *exprs: Expr | Iterable[Expr], **named_exprs: Expr - ) -> list[Expr]: + ) -> list[expr_internal.Expr]: expr_list = [] for expr in exprs: if isinstance(expr, Expr): diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index e3d7158eb..3639abec6 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -176,7 +176,7 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: """Helper function to return a default Sort if an Expr is provided.""" if isinstance(e, SortExpr): return e.raw_sort - return SortExpr(e.expr, True, True).raw_sort + return SortExpr(e, True, True).raw_sort def sort_list_to_raw_sort_list( @@ -231,7 +231,7 @@ def variant_name(self) -> str: def __richcmp__(self, other: Expr, op: int) -> Expr: """Comparison operator.""" - return Expr(self.expr.__richcmp__(other, op)) + return Expr(self.expr.__richcmp__(other.expr, op)) def __repr__(self) -> str: """Generate a string representation of this expression.""" @@ -417,7 +417,7 @@ def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: ascending: If true, sort in ascending order. nulls_first: Return null values first. 
""" - return SortExpr(self.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(self, ascending=ascending, nulls_first=nulls_first) def is_null(self) -> Expr: """Returns ``True`` if this expression is null.""" @@ -789,7 +789,7 @@ class SortExpr: def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: """This constructor should not be called by the end user.""" - self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) + self.raw_sort = expr_internal.SortExpr(expr.expr, ascending, nulls_first) def expr(self) -> Expr: """Return the raw expr backing the SortExpr.""" diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 5c260aade..b449c4868 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -366,7 +366,7 @@ def concat_ws(separator: str, *args: Expr) -> Expr: def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a new sort expression.""" - return SortExpr(expr.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) def alias(expr: Expr, name: str) -> Expr: @@ -942,6 +942,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_millis(arg.expr, *formatters)) @@ -950,6 +951,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_micros(arg.expr, *formatters)) @@ -958,6 +960,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. 
""" + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) @@ -966,6 +969,7 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) @@ -1078,9 +1082,9 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr: return Expr(f.range(start.expr, stop.expr, step.expr)) -def uuid(arg: Expr) -> Expr: +def uuid() -> Expr: """Returns uuid v4 as a string value.""" - return Expr(f.uuid(arg.expr)) + return Expr(f.uuid()) def struct(*args: Expr) -> Expr: diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index a8252b53c..517cd1578 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -37,12 +37,12 @@ def is_correct_input(self, input_item: Any, table_name: str, **kwargs): def build_table( self, - input_file: str, + input_item: str, table_name: str, **kwargs, ) -> SqlTable: """Create a table from the input source.""" - _, extension = os.path.splitext(input_file) + _, extension = os.path.splitext(input_item) format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] @@ -50,7 +50,7 @@ def build_table( import pyarrow.parquet as pq # Read the Parquet metadata - metadata = pq.read_metadata(input_file) + metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable for col in metadata.schema: @@ -69,7 +69,7 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. 
- with open(input_file, "r") as file: + with open(input_item, "r") as file: reader = csv.reader(file) header_row = next(reader) print(header_row) @@ -84,6 +84,6 @@ def build_table( ) # Input could possibly be multiple files. Create a list if so - input_files = glob.glob(input_file) + input_files = glob.glob(input_item) return SqlTable(table_name, columns, num_rows, input_files) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index c97f453d0..0bba3d723 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -85,7 +85,7 @@ class ScalarUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[..., _R], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: _R, @@ -182,7 +182,7 @@ class AggregateUDF: def __init__( self, - name: Optional[str], + name: str, accumulator: Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, @@ -277,6 +277,7 @@ def sum_bias_10() -> Summarize: ) if name is None: name = accum.__call__().__class__.__qualname__.lower() + assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] return AggregateUDF( @@ -462,7 +463,7 @@ class WindowUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index b1a739b49..fca05bb8f 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -871,7 +871,22 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_seconds( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + 
f.to_timestamp_millis( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_micros( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), + f.to_timestamp_nanos( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), ) result = df.collect() assert len(result) == 1 @@ -913,6 +928,21 @@ def test_temporal_functions(df): assert result.column(11) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") ) + assert result.column(12) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + ) + assert result.column(13) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + ) + assert result.column(14) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + ) + assert result.column(15) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) + assert result.column(16) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): From acd70409f73f299a144e7ff4115c6e6035c3ffb5 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sat, 8 Mar 2025 16:37:10 +0100 Subject: [PATCH 015/206] feat: reads using global ctx (#982) * feat: reads using global ctx * Add text to io methods to describe the context they are using --------- Co-authored-by: Tim Saucer --- python/datafusion/__init__.py | 5 + python/datafusion/io.py | 199 ++++++++++++++++++++++++++ python/tests/test_io.py | 95 ++++++++++++ python/tests/test_wrapper_coverage.py | 2 + src/context.rs | 12 +- src/utils.rs | 8 ++ 6 files changed, 319 insertions(+), 2 deletions(-) create mode 100644 python/datafusion/io.py create mode 100644 python/tests/test_io.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 
85aefcce7..f11ce54a6 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -45,6 +45,7 @@ Expr, WindowFrame, ) +from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF @@ -81,6 +82,10 @@ "functions", "object_store", "substrait", + "read_parquet", + "read_avro", + "read_csv", + "read_json", ] diff --git a/python/datafusion/io.py b/python/datafusion/io.py new file mode 100644 index 000000000..7f3b77efa --- /dev/null +++ b/python/datafusion/io.py @@ -0,0 +1,199 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""IO read functions using global context.""" + +import pathlib + +import pyarrow + +from datafusion.dataframe import DataFrame +from datafusion.expr import Expr + +from ._internal import SessionContext as SessionContextInternal + + +def read_parquet( + path: str | pathlib.Path, + table_partition_cols: list[tuple[str, str]] | None = None, + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: pyarrow.Schema | None = None, + file_sort_order: list[list[Expr]] | None = None, +) -> DataFrame: + """Read a Parquet source into a :py:class:`~datafusion.dataframe.DataFrame`. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the Parquet file. + table_partition_cols: Partition columns. + parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + file_extension: File extension; only files with this extension are + selected for data input. + skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + file_sort_order: Sort order for the file. 
+ + Returns: + DataFrame representation of the read Parquet files + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, + ) + ) + + +def read_json( + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, +) -> DataFrame: + """Read a line-delimited JSON data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the JSON file. + schema: The data source schema. + schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read JSON files. + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + +def read_csv( + path: str | pathlib.Path | list[str] | list[pathlib.Path], + schema: pyarrow.Schema | None = None, + has_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, +) -> DataFrame: + """Read a CSV data source. + + This function will use the global context. 
Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the CSV file + schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in file. + has_header: Whether the CSV file has a header. If schema inference + is run on a file with no headers, default column names are + created. + delimiter: An optional column delimiter. + schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read CSV files + """ + if table_partition_cols is None: + table_partition_cols = [] + + path = [str(p) for p in path] if isinstance(path, list) else str(path) + + return DataFrame( + SessionContextInternal._global_ctx().read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + +def read_avro( + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + file_partition_cols: list[tuple[str, str]] | None = None, + file_extension: str = ".avro", +) -> DataFrame: + """Create a :py:class:`DataFrame` for reading an Avro data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the Avro file. + schema: The data source schema. + file_partition_cols: Partition columns. + file_extension: File extension to select. 
+ + Returns: + DataFrame representation of the read Avro file + """ + if file_partition_cols is None: + file_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_avro( + str(path), schema, file_partition_cols, file_extension + ) + ) diff --git a/python/tests/test_io.py b/python/tests/test_io.py new file mode 100644 index 000000000..21ad188ee --- /dev/null +++ b/python/tests/test_io.py @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import os +import pathlib + +import pyarrow as pa +from datafusion import column +from datafusion.io import read_avro, read_csv, read_json, read_parquet + + +def test_read_json_global_ctx(ctx): + path = os.path.dirname(os.path.abspath(__file__)) + + # Default + test_data_path = os.path.join(path, "data_test_context", "data.json") + df = read_json(test_data_path) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + # Schema + schema = pa.schema( + [ + pa.field("A", pa.string(), nullable=True), + ] + ) + df = read_json(test_data_path, schema=schema) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].schema == schema + + # File extension + test_data_path = os.path.join(path, "data_test_context", "data.json") + df = read_json(test_data_path, file_extension=".json") + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + +def test_read_parquet_global(): + parquet_df = read_parquet(path="parquet/data/alltypes_plain.parquet") + parquet_df.show() + assert parquet_df is not None + + path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" + parquet_df = read_parquet(path=path) + assert parquet_df is not None + + +def test_read_csv(): + csv_df = read_csv(path="testing/data/csv/aggregate_test_100.csv") + csv_df.select(column("c1")).show() + + +def test_read_csv_list(): + csv_df = read_csv(path=["testing/data/csv/aggregate_test_100.csv"]) + expected = csv_df.count() * 2 + + double_csv_df = read_csv( + path=[ + "testing/data/csv/aggregate_test_100.csv", + "testing/data/csv/aggregate_test_100.csv", + ] + ) + actual = double_csv_df.count() + + double_csv_df.select(column("c1")).show() + assert actual == expected + + +def test_read_avro(): + avro_df = read_avro(path="testing/data/avro/alltypes_plain.avro") + avro_df.show() + assert avro_df is not None 
+ + path = pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro" + avro_df = read_avro(path=path) + assert avro_df is not None diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index 86f2d57f2..ac064ba95 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -34,6 +34,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: return for attr in dir(internal_obj): + if attr in ["_global_ctx"]: + continue assert attr in dir(wrapped_obj) internal_attr = getattr(internal_obj, attr) diff --git a/src/context.rs b/src/context.rs index 0f962638e..9ba87eb8a 100644 --- a/src/context.rs +++ b/src/context.rs @@ -44,7 +44,7 @@ use crate::store::StorageContexts; use crate::udaf::PyAggregateUDF; use crate::udf::PyScalarUDF; use crate::udwf::PyWindowUDF; -use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future}; +use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_for_future}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use datafusion::arrow::record_batch::RecordBatch; @@ -69,7 +69,7 @@ use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; -use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -306,6 +306,14 @@ impl PySessionContext { }) } + #[classmethod] + #[pyo3(signature = ())] + fn _global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { + Ok(Self { + ctx: get_global_ctx().clone(), + }) + } + /// Register an object store with the given name #[pyo3(signature = (scheme, store, host=None))] pub fn register_object_store( diff --git a/src/utils.rs b/src/utils.rs index ed224b364..999aad755 100644 --- 
a/src/utils.rs +++ b/src/utils.rs @@ -17,6 +17,7 @@ use crate::errors::{PyDataFusionError, PyDataFusionResult}; use crate::TokioRuntime; +use datafusion::execution::context::SessionContext; use datafusion::logical_expr::Volatility; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -37,6 +38,13 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap())) } +/// Utility to get the global DataFusion context +#[inline] +pub(crate) fn get_global_ctx() -> &'static SessionContext { + static CTX: OnceLock = OnceLock::new(); + CTX.get_or_init(|| SessionContext::new()) +} + /// Utility to collect rust futures with GIL released pub fn wait_for_future(py: Python, f: F) -> F::Output where From 973d7ec4a8196a78bc4fb32db4f24e523997ba4c Mon Sep 17 00:00:00 2001 From: Crystal Zhou <45134936+CrystalZhou0529@users.noreply.github.com> Date: Sat, 8 Mar 2025 16:23:54 -0500 Subject: [PATCH 016/206] feat: Implementation of udf and udaf decorator (#1040) * Implementation of udf and udaf decorator * Rename decorators back to udf and udaf, update documentations * Minor typo fixes * Fixing linting errors * ruff formatting --------- Co-authored-by: Tim Saucer --- python/datafusion/udf.py | 257 +++++++++++++++++++++++++++----------- python/tests/test_udaf.py | 42 +++++++ python/tests/test_udf.py | 42 ++++++- 3 files changed, 265 insertions(+), 76 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 0bba3d723..af7bcf2ed 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -19,6 +19,7 @@ from __future__ import annotations +import functools from abc import ABCMeta, abstractmethod from enum import Enum from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar @@ -110,43 +111,102 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) - @staticmethod - def udf( - func: Callable[..., 
_R], - input_types: list[pyarrow.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> ScalarUDF: - """Create a new User-Defined Function. + class udf: + """Create a new User-Defined Function (UDF). + + This class can be used both as a **function** and as a **decorator**. + + Usage: + - **As a function**: Call `udf(func, input_types, return_type, volatility, + name)`. + - **As a decorator**: Use `@udf(input_types, return_type, volatility, + name)`. In this case, do **not** pass `func` explicitly. Args: - func: A callable python function. - input_types: The data types of the arguments to ``func``. This list - must be of the same length as the number of arguments. - return_type: The data type of the return value from the python - function. - volatility: See ``Volatility`` for allowed values. - name: A descriptive name for the function. + func (Callable, optional): **Only needed when calling as a function.** + Skip this argument when using `udf` as a decorator. + input_types (list[pyarrow.DataType]): The data types of the arguments + to `func`. This list must be of the same length as the number of + arguments. + return_type (_R): The data type of the return value from the function. + volatility (Volatility | str): See `Volatility` for allowed values. + name (Optional[str]): A descriptive name for the function. Returns: - A user-defined aggregate function, which can be used in either data - aggregation or window function calls. + A user-defined function that can be used in SQL expressions, + data aggregation, or window function calls. 
+ + Example: + **Using `udf` as a function:** + ``` + def double_func(x): + return x * 2 + double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), + "volatile", "double_it") + ``` + + **Using `udf` as a decorator:** + ``` + @udf([pyarrow.int32()], pyarrow.int32(), "volatile", "double_it") + def double_udf(x): + return x * 2 + ``` """ - if not callable(func): - raise TypeError("`func` argument must be callable") - if name is None: - if hasattr(func, "__qualname__"): - name = func.__qualname__.lower() + + def __new__(cls, *args, **kwargs): + """Create a new UDF. + + Trigger UDF function or decorator depending on if the first args is callable + """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return cls._function(*args, **kwargs) else: - name = func.__class__.__name__.lower() - return ScalarUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, - ) + # Case 2: Used as a decorator with parameters + return cls._decorator(*args, **kwargs) + + @staticmethod + def _function( + func: Callable[..., _R], + input_types: list[pyarrow.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> ScalarUDF: + if not callable(func): + raise TypeError("`func` argument must be callable") + if name is None: + if hasattr(func, "__qualname__"): + name = func.__qualname__.lower() + else: + name = func.__class__.__name__.lower() + return ScalarUDF( + name=name, + func=func, + input_types=input_types, + return_type=return_type, + volatility=volatility, + ) + + @staticmethod + def _decorator( + input_types: list[pyarrow.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ): + def decorator(func): + udf_caller = ScalarUDF.udf( + func, input_types, return_type, volatility, name + ) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + return udf_caller(*args, **kwargs) + + return 
wrapper + + return decorator class Accumulator(metaclass=ABCMeta): @@ -212,25 +272,27 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udaf.__call__(*args_raw)) - @staticmethod - def udaf( - accum: Callable[[], Accumulator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> AggregateUDF: - """Create a new User-Defined Aggregate Function. + class udaf: + """Create a new User-Defined Aggregate Function (UDAF). - If your :py:class:`Accumulator` can be instantiated with no arguments, you - can simply pass it's type as ``accum``. If you need to pass additional arguments - to it's constructor, you can define a lambda or a factory method. During runtime - the :py:class:`Accumulator` will be constructed for every instance in - which this UDAF is used. The following examples are all valid. + This class allows you to define an **aggregate function** that can be used in + data aggregation or window function calls. - .. code-block:: python + Usage: + - **As a function**: Call `udaf(accum, input_types, return_type, state_type, + volatility, name)`. + - **As a decorator**: Use `@udaf(input_types, return_type, state_type, + volatility, name)`. + When using `udaf` as a decorator, **do not pass `accum` explicitly**. + **Function example:** + + If your :py:class:`Accumulator` can be instantiated with no arguments, you + can simply pass its type as `accum`. If you need to pass additional + arguments to its constructor, you can define a lambda or a factory method. + During runtime the :py:class:`Accumulator` will be constructed for every + instance in which this UDAF is used. The following examples are all valid. 
+ ``` import pyarrow as pa import pyarrow.compute as pc @@ -253,12 +315,24 @@ def evaluate(self) -> pa.Scalar: def sum_bias_10() -> Summarize: return Summarize(10.0) - udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], "immutable") - udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], "immutable") - udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), [pa.float64()], "immutable") + udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), + [pa.float64()], "immutable") + ``` + + **Decorator example:** + ``` + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def udf4() -> Summarize: + return Summarize(10.0) + ``` Args: - accum: The accumulator python function. + accum: The accumulator python function. **Only needed when calling as a + function. Skip this argument when using `udaf` as a decorator.** input_types: The data types of the arguments to ``accum``. return_type: The data type of the return value. state_type: The data types of the intermediate accumulation. @@ -268,26 +342,69 @@ def sum_bias_10() -> Summarize: Returns: A user-defined aggregate function, which can be used in either data aggregation or window function calls. - """ # noqa W505 - if not callable(accum): - raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): - raise TypeError( - "Accumulator must implement the abstract base class Accumulator" + """ + + def __new__(cls, *args, **kwargs): + """Create a new UDAF. 
+ + Trigger UDAF function or decorator depending on if the first args is + callable + """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return cls._function(*args, **kwargs) + else: + # Case 2: Used as a decorator with parameters + return cls._decorator(*args, **kwargs) + + @staticmethod + def _function( + accum: Callable[[], Accumulator], + input_types: pyarrow.DataType | list[pyarrow.DataType], + return_type: pyarrow.DataType, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: + if not callable(accum): + raise TypeError("`func` must be callable.") + if not isinstance(accum.__call__(), Accumulator): + raise TypeError( + "Accumulator must implement the abstract base class Accumulator" + ) + if name is None: + name = accum.__call__().__class__.__qualname__.lower() + if isinstance(input_types, pyarrow.DataType): + input_types = [input_types] + return AggregateUDF( + name=name, + accumulator=accum, + input_types=input_types, + return_type=return_type, + state_type=state_type, + volatility=volatility, ) - if name is None: - name = accum.__call__().__class__.__qualname__.lower() - assert name is not None - if isinstance(input_types, pyarrow.DataType): - input_types = [input_types] - return AggregateUDF( - name=name, - accumulator=accum, - input_types=input_types, - return_type=return_type, - state_type=state_type, - volatility=volatility, - ) + + @staticmethod + def _decorator( + input_types: pyarrow.DataType | list[pyarrow.DataType], + return_type: pyarrow.DataType, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ): + def decorator(accum: Callable[[], Accumulator]): + udaf_caller = AggregateUDF.udaf( + accum, input_types, return_type, state_type, volatility, name + ) + + @functools.wraps(accum) + def wrapper(*args, **kwargs): + return udaf_caller(*args, **kwargs) + + return wrapper + 
+ return decorator class WindowEvaluator(metaclass=ABCMeta): diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 0005a3da8..e69c77d3c 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -117,6 +117,26 @@ def test_udaf_aggregate(df): assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) +def test_udaf_decorator_aggregate(df): + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def summarize(): + return Summarize() + + df1 = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df1.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + df2 = df.aggregate([], [summarize(column("a"))]) + + # Run a second time to ensure the state is properly reset + result = df2.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + def test_udaf_aggregate_with_arguments(df): bias = 10.0 @@ -143,6 +163,28 @@ def test_udaf_aggregate_with_arguments(df): assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) +def test_udaf_decorator_aggregate_with_arguments(df): + bias = 10.0 + + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def summarize(): + return Summarize(bias) + + df1 = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df1.collect()[0] + + assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) + + df2 = df.aggregate([], [summarize(column("a"))]) + + # Run a second time to ensure the state is properly reset + result = df2.collect()[0] + + assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) + + def test_group_by(df): summarize = udaf( Summarize, diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index 3a5dce6d6..a6c047552 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -24,7 +24,7 @@ def df(ctx): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( - [pa.array([1, 
2, 3]), pa.array([4, 4, 6])], + [pa.array([1, 2, 3]), pa.array([4, 4, None])], names=["a", "b"], ) return ctx.create_dataframe([[batch]], name="test_table") @@ -39,10 +39,20 @@ def test_udf(df): volatility="immutable", ) - df = df.select(is_null(column("a"))) + df = df.select(is_null(column("b"))) result = df.collect()[0].column(0) - assert result == pa.array([False, False, False]) + assert result == pa.array([False, False, True]) + + +def test_udf_decorator(df): + @udf([pa.int64()], pa.bool_(), "immutable") + def is_null(x: pa.Array) -> pa.Array: + return x.is_null() + + df = df.select(is_null(column("b"))) + result = df.collect()[0].column(0) + assert result == pa.array([False, False, True]) def test_register_udf(ctx, df) -> None: @@ -56,10 +66,10 @@ def test_register_udf(ctx, df) -> None: ctx.register_udf(is_null) - df_result = ctx.sql("select is_null(a) from test_table") + df_result = ctx.sql("select is_null(b) from test_table") result = df_result.collect()[0].column(0) - assert result == pa.array([False, False, False]) + assert result == pa.array([False, False, True]) class OverThresholdUDF: @@ -70,7 +80,7 @@ def __call__(self, values: pa.Array) -> pa.Array: return pa.array(v.as_py() >= self.threshold for v in values) -def test_udf_with_parameters(df) -> None: +def test_udf_with_parameters_function(df) -> None: udf_no_param = udf( OverThresholdUDF(), pa.int64(), @@ -94,3 +104,23 @@ def test_udf_with_parameters(df) -> None: result = df2.collect()[0].column(0) assert result == pa.array([False, True, True]) + + +def test_udf_with_parameters_decorator(df) -> None: + @udf([pa.int64()], pa.bool_(), "immutable") + def udf_no_param(values: pa.Array) -> pa.Array: + return OverThresholdUDF()(values) + + df1 = df.select(udf_no_param(column("a"))) + result = df1.collect()[0].column(0) + + assert result == pa.array([True, True, True]) + + @udf([pa.int64()], pa.bool_(), "immutable") + def udf_with_param(values: pa.Array) -> pa.Array: + return OverThresholdUDF(2)(values) + + 
df2 = df.select(udf_with_param(column("a"))) + result = df2.collect()[0].column(0) + + assert result == pa.array([False, True, True]) From d72f5605b3d523585d04857505793920f96242ba Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 10 Mar 2025 06:56:12 -0400 Subject: [PATCH 017/206] Enable FA ruff lint (#1052) --- examples/python-udwf.py | 2 ++ pyproject.toml | 2 +- python/datafusion/io.py | 2 ++ python/tests/test_udaf.py | 2 ++ python/tests/test_udwf.py | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 32f8fadaa..7d39dc1b8 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import datafusion import pyarrow as pa from datafusion import col, lit, udwf diff --git a/pyproject.toml b/pyproject.toml index f416e02a5..d16a18aa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W", "I"] +select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 7f3b77efa..3b6264948 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -17,6 +17,8 @@ """IO read functions using global context.""" +from __future__ import annotations + import pathlib import pyarrow diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index e69c77d3c..97cf81f3c 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations + from typing import List import pyarrow as pa diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 0ffa04179..2fea34aa3 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import pyarrow as pa import pytest from datafusion import SessionContext, column, lit, udwf From 0002372ccdb780e011631c797ec9613174cf0a94 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 10 Mar 2025 14:22:42 -0400 Subject: [PATCH 018/206] Enable take comments to assign issues to users (#1058) --- .github/workflows/take.yml | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/take.yml diff --git a/.github/workflows/take.yml b/.github/workflows/take.yml new file mode 100644 index 000000000..86dc190ad --- /dev/null +++ b/.github/workflows/take.yml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Assign the issue via a `take` comment +on: + issue_comment: + types: created + +permissions: + issues: write + +jobs: + issue_assign: + runs-on: ubuntu-latest + if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' + concurrency: + group: ${{ github.actor }}-issue-assign + steps: + - run: | + CODE=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -LI https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees/${{ github.event.comment.user.login }} -o /dev/null -w '%{http_code}\n' -s) + if [ "$CODE" -eq "204" ] + then + echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees + else + echo "Cannot assign issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + fi \ No newline at end of file From 9d634de6df2f8b76bd303ab1f5972f01deb2210d Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 10 Mar 2025 14:24:40 -0400 Subject: [PATCH 019/206] Update python min version to 3.9 (#1043) * 3.8 -> 3.9 * upgrade pyo3 abi3-py38 -> abi3-py39 --- Cargo.toml | 2 +- .../source/contributor-guide/introduction.rst | 2 +- examples/ffi-table-provider/Cargo.lock | 75 +- examples/ffi-table-provider/Cargo.toml | 2 +- examples/ffi-table-provider/pyproject.toml | 2 +- pyproject.toml | 3 +- uv.lock | 707 ++---------------- 7 files changed, 121 insertions(+), 672 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5358b1836..50967a219 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } 
+pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} arrow = { version = "54", features = ["pyarrow"] } datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index 25f2c21a4..2fba64111 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -118,7 +118,7 @@ be ignored by ``git``. .. code-block:: implementation=CPython - version=3.8 + version=3.9 shared=true abi3=true lib_name=python3.12 diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock index 32af85180..8d0edd515 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/ffi-table-provider/Cargo.lock @@ -766,7 +766,8 @@ dependencies = [ [[package]] name = "datafusion" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ "arrow", "arrow-array", @@ -816,7 +817,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" dependencies = [ "arrow", "async-trait", @@ -836,7 +838,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ "ahash", "arrow", @@ -862,7 +865,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" dependencies = [ "log", "tokio", @@ -871,12 +875,14 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" [[package]] name = "datafusion-execution" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" dependencies = [ "arrow", "dashmap", @@ -894,7 +900,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" dependencies = [ "arrow", "chrono", @@ -914,7 +921,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" dependencies = [ "arrow", "datafusion-common", @@ -925,7 +933,8 
@@ dependencies = [ [[package]] name = "datafusion-ffi" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" dependencies = [ "abi_stable", "arrow", @@ -945,7 +954,8 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" dependencies = [ "arrow", "arrow-buffer", @@ -974,7 +984,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" dependencies = [ "ahash", "arrow", @@ -996,7 +1007,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" dependencies = [ "ahash", "arrow", @@ -1008,7 +1020,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" dependencies = [ "arrow", "arrow-array", @@ -1031,7 +1044,8 @@ 
dependencies = [ [[package]] name = "datafusion-functions-table" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" dependencies = [ "arrow", "async-trait", @@ -1046,7 +1060,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1062,7 +1077,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1071,7 +1087,8 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" dependencies = [ "datafusion-expr", "quote", @@ -1081,7 +1098,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" 
dependencies = [ "arrow", "chrono", @@ -1099,7 +1117,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" dependencies = [ "ahash", "arrow", @@ -1123,7 +1142,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" dependencies = [ "ahash", "arrow", @@ -1137,7 +1157,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" dependencies = [ "arrow", "arrow-schema", @@ -1158,7 +1179,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" dependencies = [ "ahash", "arrow", @@ -1189,7 +1211,8 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" dependencies = [ 
"arrow", "chrono", @@ -1204,7 +1227,8 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" dependencies = [ "arrow", "datafusion-common", @@ -1214,7 +1238,8 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" dependencies = [ "arrow", "arrow-array", diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 0e558fdd0..f4e4fda79 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -23,7 +23,7 @@ edition = "2021" [dependencies] datafusion = { version = "45.0.0" } datafusion-ffi = { version = "45.0.0" } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } arrow = { version = "54" } arrow-array = { version = "54" } arrow-schema = { version = "54" } diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-table-provider/pyproject.toml index 116efae9c..9cd25b423 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/ffi-table-provider/pyproject.toml @@ -21,7 +21,7 @@ build-backend = "maturin" [project] name = "ffi_table_provider" -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/pyproject.toml b/pyproject.toml index d16a18aa6..1c2733677 100644 --- a/pyproject.toml 
+++ b/pyproject.toml @@ -24,7 +24,7 @@ name = "datafusion" description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifiers = [ "Development Status :: 2 - Pre-Alpha", @@ -35,7 +35,6 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/uv.lock b/uv.lock index 587ddc8b7..619b92856 100644 --- a/uv.lock +++ b/uv.lock @@ -1,23 +1,10 @@ version = 1 -requires-python = ">=3.8" +requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.12'", "python_full_version == '3.11.*'", "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", - "python_full_version < '3.9'", -] - -[[package]] -name = "alabaster" -version = "0.7.13" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/94/71/a8ee96d1fd95ca04a0d2e2d9c4081dac4c2d2b12f7ddb899c8cb9bfd1532/alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2", size = 11454 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3", size = 13857 }, + "python_full_version < '3.10'", ] [[package]] @@ -25,7 +12,7 @@ name = "alabaster" version = "0.7.16" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] sdist = { url = 
"https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776 } wheels = [ @@ -46,42 +33,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, ] -[[package]] -name = "appnope" -version = "0.1.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, -] - -[[package]] -name = "astroid" -version = "3.2.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/53/1067e1113ecaf58312357f2cd93063674924119d80d173adc3f6f2387aa2/astroid-3.2.4.tar.gz", hash = "sha256:0e14202810b30da1b735827f78f5157be2bbd4a7a59b7707ca0bfc2fb4c0063a", size = 397576 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/96/b32bbbb46170a1c8b8b1f28c794202e25cfe743565e9d3469b8eb1e0cc05/astroid-3.2.4-py3-none-any.whl", hash = "sha256:413658a61eeca6202a59231abb473f932038fbcbf1666587f66d482083413a25", size = 276348 }, -] - [[package]] name = "astroid" version = "3.3.8" source = { registry = 
"https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] dependencies = [ - { name = "typing-extensions", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196 } wheels = [ @@ -101,23 +58,11 @@ wheels = [ name = "babel" version = "2.16.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pytz", marker = "python_full_version < '3.9'" }, -] sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } wheels = [ { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, ] -[[package]] -name = "backcall" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/40/764a663805d84deee23043e1426a9175567db89c8b3287b5c2ad9f71aa93/backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", size = 18041 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/1c/ff6546b6c12603d8dd1070aa3c3d273ad4c07f5771689a7b69a550e8c951/backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255", size = 11157 }, -] - [[package]] name = 
"beautifulsoup4" version = "4.12.3" @@ -194,14 +139,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469 }, { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, - { url = "https://files.pythonhosted.org/packages/48/08/15bf6b43ae9bd06f6b00ad8a91f5a8fe1069d4c9fab550a866755402724e/cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", size = 182457 }, - { url = "https://files.pythonhosted.org/packages/c2/5b/f1523dd545f92f7df468e5f653ffa4df30ac222f3c884e51e139878f1cb5/cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", size = 425932 }, - { url = "https://files.pythonhosted.org/packages/53/93/7e547ab4105969cc8c93b38a667b82a835dd2cc78f3a7dad6130cfd41e1d/cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", size = 448585 }, - { url = "https://files.pythonhosted.org/packages/56/c4/a308f2c332006206bb511de219efeff090e9d63529ba0a77aae72e82248b/cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", size = 
456268 }, - { url = "https://files.pythonhosted.org/packages/ca/5b/b63681518265f2f4060d2b60755c1c77ec89e5e045fc3773b72735ddaad5/cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", size = 436592 }, - { url = "https://files.pythonhosted.org/packages/bb/19/b51af9f4a4faa4a8ac5a0e5d5c2522dcd9703d07fac69da34a36c4d960d3/cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", size = 446512 }, - { url = "https://files.pythonhosted.org/packages/e2/63/2bed8323890cb613bbecda807688a31ed11a7fe7afe31f8faaae0206a9a3/cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", size = 171576 }, - { url = "https://files.pythonhosted.org/packages/2f/70/80c33b044ebc79527447fd4fbc5455d514c3bb840dede4455de97da39b4d/cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", size = 181229 }, { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220 }, { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605 }, { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910 }, @@ -274,19 +211,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, - { url = "https://files.pythonhosted.org/packages/10/bd/6517ea94f2672e801011d50b5d06be2a0deaf566aea27bcdcd47e5195357/charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c", size = 195653 }, - { url = "https://files.pythonhosted.org/packages/e5/0d/815a2ba3f283b4eeaa5ece57acade365c5b4135f65a807a083c818716582/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9", size = 140701 }, - { url = "https://files.pythonhosted.org/packages/aa/17/c94be7ee0d142687e047fe1de72060f6d6837f40eedc26e87e6e124a3fc6/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8", size = 150495 }, - { url = "https://files.pythonhosted.org/packages/f7/33/557ac796c47165fc141e4fb71d7b0310f67e05cb420756f3a82e0a0068e0/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6", size = 142946 }, - { url = 
"https://files.pythonhosted.org/packages/1e/0d/38ef4ae41e9248d63fc4998d933cae22473b1b2ac4122cf908d0f5eb32aa/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c", size = 144737 }, - { url = "https://files.pythonhosted.org/packages/43/01/754cdb29dd0560f58290aaaa284d43eea343ad0512e6ad3b8b5c11f08592/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a", size = 147471 }, - { url = "https://files.pythonhosted.org/packages/ba/cd/861883ba5160c7a9bd242c30b2c71074cda2aefcc0addc91118e0d4e0765/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd", size = 140801 }, - { url = "https://files.pythonhosted.org/packages/6f/7f/0c0dad447819e90b93f8ed238cc8f11b91353c23c19e70fa80483a155bed/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd", size = 149312 }, - { url = "https://files.pythonhosted.org/packages/8e/09/9f8abcc6fff60fb727268b63c376c8c79cc37b833c2dfe1f535dfb59523b/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824", size = 152347 }, - { url = "https://files.pythonhosted.org/packages/be/e5/3f363dad2e24378f88ccf63ecc39e817c29f32e308ef21a7a6d9c1201165/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca", size = 149888 }, - { url = "https://files.pythonhosted.org/packages/e4/10/a78c0e91f487b4ad0ef7480ac765e15b774f83de2597f1b6ef0eaf7a2f99/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b", 
size = 145169 }, - { url = "https://files.pythonhosted.org/packages/d3/81/396e7d7f5d7420da8273c91175d2e9a3f569288e3611d521685e4b9ac9cc/charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e", size = 95094 }, - { url = "https://files.pythonhosted.org/packages/40/bb/20affbbd9ea29c71ea123769dc568a6d42052ff5089c5fe23e21e21084a6/charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4", size = 102139 }, { url = "https://files.pythonhosted.org/packages/7f/c0/b913f8f02836ed9ab32ea643c6fe4d3325c3d8627cf6e78098671cafff86/charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", size = 197867 }, { url = "https://files.pythonhosted.org/packages/0f/6c/2bee440303d705b6fb1e2ec789543edec83d32d258299b16eed28aad48e0/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", size = 141385 }, { url = "https://files.pythonhosted.org/packages/3d/04/cb42585f07f6f9fd3219ffb6f37d5a39b4fd2db2355b23683060029c35f7/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", size = 151367 }, @@ -351,11 +275,9 @@ wheels = [ [[package]] name = "datafusion" -version = "44.0.0" source = { editable = "." 
} dependencies = [ - { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pyarrow" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -369,20 +291,16 @@ dev = [ { name = "toml" }, ] docs = [ - { name = "ipython", version = "8.12.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "ipython", version = "8.31.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "jinja2" }, { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "myst-parser", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "pandas", version = "2.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "pandas", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pandas" }, { name = "pickleshare" }, { name = "pydata-sphinx-theme" }, - { name = "setuptools", version = "75.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "setuptools", version = "75.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, - { name = "sphinx", version = "7.1.2", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "setuptools" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "sphinx-autoapi" }, ] @@ -435,28 +353,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, ] -[[package]] -name = "docutils" -version = "0.20.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666 }, -] - [[package]] name = "docutils" version = "0.21.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = 
"sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 } wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, @@ -503,8 +403,7 @@ name = "importlib-metadata" version = "8.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp", version = "3.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "zipp", version = "3.21.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "zipp", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 } wheels = [ @@ -520,52 +419,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] -[[package]] -name = "ipython" -version = "8.12.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "appnope", marker = "python_full_version < '3.9' and sys_platform == 'darwin'" }, - { name = "backcall", marker = "python_full_version < '3.9'" }, - { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version < '3.9'" }, - { name = "jedi", marker = "python_full_version < '3.9'" }, - { name = "matplotlib-inline", marker = "python_full_version < '3.9'" }, - 
{ name = "pexpect", marker = "python_full_version < '3.9' and sys_platform != 'win32'" }, - { name = "pickleshare", marker = "python_full_version < '3.9'" }, - { name = "prompt-toolkit", marker = "python_full_version < '3.9'" }, - { name = "pygments", marker = "python_full_version < '3.9'" }, - { name = "stack-data", marker = "python_full_version < '3.9'" }, - { name = "traitlets", marker = "python_full_version < '3.9'" }, - { name = "typing-extensions", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/6a/44ef299b1762f5a73841e87fae8a73a8cc8aee538d6dc8c77a5afe1fd2ce/ipython-8.12.3.tar.gz", hash = "sha256:3910c4b54543c2ad73d06579aa771041b7d5707b033bd488669b4cf544e3b363", size = 5470171 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/97/8fe103906cd81bc42d3b0175b5534a9f67dccae47d6451131cf8d0d70bb2/ipython-8.12.3-py3-none-any.whl", hash = "sha256:b0340d46a933d27c657b211a329d0be23793c36595acf9e6ef4164bc01a1804c", size = 798307 }, -] - [[package]] name = "ipython" version = "8.18.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version == '3.9.*'" }, - { name = "exceptiongroup", marker = "python_full_version == '3.9.*'" }, - { name = "jedi", marker = "python_full_version == '3.9.*'" }, - { name = "matplotlib-inline", marker = "python_full_version == '3.9.*'" }, - { name = "pexpect", marker = "python_full_version == '3.9.*' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version == '3.9.*'" }, - { name = "pygments", marker = "python_full_version == '3.9.*'" }, - { name = "stack-data", marker = "python_full_version == '3.9.*'" }, - { name = "traitlets", marker = "python_full_version == '3.9.*'" }, - { name = 
"typing-extensions", marker = "python_full_version == '3.9.*'" }, + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version < '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, + { name = "jedi", marker = "python_full_version < '3.10'" }, + { name = "matplotlib-inline", marker = "python_full_version < '3.10'" }, + { name = "pexpect", marker = "python_full_version < '3.10' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "stack-data", marker = "python_full_version < '3.10'" }, + { name = "traitlets", marker = "python_full_version < '3.10'" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b1/b9/3ba6c45a6df813c09a48bac313c22ff83efa26cbb55011218d925a46e2ad/ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", size = 5486330 } wheels = [ @@ -616,8 +488,7 @@ name = "jinja2" version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markupsafe", version = "2.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "markupsafe", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "markupsafe" }, ] sdist = { url = "https://files.pythonhosted.org/packages/af/92/b3130cbbf5591acf9ade8708c365f3238046ac7cb8ccba6e81abccb0ccff/jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", size = 244674 } wheels = [ @@ -636,77 +507,10 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, ] -[[package]] -name = "markupsafe" -version = "2.1.5" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/87/5b/aae44c6655f3801e81aa3eef09dbbf012431987ba564d7231722f68df02d/MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", size = 19384 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/54/ad5eb37bf9d51800010a74e4665425831a9db4e7c4e0fde4352e391e808e/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", size = 18206 }, - { url = "https://files.pythonhosted.org/packages/6a/4a/a4d49415e600bacae038c67f9fecc1d5433b9d3c71a4de6f33537b89654c/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", size = 14079 }, - { url = "https://files.pythonhosted.org/packages/0a/7b/85681ae3c33c385b10ac0f8dd025c30af83c78cec1c37a6aa3b55e67f5ec/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", size = 26620 }, - { url = "https://files.pythonhosted.org/packages/7c/52/2b1b570f6b8b803cef5ac28fdf78c0da318916c7d2fe9402a84d591b394c/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", size = 25818 }, - { url = 
"https://files.pythonhosted.org/packages/29/fe/a36ba8c7ca55621620b2d7c585313efd10729e63ef81e4e61f52330da781/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", size = 25493 }, - { url = "https://files.pythonhosted.org/packages/60/ae/9c60231cdfda003434e8bd27282b1f4e197ad5a710c14bee8bea8a9ca4f0/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", size = 30630 }, - { url = "https://files.pythonhosted.org/packages/65/dc/1510be4d179869f5dafe071aecb3f1f41b45d37c02329dfba01ff59e5ac5/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", size = 29745 }, - { url = "https://files.pythonhosted.org/packages/30/39/8d845dd7d0b0613d86e0ef89549bfb5f61ed781f59af45fc96496e897f3a/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", size = 30021 }, - { url = "https://files.pythonhosted.org/packages/c7/5c/356a6f62e4f3c5fbf2602b4771376af22a3b16efa74eb8716fb4e328e01e/MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", size = 16659 }, - { url = "https://files.pythonhosted.org/packages/69/48/acbf292615c65f0604a0c6fc402ce6d8c991276e16c80c46a8f758fbd30c/MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", size = 17213 }, - { url = "https://files.pythonhosted.org/packages/11/e7/291e55127bb2ae67c64d66cef01432b5933859dfb7d6949daa721b89d0b3/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", size = 18219 }, - { url = 
"https://files.pythonhosted.org/packages/6b/cb/aed7a284c00dfa7c0682d14df85ad4955a350a21d2e3b06d8240497359bf/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", size = 14098 }, - { url = "https://files.pythonhosted.org/packages/1c/cf/35fe557e53709e93feb65575c93927942087e9b97213eabc3fe9d5b25a55/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", size = 29014 }, - { url = "https://files.pythonhosted.org/packages/97/18/c30da5e7a0e7f4603abfc6780574131221d9148f323752c2755d48abad30/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", size = 28220 }, - { url = "https://files.pythonhosted.org/packages/0c/40/2e73e7d532d030b1e41180807a80d564eda53babaf04d65e15c1cf897e40/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", size = 27756 }, - { url = "https://files.pythonhosted.org/packages/18/46/5dca760547e8c59c5311b332f70605d24c99d1303dd9a6e1fc3ed0d73561/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", size = 33988 }, - { url = "https://files.pythonhosted.org/packages/6d/c5/27febe918ac36397919cd4a67d5579cbbfa8da027fa1238af6285bb368ea/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", size = 32718 }, - { url = "https://files.pythonhosted.org/packages/f8/81/56e567126a2c2bc2684d6391332e357589a96a76cb9f8e5052d85cb0ead8/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", size = 33317 }, - { url = 
"https://files.pythonhosted.org/packages/00/0b/23f4b2470accb53285c613a3ab9ec19dc944eaf53592cb6d9e2af8aa24cc/MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", size = 16670 }, - { url = "https://files.pythonhosted.org/packages/b7/a2/c78a06a9ec6d04b3445a949615c4c7ed86a0b2eb68e44e7541b9d57067cc/MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", size = 17224 }, - { url = "https://files.pythonhosted.org/packages/53/bd/583bf3e4c8d6a321938c13f49d44024dbe5ed63e0a7ba127e454a66da974/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", size = 18215 }, - { url = "https://files.pythonhosted.org/packages/48/d6/e7cd795fc710292c3af3a06d80868ce4b02bfbbf370b7cee11d282815a2a/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", size = 14069 }, - { url = "https://files.pythonhosted.org/packages/51/b5/5d8ec796e2a08fc814a2c7d2584b55f889a55cf17dd1a90f2beb70744e5c/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", size = 29452 }, - { url = "https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", size = 28462 }, - { url = "https://files.pythonhosted.org/packages/2d/75/fd6cb2e68780f72d47e6671840ca517bda5ef663d30ada7616b0462ad1e3/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", size = 27869 }, - { url = 
"https://files.pythonhosted.org/packages/b0/81/147c477391c2750e8fc7705829f7351cf1cd3be64406edcf900dc633feb2/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", size = 33906 }, - { url = "https://files.pythonhosted.org/packages/8b/ff/9a52b71839d7a256b563e85d11050e307121000dcebc97df120176b3ad93/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", size = 32296 }, - { url = "https://files.pythonhosted.org/packages/88/07/2dc76aa51b481eb96a4c3198894f38b480490e834479611a4053fbf08623/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", size = 33038 }, - { url = "https://files.pythonhosted.org/packages/96/0c/620c1fb3661858c0e37eb3cbffd8c6f732a67cd97296f725789679801b31/MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", size = 16572 }, - { url = "https://files.pythonhosted.org/packages/3f/14/c3554d512d5f9100a95e737502f4a2323a1959f6d0d01e0d0997b35f7b10/MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", size = 17127 }, - { url = "https://files.pythonhosted.org/packages/f8/ff/2c942a82c35a49df5de3a630ce0a8456ac2969691b230e530ac12314364c/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", size = 18192 }, - { url = "https://files.pythonhosted.org/packages/4f/14/6f294b9c4f969d0c801a4615e221c1e084722ea6114ab2114189c5b8cbe0/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", size = 14072 }, - { url = 
"https://files.pythonhosted.org/packages/81/d4/fd74714ed30a1dedd0b82427c02fa4deec64f173831ec716da11c51a50aa/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", size = 26928 }, - { url = "https://files.pythonhosted.org/packages/c7/bd/50319665ce81bb10e90d1cf76f9e1aa269ea6f7fa30ab4521f14d122a3df/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", size = 26106 }, - { url = "https://files.pythonhosted.org/packages/4c/6f/f2b0f675635b05f6afd5ea03c094557bdb8622fa8e673387444fe8d8e787/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68", size = 25781 }, - { url = "https://files.pythonhosted.org/packages/51/e0/393467cf899b34a9d3678e78961c2c8cdf49fb902a959ba54ece01273fb1/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", size = 30518 }, - { url = "https://files.pythonhosted.org/packages/f6/02/5437e2ad33047290dafced9df741d9efc3e716b75583bbd73a9984f1b6f7/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", size = 29669 }, - { url = "https://files.pythonhosted.org/packages/0e/7d/968284145ffd9d726183ed6237c77938c021abacde4e073020f920e060b2/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", size = 29933 }, - { url = "https://files.pythonhosted.org/packages/bf/f3/ecb00fc8ab02b7beae8699f34db9357ae49d9f21d4d3de6f305f34fa949e/MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", size = 16656 }, - { url = 
"https://files.pythonhosted.org/packages/92/21/357205f03514a49b293e214ac39de01fadd0970a6e05e4bf1ddd0ffd0881/MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", size = 17206 }, - { url = "https://files.pythonhosted.org/packages/0f/31/780bb297db036ba7b7bbede5e1d7f1e14d704ad4beb3ce53fb495d22bc62/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", size = 18193 }, - { url = "https://files.pythonhosted.org/packages/6c/77/d77701bbef72892affe060cdacb7a2ed7fd68dae3b477a8642f15ad3b132/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", size = 14073 }, - { url = "https://files.pythonhosted.org/packages/d9/a7/1e558b4f78454c8a3a0199292d96159eb4d091f983bc35ef258314fe7269/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", size = 26486 }, - { url = "https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", size = 25685 }, - { url = "https://files.pythonhosted.org/packages/6a/18/ae5a258e3401f9b8312f92b028c54d7026a97ec3ab20bfaddbdfa7d8cce8/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", size = 25338 }, - { url = "https://files.pythonhosted.org/packages/0b/cc/48206bd61c5b9d0129f4d75243b156929b04c94c09041321456fd06a876d/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", size = 30439 }, - { url = 
"https://files.pythonhosted.org/packages/d1/06/a41c112ab9ffdeeb5f77bc3e331fdadf97fa65e52e44ba31880f4e7f983c/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", size = 29531 }, - { url = "https://files.pythonhosted.org/packages/02/8c/ab9a463301a50dab04d5472e998acbd4080597abc048166ded5c7aa768c8/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", size = 29823 }, - { url = "https://files.pythonhosted.org/packages/bc/29/9bc18da763496b055d8e98ce476c8e718dcfd78157e17f555ce6dd7d0895/MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", size = 16658 }, - { url = "https://files.pythonhosted.org/packages/f6/f8/4da07de16f10551ca1f640c92b5f316f9394088b183c6a57183df6de5ae4/MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", size = 17211 }, -] - [[package]] name = "markupsafe" version = "3.0.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } wheels = [ { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, @@ -832,18 +636,15 @@ name = "myst-parser" version = "3.0.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - 
"python_full_version == '3.9.*'", - "python_full_version < '3.9'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "docutils", marker = "python_full_version < '3.10'" }, { name = "jinja2", marker = "python_full_version < '3.10'" }, { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, { name = "mdit-py-plugins", marker = "python_full_version < '3.10'" }, { name = "pyyaml", marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/49/64/e2f13dac02f599980798c01156393b781aec983b52a6e4057ee58f07c43a/myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87", size = 92392 } wheels = [ @@ -860,7 +661,7 @@ resolution-markers = [ "python_full_version == '3.10.*'", ] dependencies = [ - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "docutils", marker = "python_full_version >= '3.10'" }, { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, { name = "mdit-py-plugins", marker = "python_full_version >= '3.10'" }, @@ -872,50 +673,12 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ca/b4/b036f8fdb667587bb37df29dc6644681dd78b7a2a6321a34684b79412b28/myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d", size = 84563 }, ] -[[package]] -name = "numpy" -version = "1.24.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140 }, - { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297 }, - { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611 }, - { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357 }, - { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = 
"sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222 }, - { url = "https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514 }, - { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508 }, - { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033 }, - { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951 }, - { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923 }, - { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446 }, - { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466 }, - { 
url = "https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722 }, - { url = "https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102 }, - { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616 }, - { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263 }, - { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660 }, - { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112 }, - { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549 }, - { url = 
"https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950 }, - { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228 }, - { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170 }, - { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918 }, - { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441 }, - { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590 }, - { url = "https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744 }, - { url = 
"https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290 }, -] - [[package]] name = "numpy" version = "2.0.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 } wheels = [ @@ -1041,63 +804,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] -[[package]] -name = "pandas" -version = "2.0.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "python-dateutil", marker = "python_full_version < '3.9'" }, - { name = "pytz", marker = "python_full_version < '3.9'" }, - { name = "tzdata", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908 }, - { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486 }, - { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897 }, - { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421 }, - { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792 }, - { url = "https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333 }, - { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672 }, - { url = "https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229 
}, - { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591 }, - { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370 }, - { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935 }, - { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692 }, - { url = "https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303 }, - { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932 }, - { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018 }, - { url = 
"https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723 }, - { url = "https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403 }, - { url = "https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638 }, - { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160 }, - { url = "https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752 }, - { url = "https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852 }, - { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496 }, - { url = 
"https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766 }, - { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902 }, -] - [[package]] name = "pandas" version = "2.2.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] dependencies = [ - { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.9'" }, - { name = "pytz", marker = "python_full_version >= '3.9'" }, - { name = "tzdata", marker = "python_full_version >= '3.9'" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } wheels = [ @@ -1213,65 +929,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 
11842 }, ] -[[package]] -name = "pyarrow" -version = "17.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/27/4e/ea6d43f324169f8aec0e57569443a38bab4b398d09769ca64f7b4d467de3/pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", size = 1112479 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/5d/78d4b040bc5ff2fc6c3d03e80fca396b742f6c125b8af06bcf7427f931bc/pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", size = 28994846 }, - { url = "https://files.pythonhosted.org/packages/3b/73/8ed168db7642e91180330e4ea9f3ff8bab404678f00d32d7df0871a4933b/pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", size = 27165908 }, - { url = "https://files.pythonhosted.org/packages/81/36/e78c24be99242063f6d0590ef68c857ea07bdea470242c361e9a15bd57a4/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", size = 39264209 }, - { url = "https://files.pythonhosted.org/packages/18/4c/3db637d7578f683b0a8fb8999b436bdbedd6e3517bd4f90c70853cf3ad20/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", size = 39862883 }, - { url = "https://files.pythonhosted.org/packages/81/3c/0580626896c842614a523e66b351181ed5bb14e5dfc263cd68cea2c46d90/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8", size = 38723009 }, - { 
url = "https://files.pythonhosted.org/packages/ee/fb/c1b47f0ada36d856a352da261a44d7344d8f22e2f7db3945f8c3b81be5dd/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", size = 39855626 }, - { url = "https://files.pythonhosted.org/packages/19/09/b0a02908180a25d57312ab5919069c39fddf30602568980419f4b02393f6/pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", size = 25147242 }, - { url = "https://files.pythonhosted.org/packages/f9/46/ce89f87c2936f5bb9d879473b9663ce7a4b1f4359acc2f0eb39865eaa1af/pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", size = 29028748 }, - { url = "https://files.pythonhosted.org/packages/8d/8e/ce2e9b2146de422f6638333c01903140e9ada244a2a477918a368306c64c/pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", size = 27190965 }, - { url = "https://files.pythonhosted.org/packages/3b/c8/5675719570eb1acd809481c6d64e2136ffb340bc387f4ca62dce79516cea/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", size = 39269081 }, - { url = "https://files.pythonhosted.org/packages/5e/78/3931194f16ab681ebb87ad252e7b8d2c8b23dad49706cadc865dff4a1dd3/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", size = 39864921 }, - { url = "https://files.pythonhosted.org/packages/d8/81/69b6606093363f55a2a574c018901c40952d4e902e670656d18213c71ad7/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", size = 38740798 }, - { url = 
"https://files.pythonhosted.org/packages/4c/21/9ca93b84b92ef927814cb7ba37f0774a484c849d58f0b692b16af8eebcfb/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", size = 39871877 }, - { url = "https://files.pythonhosted.org/packages/30/d1/63a7c248432c71c7d3ee803e706590a0b81ce1a8d2b2ae49677774b813bb/pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", size = 25151089 }, - { url = "https://files.pythonhosted.org/packages/d4/62/ce6ac1275a432b4a27c55fe96c58147f111d8ba1ad800a112d31859fae2f/pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", size = 29019418 }, - { url = "https://files.pythonhosted.org/packages/8e/0a/dbd0c134e7a0c30bea439675cc120012337202e5fac7163ba839aa3691d2/pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", size = 27152197 }, - { url = "https://files.pythonhosted.org/packages/cb/05/3f4a16498349db79090767620d6dc23c1ec0c658a668d61d76b87706c65d/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", size = 39263026 }, - { url = "https://files.pythonhosted.org/packages/c2/0c/ea2107236740be8fa0e0d4a293a095c9f43546a2465bb7df34eee9126b09/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", size = 39880798 }, - { url = "https://files.pythonhosted.org/packages/f6/b0/b9164a8bc495083c10c281cc65064553ec87b7537d6f742a89d5953a2a3e/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", size = 38715172 }, - { url = 
"https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", size = 39874508 }, - { url = "https://files.pythonhosted.org/packages/ae/49/baafe2a964f663413be3bd1cf5c45ed98c5e42e804e2328e18f4570027c1/pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", size = 25099235 }, - { url = "https://files.pythonhosted.org/packages/8d/bd/8f52c1d7b430260f80a349cffa2df351750a737b5336313d56dcadeb9ae1/pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", size = 28999345 }, - { url = "https://files.pythonhosted.org/packages/64/d9/51e35550f2f18b8815a2ab25948f735434db32000c0e91eba3a32634782a/pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", size = 27168441 }, - { url = "https://files.pythonhosted.org/packages/18/d8/7161d87d07ea51be70c49f615004c1446d5723622a18b2681f7e4b71bf6e/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", size = 39363163 }, - { url = "https://files.pythonhosted.org/packages/3f/08/bc497130789833de09e345e3ce4647e3ce86517c4f70f2144f0367ca378b/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", size = 39965253 }, - { url = "https://files.pythonhosted.org/packages/d3/2e/493dd7db889402b4c7871ca7dfdd20f2c5deedbff802d3eb8576359930f9/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", size = 38805378 }, - { url = 
"https://files.pythonhosted.org/packages/e6/c1/4c6bcdf7a820034aa91a8b4d25fef38809be79b42ca7aaa16d4680b0bbac/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", size = 39958364 }, - { url = "https://files.pythonhosted.org/packages/d1/db/42ac644453cfdfc60fe002b46d647fe7a6dfad753ef7b28e99b4c936ad5d/pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", size = 25229211 }, - { url = "https://files.pythonhosted.org/packages/43/e0/a898096d35be240aa61fb2d54db58b86d664b10e1e51256f9300f47565e8/pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", size = 29007881 }, - { url = "https://files.pythonhosted.org/packages/59/22/f7d14907ed0697b5dd488d393129f2738629fa5bcba863e00931b7975946/pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", size = 27178117 }, - { url = "https://files.pythonhosted.org/packages/bf/ee/661211feac0ed48467b1d5c57298c91403809ec3ab78b1d175e1d6ad03cf/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", size = 39273896 }, - { url = "https://files.pythonhosted.org/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", size = 39875438 }, - { url = "https://files.pythonhosted.org/packages/75/63/29d1bfcc57af73cde3fc3baccab2f37548de512dbe0ab294b033cd203516/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", size = 38735092 }, - { url = 
"https://files.pythonhosted.org/packages/39/f4/90258b4de753df7cc61cefb0312f8abcf226672e96cc64996e66afce817a/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", size = 39867610 }, - { url = "https://files.pythonhosted.org/packages/e7/f6/b75d4816c32f1618ed31a005ee635dd1d91d8164495d94f2ea092f594661/pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", size = 25148611 }, -] - [[package]] name = "pyarrow" version = "18.1.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } wheels = [ { url = "https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size = 29531620 }, @@ -1332,10 +993,8 @@ version = "0.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "beautifulsoup4" }, - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" 
}, + { name = "docutils" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fc/d6/3921de802cf1ee771f0e76c9068b52498aeb8eeec6b830ff931c81c7ecf3/pydata_sphinx_theme-0.8.0.tar.gz", hash = "sha256:9f72015d9c572ea92e3007ab221a8325767c426783b6b9941813e65fa988dc90", size = 1123746 } @@ -1349,13 +1008,11 @@ version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "deprecated" }, - { name = "pyjwt", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version < '3.9'" }, - { name = "pyjwt", version = "2.10.1", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version >= '3.9'" }, + { name = "pyjwt", extra = ["crypto"] }, { name = "pynacl" }, { name = "requests" }, { name = "typing-extensions" }, - { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } wheels = [ @@ -1371,33 +1028,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] -[[package]] -name = "pyjwt" -version = 
"2.9.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/fb/68/ce067f09fca4abeca8771fe667d89cc347d1e99da3e093112ac329c6020e/pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c", size = 78825 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/84/0fdf9b18ba31d69877bd39c9cd6052b47f3761e9910c15de788e519f079f/PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850", size = 22344 }, -] - -[package.optional-dependencies] -crypto = [ - { name = "cryptography", marker = "python_full_version < '3.9'" }, -] - [[package]] name = "pyjwt" version = "2.10.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } wheels = [ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, @@ -1405,7 +1039,7 @@ wheels = [ [package.optional-dependencies] crypto = [ - { name = "cryptography", marker = "python_full_version >= '3.9'" }, + { name = "cryptography" }, ] [[package]] @@ -1508,13 +1142,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 
}, { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, - { url = "https://files.pythonhosted.org/packages/74/d9/323a59d506f12f498c2097488d80d16f4cf965cee1791eab58b56b19f47a/PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", size = 183218 }, - { url = "https://files.pythonhosted.org/packages/74/cc/20c34d00f04d785f2028737e2e2a8254e1425102e730fee1d6396f832577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", size = 728067 }, - { url = "https://files.pythonhosted.org/packages/20/52/551c69ca1501d21c0de51ddafa8c23a0191ef296ff098e98358f69080577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", size = 757812 }, - { url = "https://files.pythonhosted.org/packages/fd/7f/2c3697bba5d4aa5cc2afe81826d73dfae5f049458e44732c7a0938baa673/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", size = 746531 }, - { url = "https://files.pythonhosted.org/packages/8c/ab/6226d3df99900e580091bb44258fde77a8433511a86883bd4681ea19a858/PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", size = 800820 }, - { url = 
"https://files.pythonhosted.org/packages/a0/99/a9eb0f3e710c06c5d922026f6736e920d431812ace24aae38228d0d64b04/PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", size = 145514 }, - { url = "https://files.pythonhosted.org/packages/75/8a/ee831ad5fafa4431099aa4e078d4c8efd43cd5e48fbc774641d233b683a9/PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", size = 162702 }, { url = "https://files.pythonhosted.org/packages/65/d8/b7a1db13636d7fb7d4ff431593c510c8b8fca920ade06ca8ef20015493c5/PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", size = 184777 }, { url = "https://files.pythonhosted.org/packages/0a/02/6ec546cd45143fdf9840b2c6be8d875116a64076218b61d68e12548e5839/PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", size = 172318 }, { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891 }, @@ -1534,8 +1161,7 @@ dependencies = [ { name = "certifi" }, { name = "charset-normalizer" }, { name = "idna" }, - { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } wheels = [ @@ -1567,28 +1193,10 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/94/0498cdb7316ed67a1928300dd87d659c933479f44dec51b4f62bfd1f8028/ruff-0.9.1-py3-none-win_arm64.whl", hash = "sha256:1cd76c7f9c679e6e8f2af8f778367dca82b95009bc7b1a85a47f1521ae524fa7", size = 9145708 }, ] -[[package]] -name = "setuptools" -version = "75.3.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 }, -] - [[package]] name = "setuptools" version = "75.8.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222 } wheels = [ { url = "https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782 }, @@ -1621,63 +1229,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = 
"sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, ] -[[package]] -name = "sphinx" -version = "7.1.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "alabaster", version = "0.7.13", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "babel", marker = "python_full_version < '3.9'" }, - { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "imagesize", marker = "python_full_version < '3.9'" }, - { name = "importlib-metadata", marker = "python_full_version < '3.9'" }, - { name = "jinja2", marker = "python_full_version < '3.9'" }, - { name = "packaging", marker = "python_full_version < '3.9'" }, - { name = "pygments", marker = "python_full_version < '3.9'" }, - { name = "requests", marker = "python_full_version < '3.9'" }, - { name = "snowballstemmer", marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-applehelp", version = "1.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-devhelp", version = "1.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-htmlhelp", version = "2.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-qthelp", version = "1.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-serializinghtml", version = "1.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, 
-] -sdist = { url = "https://files.pythonhosted.org/packages/dc/01/688bdf9282241dca09fe6e3a1110eda399fa9b10d0672db609e37c2e7a39/sphinx-7.1.2.tar.gz", hash = "sha256:780f4d32f1d7d1126576e0e5ecc19dc32ab76cd24e950228dcf7b1f6d3d9e22f", size = 6828258 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/17/325cf6a257d84751a48ae90752b3d8fe0be8f9535b6253add61c49d0d9bc/sphinx-7.1.2-py3-none-any.whl", hash = "sha256:d170a81825b2fcacb6dfd5a0d7f578a053e45d3f2b153fecc948c37344eb4cbe", size = 3169543 }, -] - [[package]] name = "sphinx" version = "7.4.7" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "babel", marker = "python_full_version == '3.9.*'" }, - { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "imagesize", marker = "python_full_version == '3.9.*'" }, - { name = "importlib-metadata", marker = "python_full_version == '3.9.*'" }, - { name = "jinja2", marker = "python_full_version == '3.9.*'" }, - { name = "packaging", marker = "python_full_version == '3.9.*'" }, - { name = "pygments", marker = "python_full_version == '3.9.*'" }, - { name = "requests", marker = "python_full_version == '3.9.*'" }, - { name = "snowballstemmer", marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-htmlhelp", 
version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "tomli", marker = "python_full_version == '3.9.*'" }, + { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "babel", marker = "python_full_version < '3.10'" }, + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "docutils", marker = "python_full_version < '3.10'" }, + { name = "imagesize", marker = "python_full_version < '3.10'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "jinja2", marker = "python_full_version < '3.10'" }, + { name = "packaging", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "requests", marker = "python_full_version < '3.10'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.10'" }, + { name = "tomli", marker = "python_full_version < '3.10'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911 } wheels = [ @@ -1697,19 +1274,19 @@ dependencies = [ { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "babel", marker = "python_full_version >= '3.10'" }, { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "docutils", marker = "python_full_version >= '3.10'" }, { name = "imagesize", marker = "python_full_version >= '3.10'" }, { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "packaging", marker = "python_full_version >= '3.10'" }, { name = "pygments", marker = "python_full_version >= '3.10'" }, { name = "requests", marker = "python_full_version >= '3.10'" }, { name = "snowballstemmer", marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.10'" }, { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-qthelp", version = 
"2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.10'" }, { name = "tomli", marker = "python_full_version == '3.10.*'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 } @@ -1722,97 +1299,40 @@ name = "sphinx-autoapi" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "astroid", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "astroid", version = "3.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "astroid" }, { name = "jinja2" }, { name = "pyyaml" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "stdlib-list", version = "0.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "stdlib-list", version = "0.11.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version == '3.9.*'" }, + { name = "stdlib-list", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4a/eb/cc243583bb1d518ca3b10998c203d919a8ed90affd4831f2b61ad09043d2/sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c", size = 29292 } wheels = [ { url = "https://files.pythonhosted.org/packages/de/d6/f2acdc2567337fd5f5dc091a4e58d8a0fb14927b9779fc1e5ecee96d9824/sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92", size = 34095 }, ] -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/32/df/45e827f4d7e7fcc84e853bcef1d836effd762d63ccb86f43ede4e98b478c/sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e", size = 24766 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228", size = 120601 }, -] - [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } wheels = [ { url = 
"https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, ] -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/98/33/dc28393f16385f722c893cb55539c641c9aaec8d1bc1c15b69ce0ac2dbb3/sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4", size = 17398 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", size = 84690 }, -] - [[package]] name = "sphinxcontrib-devhelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } wheels = [ { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, ] -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = 
"https://files.pythonhosted.org/packages/b3/47/64cff68ea3aa450c373301e5bebfbb9fce0a3e70aca245fcadd4af06cd75/sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff", size = 27967 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903", size = 99833 }, -] - [[package]] name = "sphinxcontrib-htmlhelp" version = "2.1.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } wheels = [ { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, @@ -1827,55 +1347,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, ] -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/8e/c4846e59f38a5f2b4a0e3b27af38f2fcf904d4bfd82095bf92de0b114ebd/sphinxcontrib-qthelp-1.0.3.tar.gz", hash = 
"sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", size = 21658 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6", size = 90609 }, -] - [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } wheels = [ { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, ] -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "1.1.5" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/72/835d6fadb9e5d02304cf39b18f93d227cd93abd3c41ebf58e6853eeb1455/sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952", size = 21019 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", size = 94021 }, -] - [[package]] name = "sphinxcontrib-serializinghtml" version = "2.0.0" 
source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } wheels = [ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, @@ -1895,25 +1379,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, ] -[[package]] -name = "stdlib-list" -version = "0.10.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/39/bb/1cdbc326a5ab0026602e0489cbf02357e78140253c4b57cd866d380eb355/stdlib_list-0.10.0.tar.gz", hash = "sha256:6519c50d645513ed287657bfe856d527f277331540691ddeaf77b25459964a14", size = 59447 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/d9/9085375f0d23a4896b307bf14dcc61b49ec8cc67cb33e06cf95bf3af3966/stdlib_list-0.10.0-py3-none-any.whl", hash = "sha256:b3a911bc441d03e0332dd1a9e7d0870ba3bb0a542a74d7524f54fb431256e214", size = 79814 }, -] - [[package]] name = "stdlib-list" version = "0.11.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.9.*'", -] sdist = { url = 
"https://files.pythonhosted.org/packages/5d/04/6b37a71e92ddca16b190b7df62494ac4779d58ced4787f73584eb32c8f03/stdlib_list-0.11.0.tar.gz", hash = "sha256:b74a7b643a77a12637e907f3f62f0ab9f67300bce4014f6b2d3c8b4c8fd63c66", size = 60335 } wheels = [ { url = "https://files.pythonhosted.org/packages/16/fe/e07300c027a868d32d8ed7a425503401e91a03ff90e7ca525c115c634ffb/stdlib_list-0.11.0-py3-none-any.whl", hash = "sha256:8bf8decfffaaf273d4cfeb5bd852b910a00dec1037dcf163576803622bccf597", size = 83617 }, @@ -1994,28 +1463,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, ] -[[package]] -name = "urllib3" -version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, -] - [[package]] name = "urllib3" version = "2.3.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } wheels = [ { url = 
"https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, @@ -2091,17 +1542,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, - { url = "https://files.pythonhosted.org/packages/0c/66/95b9e90e6e1274999b183c9c3f984996d870e933ca9560115bd1cd1d6f77/wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9", size = 53234 }, - { url = "https://files.pythonhosted.org/packages/a4/b6/6eced5e2db5924bf6d9223d2bb96b62e00395aae77058e6a9e11bf16b3bd/wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119", size = 38462 }, - { url = "https://files.pythonhosted.org/packages/5d/a4/c8472fe2568978b5532df84273c53ddf713f689d408a4335717ab89547e0/wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6", size = 38730 }, - { url = 
"https://files.pythonhosted.org/packages/3c/70/1d259c6b1ad164eb23ff70e3e452dd1950f96e6473f72b7207891d0fd1f0/wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9", size = 86225 }, - { url = "https://files.pythonhosted.org/packages/a9/68/6b83367e1afb8de91cbea4ef8e85b58acdf62f034f05d78c7b82afaa23d8/wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a", size = 78055 }, - { url = "https://files.pythonhosted.org/packages/0d/21/09573d2443916705c57fdab85d508f592c0a58d57becc53e15755d67fba2/wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2", size = 85592 }, - { url = "https://files.pythonhosted.org/packages/45/ce/700e17a852dd5dec894e241c72973ea82363486bcc1fb05d47b4fbd1d683/wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a", size = 83906 }, - { url = "https://files.pythonhosted.org/packages/37/14/bd210faf0a66faeb8529d42b6b45a25d6aa6ce25ddfc19168e4161aed227/wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04", size = 76763 }, - { url = "https://files.pythonhosted.org/packages/34/0c/85af70d291f44659c422416f0272046109e785bf6db8c081cfeeae5715c5/wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f", size = 83573 }, - { url = "https://files.pythonhosted.org/packages/f8/1e/b215068e824878f69ea945804fa26c176f7c2735a3ad5367d78930bd076a/wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7", size = 36408 }, - { url = 
"https://files.pythonhosted.org/packages/52/27/3dd9ad5f1097b33c95d05929e409cc86d7c765cb5437b86694dc8f8e9af0/wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3", size = 38737 }, { url = "https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, @@ -2116,25 +1556,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, ] -[[package]] -name = "zipp" -version = "3.20.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/54/bf/5c0000c44ebc80123ecbdddba1f5dcd94a5ada602a9c225d84b5aaa55e86/zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29", size = 24199 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, -] - [[package]] name 
= "zipp" version = "3.21.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } wheels = [ { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, From b194a8772e58ccefc697e11671113127a8038716 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 12 Mar 2025 14:25:32 -0400 Subject: [PATCH 020/206] feat/improve ruff test coverage (#1055) * Run python tests on all currently supported python versions * Update ruff checks to select all * Ruff auto fix * Applying ruff suggestions * noqa rules updates per ruff checks * Working through more ruff suggestions * Working through more ruff suggestions * update timestamps on tests * More ruff updates * More ruff updates * Instead of importing udf static functions as variables, import * More ruff formatting suggestions * more ruff formatting suggestions * More ruff formatting * More ruff formatting * Cut off lint errors for this PR * Working through more ruff checks and disabling a bunch for now * Address CI difference from local ruff * UDWF isn't a proper abstract base class right now since users can opt in to all methods * Update pre-commit to match the version of ruff used in CI * To enable testing in python 3.9 we need numpy. Also going to the current minimal supported version * Update min requried version of python to 3.9 in pyproject.toml. The other changes will come in #1043 that is soon to be merged. 
* Suppress UP035 * ruff format --- .github/workflows/test.yaml | 2 + .pre-commit-config.yaml | 2 +- benchmarks/tpch/tpch.py | 14 +- dev/release/check-rat-report.py | 2 +- dev/release/generate-changelog.py | 10 +- docs/source/conf.py | 2 +- examples/python-udwf.py | 2 +- examples/tpch/_tests.py | 15 +- pyproject.toml | 76 +++++- python/datafusion/__init__.py | 50 ++-- python/datafusion/common.py | 14 +- python/datafusion/context.py | 4 +- python/datafusion/dataframe.py | 15 +- python/datafusion/expr.py | 94 +++---- python/datafusion/functions.py | 46 ++-- python/datafusion/input/__init__.py | 2 +- python/datafusion/input/base.py | 6 +- python/datafusion/input/location.py | 40 +-- python/datafusion/io.py | 20 +- python/datafusion/object_store.py | 2 +- python/datafusion/plan.py | 8 +- python/datafusion/record_batch.py | 8 +- python/datafusion/substrait.py | 21 +- python/datafusion/udf.py | 236 +++++++++-------- python/tests/generic.py | 19 +- python/tests/test_aggregation.py | 16 +- python/tests/test_catalog.py | 9 +- python/tests/test_context.py | 53 ++-- python/tests/test_dataframe.py | 38 ++- python/tests/test_expr.py | 11 +- python/tests/test_functions.py | 358 ++++++++++++++------------ python/tests/test_imports.py | 7 +- python/tests/test_input.py | 12 +- python/tests/test_io.py | 13 +- python/tests/test_sql.py | 35 +-- python/tests/test_store.py | 13 +- python/tests/test_substrait.py | 2 +- python/tests/test_udaf.py | 10 +- python/tests/test_udwf.py | 2 +- python/tests/test_wrapper_coverage.py | 7 +- 40 files changed, 697 insertions(+), 599 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c1d9ac838..da3582766 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -33,9 +33,11 @@ jobs: fail-fast: false matrix: python-version: + - "3.9" - "3.10" - "3.11" - "3.12" + - "3.13" toolchain: - "stable" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b548ff18f..abcfcf321 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.3.0 + rev: v0.9.10 hooks: # Run the linter. - id: ruff diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index fb86b12b6..bfb9ac398 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -59,13 +59,13 @@ def bench(data_path, query_path): end = time.time() time_millis = (end - start) * 1000 total_time_millis += time_millis - print("setup,{}".format(round(time_millis, 1))) - results.write("setup,{}\n".format(round(time_millis, 1))) + print(f"setup,{round(time_millis, 1)}") + results.write(f"setup,{round(time_millis, 1)}\n") results.flush() # run queries for query in range(1, 23): - with open("{}/q{}.sql".format(query_path, query)) as f: + with open(f"{query_path}/q{query}.sql") as f: text = f.read() tmp = text.split(";") queries = [] @@ -83,14 +83,14 @@ def bench(data_path, query_path): end = time.time() time_millis = (end - start) * 1000 total_time_millis += time_millis - print("q{},{}".format(query, round(time_millis, 1))) - results.write("q{},{}\n".format(query, round(time_millis, 1))) + print(f"q{query},{round(time_millis, 1)}") + results.write(f"q{query},{round(time_millis, 1)}\n") results.flush() except Exception as e: print("query", query, "failed", e) - print("total,{}".format(round(total_time_millis, 1))) - results.write("total,{}\n".format(round(total_time_millis, 1))) + print(f"total,{round(total_time_millis, 1)}") + results.write(f"total,{round(total_time_millis, 1)}\n") if __name__ == "__main__": diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py index d3dd7c5dd..0c9f4c326 100644 --- a/dev/release/check-rat-report.py +++ b/dev/release/check-rat-report.py @@ -29,7 +29,7 @@ exclude_globs_filename = sys.argv[1] xml_filename = sys.argv[2] -globs = [line.strip() for line in open(exclude_globs_filename, "r")] +globs = 
[line.strip() for line in open(exclude_globs_filename)] tree = ET.parse(xml_filename) root = tree.getroot() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 2564eea86..e30e2def2 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -26,15 +26,11 @@ def print_pulls(repo_name, title, pulls): if len(pulls) > 0: - print("**{}:**".format(title)) + print(f"**{title}:**") print() for pull, commit in pulls: - url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) - print( - "- {} [#{}]({}) ({})".format( - pull.title, pull.number, url, commit.author.login - ) - ) + url = f"https://github.com/{repo_name}/pull/{pull.number}" + print(f"- {pull.title} [#{pull.number}]({url}) ({commit.author.login})") print() diff --git a/docs/source/conf.py b/docs/source/conf.py index 2e5a41339..c82a189e0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ autoapi_python_class_content = "both" -def autoapi_skip_member_fn(app, what, name, obj, skip, options): +def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 skip_contents = [ # Re-exports ("class", "datafusion.DataFrame"), diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 7d39dc1b8..98d118bf2 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -59,7 +59,7 @@ def __init__(self, alpha: float) -> None: def supports_bounded_execution(self) -> bool: return True - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: + def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 # Override the default range of current row since uses_window_frame is False # So for the purpose of this test we just smooth from the previous row to # current. 
diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index c4d872085..2be4dfabd 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -27,28 +27,25 @@ def df_selection(col_name, col_type): if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): return F.round(col(col_name), lit(2)).alias(col_name) - elif col_type == pa.string() or col_type == pa.string_view(): + if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) - else: - return col(col_name) + return col(col_name) def load_schema(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return col_name, pa.string() - elif isinstance(col_type, pa.Decimal128Type): + if isinstance(col_type, pa.Decimal128Type): return col_name, pa.float64() - else: - return col_name, col_type + return col_name, col_type def expected_selection(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return F.trim(col(col_name)).cast(col_type).alias(col_name) - elif col_type == pa.string() or col_type == pa.string_view(): + if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) - else: - return col(col_name) + return col(col_name) def selections_and_schema(original_schema): diff --git a/pyproject.toml b/pyproject.toml index 1c2733677..060e3b80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,57 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"] +select = ["ALL" ] +ignore = [ + "A001", # Allow using words like min as variable names + "A002", # Allow using words like filter as variable names + "ANN401", # Allow Any for wrapper classes + "COM812", # Recommended to ignore these rules when using with ruff-format + "FIX002", # Allow TODO lines - consider removing at some point + "FBT001", # Allow boolean positional args + "FBT002", # Allow boolean 
positional args + "ISC001", # Recommended to ignore these rules when using with ruff-format + "SLF001", # Allow accessing private members + "TD002", + "TD003", # Allow TODO lines + "UP007", # Disallowing Union is pedantic + # TODO: Enable all of the following, but this PR is getting too large already + "PT001", + "ANN204", + "B008", + "EM101", + "PLR0913", + "PLR1714", + "ANN201", + "C400", + "TRY003", + "B904", + "UP006", + "RUF012", + "FBT003", + "C416", + "SIM102", + "PGH003", + "PLR2004", + "PERF401", + "PD901", + "EM102", + "ERA001", + "SIM108", + "ICN001", + "ANN001", + "ANN202", + "PTH", + "N812", + "INP001", + "DTZ007", + "PLW2901", + "RET503", + "RUF015", + "A005", + "TC001", + "UP035", +] [tool.ruff.lint.pydocstyle] convention = "google" @@ -75,16 +125,30 @@ max-doc-length = 88 # Disable docstring checking for these directories [tool.ruff.lint.per-file-ignores] -"python/tests/*" = ["D"] -"examples/*" = ["D", "W505"] -"dev/*" = ["D"] -"benchmarks/*" = ["D", "F"] +"python/tests/*" = [ + "ANN", + "ARG", + "BLE001", + "D", + "S101", + "SLF", + "PD", + "PLR2004", + "PT011", + "RUF015", + "S608", + "PLR0913", + "PT004", +] +"examples/*" = ["D", "W505", "E501", "T201", "S101"] +"dev/*" = ["D", "E", "T", "S", "PLR", "C", "SIM", "UP", "EXE", "N817"] +"benchmarks/*" = ["D", "F", "T", "BLE", "FURB", "PLR", "E", "TD", "TRY", "S", "SIM", "EXE", "UP"] "docs/*" = ["D"] [dependency-groups] dev = [ "maturin>=1.8.1", - "numpy>1.24.4 ; python_full_version >= '3.10'", + "numpy>1.25.0", "pytest>=7.4.4", "ruff>=0.9.1", "toml>=0.10.2", diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index f11ce54a6..286e5dc31 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -48,44 +48,47 @@ from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream -from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF +from .udf import 
Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf __version__ = importlib_metadata.version(__name__) __all__ = [ "Accumulator", + "AggregateUDF", + "Catalog", "Config", - "DataFrame", - "SessionContext", - "SessionConfig", - "SQLOptions", - "RuntimeEnvBuilder", - "Expr", - "ScalarUDF", - "WindowFrame", - "column", - "col", - "literal", - "lit", "DFSchema", - "Catalog", + "DataFrame", "Database", - "Table", - "AggregateUDF", - "WindowUDF", - "LogicalPlan", "ExecutionPlan", + "Expr", + "LogicalPlan", "RecordBatch", "RecordBatchStream", + "RuntimeEnvBuilder", + "SQLOptions", + "ScalarUDF", + "SessionConfig", + "SessionContext", + "Table", + "WindowFrame", + "WindowUDF", + "col", + "column", "common", "expr", "functions", + "lit", + "literal", "object_store", - "substrait", - "read_parquet", "read_avro", "read_csv", "read_json", + "read_parquet", + "substrait", + "udaf", + "udf", + "udwf", ] @@ -120,10 +123,3 @@ def str_lit(value): def lit(value): """Create a literal expression.""" return Expr.literal(value) - - -udf = ScalarUDF.udf - -udaf = AggregateUDF.udaf - -udwf = WindowUDF.udwf diff --git a/python/datafusion/common.py b/python/datafusion/common.py index a2298c634..e762a993b 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -20,7 +20,7 @@ from ._internal import common as common_internal -# TODO these should all have proper wrapper classes +# TODO: these should all have proper wrapper classes DFSchema = common_internal.DFSchema DataType = common_internal.DataType @@ -38,15 +38,15 @@ "DFSchema", "DataType", "DataTypeMap", - "RexType", - "PythonType", - "SqlType", "NullTreatment", - "SqlTable", + "PythonType", + "RexType", + "SqlFunction", "SqlSchema", - "SqlView", "SqlStatistics", - "SqlFunction", + "SqlTable", + "SqlType", + "SqlView", ] diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 282b2a477..0ab1a908a 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -393,8 
+393,6 @@ def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder: class RuntimeConfig(RuntimeEnvBuilder): """See `RuntimeEnvBuilder`.""" - pass - class SQLOptions: """Options to be used when performing SQL queries.""" @@ -498,7 +496,7 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) - def enable_url_table(self) -> "SessionContext": + def enable_url_table(self) -> SessionContext: """Control if local files can be queried as tables. Returns: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index de5d8376e..d1c71c2bb 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -29,6 +29,7 @@ List, Literal, Optional, + Type, Union, overload, ) @@ -49,10 +50,11 @@ import polars as pl import pyarrow as pa + from datafusion._internal import DataFrame as DataFrameInternal + from datafusion._internal import expr as expr_internal + from enum import Enum -from datafusion._internal import DataFrame as DataFrameInternal -from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -73,7 +75,7 @@ class Compression(Enum): LZ4_RAW = "lz4_raw" @classmethod - def from_str(cls, value: str) -> "Compression": + def from_str(cls: Type[Compression], value: str) -> Compression: """Convert a string to a Compression enum value. Args: @@ -88,8 +90,9 @@ def from_str(cls, value: str) -> "Compression": try: return cls(value.lower()) except ValueError: + valid_values = str([item.value for item in Compression]) raise ValueError( - f"{value} is not a valid Compression. Valid values are: {[item.value for item in Compression]}" + f"{value} is not a valid Compression. 
Valid values are: {valid_values}" ) def get_default_level(self) -> Optional[int]: @@ -104,9 +107,9 @@ def get_default_level(self) -> Optional[int]: # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 if self == Compression.GZIP: return 6 - elif self == Compression.BROTLI: + if self == Compression.BROTLI: return 1 - elif self == Compression.ZSTD: + if self == Compression.ZSTD: return 4 return None diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 3639abec6..702f75aed 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -101,63 +101,63 @@ WindowExpr = expr_internal.WindowExpr __all__ = [ - "Expr", - "Column", - "Literal", - "BinaryExpr", - "Literal", + "Aggregate", "AggregateFunction", - "Not", - "IsNotNull", - "IsNull", - "IsTrue", - "IsFalse", - "IsUnknown", - "IsNotTrue", - "IsNotFalse", - "IsNotUnknown", - "Negative", - "Like", - "ILike", - "SimilarTo", - "ScalarVariable", "Alias", - "InList", - "Exists", - "Subquery", - "InSubquery", - "ScalarSubquery", - "Placeholder", - "GroupingSet", + "Analyze", + "Between", + "BinaryExpr", "Case", "CaseBuilder", "Cast", - "TryCast", - "Between", + "Column", + "CreateMemoryTable", + "CreateView", + "Distinct", + "DropTable", + "EmptyRelation", + "Exists", "Explain", + "Expr", + "Extension", + "Filter", + "GroupingSet", + "ILike", + "InList", + "InSubquery", + "IsFalse", + "IsNotFalse", + "IsNotNull", + "IsNotTrue", + "IsNotUnknown", + "IsNull", + "IsTrue", + "IsUnknown", + "Join", + "JoinConstraint", + "JoinType", + "Like", "Limit", - "Aggregate", + "Literal", + "Literal", + "Negative", + "Not", + "Partitioning", + "Placeholder", + "Projection", + "Repartition", + "ScalarSubquery", + "ScalarVariable", + "SimilarTo", "Sort", "SortExpr", - "Analyze", - "EmptyRelation", - "Join", - "JoinType", - "JoinConstraint", + "Subquery", + "SubqueryAlias", + "TableScan", + "TryCast", "Union", "Unnest", "UnnestExpr", - "Extension", - "Filter", - "Projection", - 
"TableScan", - "CreateMemoryTable", - "CreateView", - "Distinct", - "SubqueryAlias", - "DropTable", - "Partitioning", - "Repartition", "Window", "WindowExpr", "WindowFrame", @@ -311,7 +311,7 @@ def __getitem__(self, key: str | int) -> Expr: ) return Expr(self.expr.__getitem__(key)) - def __eq__(self, rhs: Any) -> Expr: + def __eq__(self, rhs: object) -> Expr: """Equal to. Accepts either an expression or any valid PyArrow scalar literal value. @@ -320,7 +320,7 @@ def __eq__(self, rhs: Any) -> Expr: rhs = Expr.literal(rhs) return Expr(self.expr.__eq__(rhs.expr)) - def __ne__(self, rhs: Any) -> Expr: + def __ne__(self, rhs: object) -> Expr: """Not equal to. Accepts either an expression or any valid PyArrow scalar literal value. diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index b449c4868..0cc7434cf 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,13 +18,12 @@ from __future__ import annotations -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional import pyarrow as pa from datafusion._internal import functions as f from datafusion.common import NullTreatment -from datafusion.context import SessionContext from datafusion.expr import ( CaseBuilder, Expr, @@ -34,6 +33,9 @@ sort_list_to_raw_sort_list, ) +if TYPE_CHECKING: + from datafusion.context import SessionContext + __all__ = [ "abs", "acos", @@ -81,8 +83,8 @@ "array_sort", "array_to_string", "array_union", - "arrow_typeof", "arrow_cast", + "arrow_typeof", "ascii", "asin", "asinh", @@ -97,6 +99,7 @@ "bool_and", "bool_or", "btrim", + "cardinality", "case", "cbrt", "ceil", @@ -116,6 +119,7 @@ "covar", "covar_pop", "covar_samp", + "cume_dist", "current_date", "current_time", "date_bin", @@ -125,17 +129,17 @@ "datetrunc", "decode", "degrees", + "dense_rank", "digest", "empty", "encode", "ends_with", - "extract", "exp", + "extract", "factorial", "find_in_set", "first_value", "flatten", - "cardinality", "floor", 
"from_unixtime", "gcd", @@ -143,8 +147,10 @@ "initcap", "isnan", "iszero", + "lag", "last_value", "lcm", + "lead", "left", "length", "levenshtein", @@ -166,10 +172,10 @@ "list_prepend", "list_push_back", "list_push_front", - "list_repeat", "list_remove", "list_remove_all", "list_remove_n", + "list_repeat", "list_replace", "list_replace_all", "list_replace_n", @@ -180,14 +186,14 @@ "list_union", "ln", "log", - "log10", "log2", + "log10", "lower", "lpad", "ltrim", "make_array", - "make_list", "make_date", + "make_list", "max", "md5", "mean", @@ -195,19 +201,22 @@ "min", "named_struct", "nanvl", - "nvl", "now", "nth_value", + "ntile", "nullif", + "nvl", "octet_length", "order_by", "overlay", + "percent_rank", "pi", "pow", "power", "radians", "random", "range", + "rank", "regexp_like", "regexp_match", "regexp_replace", @@ -225,6 +234,7 @@ "reverse", "right", "round", + "row_number", "rpad", "rtrim", "sha224", @@ -252,8 +262,8 @@ "to_hex", "to_timestamp", "to_timestamp_micros", - "to_timestamp_nanos", "to_timestamp_millis", + "to_timestamp_nanos", "to_timestamp_seconds", "to_unixtime", "translate", @@ -268,14 +278,6 @@ "when", # Window Functions "window", - "lead", - "lag", - "row_number", - "rank", - "dense_rank", - "percent_rank", - "cume_dist", - "ntile", ] @@ -292,14 +294,14 @@ def nullif(expr1: Expr, expr2: Expr) -> Expr: return Expr(f.nullif(expr1.expr, expr2.expr)) -def encode(input: Expr, encoding: Expr) -> Expr: +def encode(expr: Expr, encoding: Expr) -> Expr: """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" - return Expr(f.encode(input.expr, encoding.expr)) + return Expr(f.encode(expr.expr, encoding.expr)) -def decode(input: Expr, encoding: Expr) -> Expr: +def decode(expr: Expr, encoding: Expr) -> Expr: """Decode the ``input``, using the ``encoding``. 
encoding can be base64 or hex.""" - return Expr(f.decode(input.expr, encoding.expr)) + return Expr(f.decode(expr.expr, encoding.expr)) def array_to_string(expr: Expr, delimiter: Expr) -> Expr: diff --git a/python/datafusion/input/__init__.py b/python/datafusion/input/__init__.py index f85ce21f0..f0c1f42b4 100644 --- a/python/datafusion/input/__init__.py +++ b/python/datafusion/input/__init__.py @@ -23,5 +23,5 @@ from .location import LocationInputPlugin __all__ = [ - LocationInputPlugin, + "LocationInputPlugin", ] diff --git a/python/datafusion/input/base.py b/python/datafusion/input/base.py index 4eba19784..f67dde2a1 100644 --- a/python/datafusion/input/base.py +++ b/python/datafusion/input/base.py @@ -38,11 +38,9 @@ class BaseInputSource(ABC): """ @abstractmethod - def is_correct_input(self, input_item: Any, table_name: str, **kwargs) -> bool: + def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: """Returns `True` if the input is valid.""" - pass @abstractmethod - def build_table(self, input_item: Any, table_name: str, **kwarg) -> SqlTable: + def build_table(self, input_item: Any, table_name: str, **kwarg: Any) -> SqlTable: # type: ignore[invalid-type-form] """Create a table from the input source.""" - pass diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index 517cd1578..08d98d115 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -18,7 +18,7 @@ """The default input source for DataFusion.""" import glob -import os +from pathlib import Path from typing import Any from datafusion.common import DataTypeMap, SqlTable @@ -31,7 +31,7 @@ class LocationInputPlugin(BaseInputSource): This can be read in from a file (on disk, remote etc.). 
""" - def is_correct_input(self, input_item: Any, table_name: str, **kwargs): + def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: # noqa: ARG002 """Returns `True` if the input is valid.""" return isinstance(input_item, str) @@ -39,27 +39,28 @@ def build_table( self, input_item: str, table_name: str, - **kwargs, - ) -> SqlTable: + **kwargs: Any, # noqa: ARG002 + ) -> SqlTable: # type: ignore[invalid-type-form] """Create a table from the input source.""" - _, extension = os.path.splitext(input_item) - format = extension.lstrip(".").lower() + extension = Path(input_item).suffix + file_format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] - if format == "parquet": + if file_format == "parquet": import pyarrow.parquet as pq # Read the Parquet metadata metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable - for col in metadata.schema: - columns.append( - ( - col.name, - DataTypeMap.from_parquet_type_str(col.physical_type), - ) + columns = [ + ( + col.name, + DataTypeMap.from_parquet_type_str(col.physical_type), ) + for col in metadata.schema + ] + elif format == "csv": import csv @@ -69,19 +70,18 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. - with open(input_item, "r") as file: + with Path(input_item).open() as file: reader = csv.reader(file) - header_row = next(reader) - print(header_row) + _header_row = next(reader) for _ in reader: num_rows += 1 # TODO: Need to actually consume this row into reasonable columns - raise RuntimeError("TODO: Currently unable to support CSV input files.") + msg = "TODO: Currently unable to support CSV input files." 
+ raise RuntimeError(msg) else: - raise RuntimeError( - f"Input of format: `{format}` is currently not supported.\ + msg = f"Input of format: `{format}` is currently not supported.\ Only Parquet and CSV." - ) + raise RuntimeError(msg) # Input could possibly be multiple files. Create a list if so input_files = glob.glob(input_item) diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 3b6264948..3e39703e3 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -19,15 +19,19 @@ from __future__ import annotations -import pathlib - -import pyarrow +from typing import TYPE_CHECKING from datafusion.dataframe import DataFrame -from datafusion.expr import Expr from ._internal import SessionContext as SessionContextInternal +if TYPE_CHECKING: + import pathlib + + import pyarrow as pa + + from datafusion.expr import Expr + def read_parquet( path: str | pathlib.Path, @@ -35,7 +39,7 @@ def read_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. 
@@ -79,7 +83,7 @@ def read_parquet( def read_json( path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -120,7 +124,7 @@ def read_json( def read_csv( path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -173,7 +177,7 @@ def read_csv( def read_avro( path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".avro", ) -> DataFrame: diff --git a/python/datafusion/object_store.py b/python/datafusion/object_store.py index 7cc17506f..6298526f5 100644 --- a/python/datafusion/object_store.py +++ b/python/datafusion/object_store.py @@ -24,4 +24,4 @@ MicrosoftAzure = object_store.MicrosoftAzure Http = object_store.Http -__all__ = ["AmazonS3", "GoogleCloud", "LocalFileSystem", "MicrosoftAzure", "Http"] +__all__ = ["AmazonS3", "GoogleCloud", "Http", "LocalFileSystem", "MicrosoftAzure"] diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index 133fc446d..0b7bebcb3 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -19,7 +19,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, List +from typing import TYPE_CHECKING, Any import datafusion._internal as df_internal @@ -27,8 +27,8 @@ from datafusion.context import SessionContext __all__ = [ - "LogicalPlan", "ExecutionPlan", + "LogicalPlan", ] @@ -54,7 +54,7 @@ def to_variant(self) -> Any: """Convert the logical plan into its specific variant.""" return self._raw_plan.to_variant() - def inputs(self) -> List[LogicalPlan]: + def inputs(self) -> list[LogicalPlan]: """Returns the list of inputs to the logical 
plan.""" return [LogicalPlan(p) for p in self._raw_plan.inputs()] @@ -106,7 +106,7 @@ def __init__(self, plan: df_internal.ExecutionPlan) -> None: """This constructor should not be called by the end user.""" self._raw_plan = plan - def children(self) -> List[ExecutionPlan]: + def children(self) -> list[ExecutionPlan]: """Get a list of children `ExecutionPlan` that act as inputs to this plan. The returned list will be empty for leaf nodes such as scans, will contain a diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 772cd9089..556eaa786 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -26,14 +26,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - import pyarrow + import pyarrow as pa import typing_extensions import datafusion._internal as df_internal class RecordBatch: - """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" + """This class is essentially a wrapper for :py:class:`pa.RecordBatch`.""" def __init__(self, record_batch: df_internal.RecordBatch) -> None: """This constructor is generally not called by the end user. 
@@ -42,8 +42,8 @@ def __init__(self, record_batch: df_internal.RecordBatch) -> None: """ self.record_batch = record_batch - def to_pyarrow(self) -> pyarrow.RecordBatch: - """Convert to :py:class:`pyarrow.RecordBatch`.""" + def to_pyarrow(self) -> pa.RecordBatch: + """Convert to :py:class:`pa.RecordBatch`.""" return self.record_batch.to_pyarrow() diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index 06302fe38..f10adfb0c 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -23,7 +23,6 @@ from __future__ import annotations -import pathlib from typing import TYPE_CHECKING try: @@ -36,11 +35,13 @@ from ._internal import substrait as substrait_internal if TYPE_CHECKING: + import pathlib + from datafusion.context import SessionContext __all__ = [ - "Plan", "Consumer", + "Plan", "Producer", "Serde", ] @@ -68,11 +69,9 @@ def encode(self) -> bytes: @deprecated("Use `Plan` instead.") -class plan(Plan): +class plan(Plan): # noqa: N801 """See `Plan`.""" - pass - class Serde: """Provides the ``Substrait`` serialization and deserialization.""" @@ -140,11 +139,9 @@ def deserialize_bytes(proto_bytes: bytes) -> Plan: @deprecated("Use `Serde` instead.") -class serde(Serde): +class serde(Serde): # noqa: N801 """See `Serde` instead.""" - pass - class Producer: """Generates substrait plans from a logical plan.""" @@ -168,11 +165,9 @@ def to_substrait_plan(logical_plan: LogicalPlan, ctx: SessionContext) -> Plan: @deprecated("Use `Producer` instead.") -class producer(Producer): +class producer(Producer): # noqa: N801 """Use `Producer` instead.""" - pass - class Consumer: """Generates a logical plan from a substrait plan.""" @@ -194,7 +189,5 @@ def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: @deprecated("Use `Consumer` instead.") -class consumer(Consumer): +class consumer(Consumer): # noqa: N801 """Use `Consumer` instead.""" - - pass diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 
af7bcf2ed..603b7063d 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -22,15 +22,15 @@ import functools from abc import ABCMeta, abstractmethod from enum import Enum -from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, overload -import pyarrow +import pyarrow as pa import datafusion._internal as df_internal from datafusion.expr import Expr if TYPE_CHECKING: - _R = TypeVar("_R", bound=pyarrow.DataType) + _R = TypeVar("_R", bound=pa.DataType) class Volatility(Enum): @@ -72,7 +72,7 @@ class Volatility(Enum): for each output row, resulting in a unique random value for each row. """ - def __str__(self): + def __str__(self) -> str: """Returns the string equivalent.""" return self.name.lower() @@ -88,7 +88,7 @@ def __init__( self, name: str, func: Callable[..., _R], - input_types: pyarrow.DataType | list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], return_type: _R, volatility: Volatility | str, ) -> None: @@ -96,7 +96,7 @@ def __init__( See helper method :py:func:`udf` for argument details. """ - if isinstance(input_types, pyarrow.DataType): + if isinstance(input_types, pa.DataType): input_types = [input_types] self._udf = df_internal.ScalarUDF( name, func, input_types, return_type, str(volatility) @@ -111,7 +111,27 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) - class udf: + @overload + @staticmethod + def udf( + input_types: list[pa.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., ScalarUDF]: ... + + @overload + @staticmethod + def udf( + func: Callable[..., _R], + input_types: list[pa.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> ScalarUDF: ... 
+ + @staticmethod + def udf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Function (UDF). This class can be used both as a **function** and as a **decorator**. @@ -125,7 +145,7 @@ class udf: Args: func (Callable, optional): **Only needed when calling as a function.** Skip this argument when using `udf` as a decorator. - input_types (list[pyarrow.DataType]): The data types of the arguments + input_types (list[pa.DataType]): The data types of the arguments to `func`. This list must be of the same length as the number of arguments. return_type (_R): The data type of the return value from the function. @@ -141,40 +161,28 @@ class udf: ``` def double_func(x): return x * 2 - double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), + double_udf = udf(double_func, [pa.int32()], pa.int32(), "volatile", "double_it") ``` **Using `udf` as a decorator:** ``` - @udf([pyarrow.int32()], pyarrow.int32(), "volatile", "double_it") + @udf([pa.int32()], pa.int32(), "volatile", "double_it") def double_udf(x): return x * 2 ``` """ - def __new__(cls, *args, **kwargs): - """Create a new UDF. 
- - Trigger UDF function or decorator depending on if the first args is callable - """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return cls._function(*args, **kwargs) - else: - # Case 2: Used as a decorator with parameters - return cls._decorator(*args, **kwargs) - - @staticmethod def _function( func: Callable[..., _R], - input_types: list[pyarrow.DataType], + input_types: list[pa.DataType], return_type: _R, volatility: Volatility | str, name: Optional[str] = None, ) -> ScalarUDF: if not callable(func): - raise TypeError("`func` argument must be callable") + msg = "`func` argument must be callable" + raise TypeError(msg) if name is None: if hasattr(func, "__qualname__"): name = func.__qualname__.lower() @@ -188,49 +196,50 @@ def _function( volatility=volatility, ) - @staticmethod def _decorator( - input_types: list[pyarrow.DataType], + input_types: list[pa.DataType], return_type: _R, volatility: Volatility | str, name: Optional[str] = None, - ): - def decorator(func): + ) -> Callable: + def decorator(func: Callable): udf_caller = ScalarUDF.udf( func, input_types, return_type, volatility, name ) @functools.wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any): return udf_caller(*args, **kwargs) return wrapper return decorator + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + class Accumulator(metaclass=ABCMeta): """Defines how an :py:class:`AggregateUDF` accumulates values.""" @abstractmethod - def state(self) -> List[pyarrow.Scalar]: + def state(self) -> list[pa.Scalar]: """Return the current state.""" - pass @abstractmethod - def update(self, *values: pyarrow.Array) -> None: + def update(self, *values: pa.Array) -> None: """Evaluate an array of values and update state.""" - pass 
@abstractmethod - def merge(self, states: List[pyarrow.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: """Merge a set of states.""" - pass @abstractmethod - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: """Return the resultant value.""" - pass class AggregateUDF: @@ -244,9 +253,9 @@ def __init__( self, name: str, accumulator: Callable[[], Accumulator], - input_types: list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, ) -> None: """Instantiate a user-defined aggregate function (UDAF). @@ -272,7 +281,29 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udaf.__call__(*args_raw)) - class udaf: + @overload + @staticmethod + def udaf( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., AggregateUDF]: ... + + @overload + @staticmethod + def udaf( + accum: Callable[[], Accumulator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: ... + + @staticmethod + def udaf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Aggregate Function (UDAF). 
This class allows you to define an **aggregate function** that can be used in @@ -300,13 +331,13 @@ class Summarize(Accumulator): def __init__(self, bias: float = 0.0): self._sum = pa.scalar(bias) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - def merge(self, states: List[pa.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) def evaluate(self) -> pa.Scalar: @@ -344,37 +375,23 @@ def udf4() -> Summarize: aggregation or window function calls. """ - def __new__(cls, *args, **kwargs): - """Create a new UDAF. - - Trigger UDAF function or decorator depending on if the first args is - callable - """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return cls._function(*args, **kwargs) - else: - # Case 2: Used as a decorator with parameters - return cls._decorator(*args, **kwargs) - - @staticmethod def _function( accum: Callable[[], Accumulator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, name: Optional[str] = None, ) -> AggregateUDF: if not callable(accum): - raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): - raise TypeError( - "Accumulator must implement the abstract base class Accumulator" - ) + msg = "`func` must be callable." 
+ raise TypeError(msg) + if not isinstance(accum(), Accumulator): + msg = "Accumulator must implement the abstract base class Accumulator" + raise TypeError(msg) if name is None: - name = accum.__call__().__class__.__qualname__.lower() - if isinstance(input_types, pyarrow.DataType): + name = accum().__class__.__qualname__.lower() + if isinstance(input_types, pa.DataType): input_types = [input_types] return AggregateUDF( name=name, @@ -385,29 +402,34 @@ def _function( volatility=volatility, ) - @staticmethod def _decorator( - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, name: Optional[str] = None, - ): - def decorator(accum: Callable[[], Accumulator]): + ) -> Callable[..., Callable[..., Expr]]: + def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]: udaf_caller = AggregateUDF.udaf( accum, input_types, return_type, state_type, volatility, name ) @functools.wraps(accum) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any) -> Expr: return udaf_caller(*args, **kwargs) return wrapper return decorator + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + -class WindowEvaluator(metaclass=ABCMeta): +class WindowEvaluator: """Evaluator class for user-defined window functions (UDWF). It is up to the user to decide which evaluate function is appropriate. 
@@ -423,7 +445,7 @@ class WindowEvaluator(metaclass=ABCMeta): +------------------------+--------------------------------+------------------+---------------------------+ | True | True/False | True/False | ``evaluate`` | +------------------------+--------------------------------+------------------+---------------------------+ - """ # noqa: W505 + """ # noqa: W505, E501 def memoize(self) -> None: """Perform a memoize operation to improve performance. @@ -436,9 +458,8 @@ def memoize(self) -> None: `memoize` is called after each input batch is processed, and such functions can save whatever they need """ - pass - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: + def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 """Return the range for the window fuction. If `uses_window_frame` flag is `false`. This method is used to @@ -460,14 +481,17 @@ def is_causal(self) -> bool: """Get whether evaluator needs future data for its result.""" return False - def evaluate_all(self, values: list[pyarrow.Array], num_rows: int) -> pyarrow.Array: + def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: """Evaluate a window function on an entire input partition. This function is called once per input *partition* for window functions that *do not use* values from the window frame, such as - :py:func:`~datafusion.functions.row_number`, :py:func:`~datafusion.functions.rank`, - :py:func:`~datafusion.functions.dense_rank`, :py:func:`~datafusion.functions.percent_rank`, - :py:func:`~datafusion.functions.cume_dist`, :py:func:`~datafusion.functions.lead`, + :py:func:`~datafusion.functions.row_number`, + :py:func:`~datafusion.functions.rank`, + :py:func:`~datafusion.functions.dense_rank`, + :py:func:`~datafusion.functions.percent_rank`, + :py:func:`~datafusion.functions.cume_dist`, + :py:func:`~datafusion.functions.lead`, and :py:func:`~datafusion.functions.lag`. It produces the result of all rows in a single pass. 
It @@ -499,12 +523,11 @@ def evaluate_all(self, values: list[pyarrow.Array], num_rows: int) -> pyarrow.Ar .. code-block:: text avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) - """ # noqa: W505 - pass + """ # noqa: W505, E501 def evaluate( - self, values: list[pyarrow.Array], eval_range: tuple[int, int] - ) -> pyarrow.Scalar: + self, values: list[pa.Array], eval_range: tuple[int, int] + ) -> pa.Scalar: """Evaluate window function on a range of rows in an input partition. This is the simplest and most general function to implement @@ -519,11 +542,10 @@ def evaluate( and evaluation results of ORDER BY expressions. If function has a single argument, `values[1..]` will contain ORDER BY expression results. """ - pass def evaluate_all_with_rank( self, num_rows: int, ranks_in_partition: list[tuple[int, int]] - ) -> pyarrow.Array: + ) -> pa.Array: """Called for window functions that only need the rank of a row. Evaluate the partition evaluator against the partition using @@ -552,7 +574,6 @@ def evaluate_all_with_rank( The user must implement this method if ``include_rank`` returns True. """ - pass def supports_bounded_execution(self) -> bool: """Can the window function be incrementally computed using bounded memory?""" @@ -567,10 +588,6 @@ def include_rank(self) -> bool: return False -if TYPE_CHECKING: - _W = TypeVar("_W", bound=WindowEvaluator) - - class WindowUDF: """Class for performing window user-defined functions (UDF). @@ -582,8 +599,8 @@ def __init__( self, name: str, func: Callable[[], WindowEvaluator], - input_types: list[pyarrow.DataType], - return_type: pyarrow.DataType, + input_types: list[pa.DataType], + return_type: pa.DataType, volatility: Volatility | str, ) -> None: """Instantiate a user-defined window function (UDWF). 
@@ -607,8 +624,8 @@ def __call__(self, *args: Expr) -> Expr: @staticmethod def udwf( func: Callable[[], WindowEvaluator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, volatility: Volatility | str, name: Optional[str] = None, ) -> WindowUDF: @@ -648,16 +665,16 @@ def bias_10() -> BiasedNumbers: Returns: A user-defined window function. - """ # noqa W505 + """ # noqa: W505, E501 if not callable(func): - raise TypeError("`func` must be callable.") - if not isinstance(func.__call__(), WindowEvaluator): - raise TypeError( - "`func` must implement the abstract base class WindowEvaluator" - ) + msg = "`func` must be callable." + raise TypeError(msg) + if not isinstance(func(), WindowEvaluator): + msg = "`func` must implement the abstract base class WindowEvaluator" + raise TypeError(msg) if name is None: - name = func.__call__().__class__.__qualname__.lower() - if isinstance(input_types, pyarrow.DataType): + name = func().__class__.__qualname__.lower() + if isinstance(input_types, pa.DataType): input_types = [input_types] return WindowUDF( name=name, @@ -666,3 +683,10 @@ def bias_10() -> BiasedNumbers: return_type=return_type, volatility=volatility, ) + + +# Convenience exports so we can import instead of treating as +# variables at the package root +udf = ScalarUDF.udf +udaf = AggregateUDF.udaf +udwf = WindowUDF.udwf diff --git a/python/tests/generic.py b/python/tests/generic.py index 0177e2df0..1b98fdf9e 100644 --- a/python/tests/generic.py +++ b/python/tests/generic.py @@ -16,6 +16,7 @@ # under the License. 
import datetime +from datetime import timezone import numpy as np import pyarrow as pa @@ -26,29 +27,29 @@ def data(): - np.random.seed(1) + rng = np.random.default_rng(1) data = np.concatenate( [ - np.random.normal(0, 0.01, size=50), - np.random.normal(50, 0.01, size=50), + rng.normal(0, 0.01, size=50), + rng.normal(50, 0.01, size=50), ] ) return pa.array(data) def data_with_nans(): - np.random.seed(0) - data = np.random.normal(0, 0.01, size=50) - mask = np.random.randint(0, 2, size=50) + rng = np.random.default_rng(0) + data = rng.normal(0, 0.01, size=50) + mask = rng.normal(0, 2, size=50) data[mask == 0] = np.nan return data def data_datetime(f): data = [ - datetime.datetime.now(), - datetime.datetime.now() - datetime.timedelta(days=1), - datetime.datetime.now() + datetime.timedelta(days=1), + datetime.datetime.now(tz=timezone.utc), + datetime.datetime.now(tz=timezone.utc) - datetime.timedelta(days=1), + datetime.datetime.now(tz=timezone.utc) + datetime.timedelta(days=1), ] return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False])) diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 5ef46131b..61b1c7d80 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -66,7 +66,7 @@ def df_aggregate_100(): @pytest.mark.parametrize( - "agg_expr, calc_expected", + ("agg_expr", "calc_expected"), [ (f.avg(column("a")), lambda a, b, c, d: np.array(np.average(a))), ( @@ -114,7 +114,7 @@ def test_aggregation_stats(df, agg_expr, calc_expected): @pytest.mark.parametrize( - "agg_expr, expected, array_sort", + ("agg_expr", "expected", "array_sort"), [ (f.approx_distinct(column("b")), pa.array([2], type=pa.uint64()), False), ( @@ -182,12 +182,11 @@ def test_aggregation(df, agg_expr, expected, array_sort): agg_df.show() result = agg_df.collect()[0] - print(result) assert result.column(0) == expected @pytest.mark.parametrize( - "name,expr,expected", + ("name", "expr", "expected"), [ ( 
"approx_percentile_cont", @@ -299,7 +298,9 @@ def test_aggregate_100(df_aggregate_100, name, expr, expected): ] -@pytest.mark.parametrize("name,expr,result", data_test_bitwise_and_boolean_functions) +@pytest.mark.parametrize( + ("name", "expr", "result"), data_test_bitwise_and_boolean_functions +) def test_bit_and_bool_fns(df, name, expr, result): df = df.aggregate([], [expr.alias(name)]) @@ -311,7 +312,7 @@ def test_bit_and_bool_fns(df, name, expr, result): @pytest.mark.parametrize( - "name,expr,result", + ("name", "expr", "result"), [ ("first_value", f.first_value(column("a")), [0, 4]), ( @@ -361,7 +362,6 @@ def test_bit_and_bool_fns(df, name, expr, result): ), [8, 9], ), - ("first_value", f.first_value(column("a")), [0, 4]), ( "nth_value_ordered", f.nth_value(column("a"), 2, order_by=[column("a").sort(ascending=False)]), @@ -401,7 +401,7 @@ def test_first_last_value(df_partitioned, name, expr, result) -> None: @pytest.mark.parametrize( - "name,expr,result", + ("name", "expr", "result"), [ ("string_agg", f.string_agg(column("a"), ","), "one,two,three,two"), ("string_agg", f.string_agg(column("b"), ""), "03124"), diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py index 214f6b165..23b328458 100644 --- a/python/tests/test_catalog.py +++ b/python/tests/test_catalog.py @@ -19,6 +19,9 @@ import pytest +# Note we take in `database` as a variable even though we don't use +# it because that will cause the fixture to set up the context with +# the tables we need. 
def test_basic(ctx, database): with pytest.raises(KeyError): ctx.catalog("non-existent") @@ -26,10 +29,10 @@ def test_basic(ctx, database): default = ctx.catalog() assert default.names() == ["public"] - for database in [default.database("public"), default.database()]: - assert database.names() == {"csv1", "csv", "csv2"} + for db in [default.database("public"), default.database()]: + assert db.names() == {"csv1", "csv", "csv2"} - table = database.table("csv") + table = db.table("csv") assert table.kind == "physical" assert table.schema == pa.schema( [ diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 91046e6b8..7a0a7aa08 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -16,7 +16,6 @@ # under the License. import datetime as dt import gzip -import os import pathlib import pyarrow as pa @@ -45,7 +44,7 @@ def test_create_context_runtime_config_only(): SessionContext(runtime=RuntimeEnvBuilder()) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_runtime_configs(tmp_path, path_to_str): path1 = tmp_path / "dir1" path2 = tmp_path / "dir2" @@ -62,7 +61,7 @@ def test_runtime_configs(tmp_path, path_to_str): assert db is not None -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_temporary_files(tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -79,14 +78,14 @@ def test_create_context_with_all_valid_args(): runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() - .with_create_default_catalog_and_schema(True) + .with_create_default_catalog_and_schema(enabled=True) .with_default_catalog_and_schema("foo", "bar") .with_target_partitions(1) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - 
.with_parquet_pruning(False) + .with_information_schema(enabled=True) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) + .with_repartition_windows(enabled=False) + .with_parquet_pruning(enabled=False) ) ctx = SessionContext(config, runtime) @@ -167,7 +166,7 @@ def test_from_arrow_table(ctx): def record_batch_generator(num_batches: int): schema = pa.schema([("a", pa.int64()), ("b", pa.int64())]) - for i in range(num_batches): + for _i in range(num_batches): yield pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], schema=schema ) @@ -492,10 +491,10 @@ def test_table_not_found(ctx): def test_read_json(ctx): - path = os.path.dirname(os.path.abspath(__file__)) + path = pathlib.Path(__file__).parent.resolve() # Default - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = path / "data_test_context" / "data.json" df = ctx.read_json(test_data_path) result = df.collect() @@ -515,7 +514,7 @@ def test_read_json(ctx): assert result[0].schema == schema # File extension - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = path / "data_test_context" / "data.json" df = ctx.read_json(test_data_path, file_extension=".json") result = df.collect() @@ -524,15 +523,17 @@ def test_read_json(ctx): def test_read_json_compressed(ctx, tmp_path): - path = os.path.dirname(os.path.abspath(__file__)) - test_data_path = os.path.join(path, "data_test_context", "data.json") + path = pathlib.Path(__file__).parent.resolve() + test_data_path = path / "data_test_context" / "data.json" # File compression type gzip_path = tmp_path / "data.json.gz" - with open(test_data_path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with ( + pathlib.Path.open(test_data_path, "rb") as csv_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(csv_file) df = ctx.read_json(gzip_path, 
file_extension=".gz", file_compression_type="gz") result = df.collect() @@ -563,14 +564,16 @@ def test_read_csv_list(ctx): def test_read_csv_compressed(ctx, tmp_path): - test_data_path = "testing/data/csv/aggregate_test_100.csv" + test_data_path = pathlib.Path("testing/data/csv/aggregate_test_100.csv") # File compression type gzip_path = tmp_path / "aggregate_test_100.csv.gz" - with open(test_data_path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with ( + pathlib.Path.open(test_data_path, "rb") as csv_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(csv_file) csv_df = ctx.read_csv(gzip_path, file_extension=".gz", file_compression_type="gz") csv_df.select(column("c1")).show() @@ -603,7 +606,7 @@ def test_create_sql_options(): def test_sql_with_options_no_ddl(ctx): sql = "CREATE TABLE IF NOT EXISTS valuetable AS VALUES(1,'HELLO'),(12,'DATAFUSION')" ctx.sql(sql) - options = SQLOptions().with_allow_ddl(False) + options = SQLOptions().with_allow_ddl(allow=False) with pytest.raises(Exception, match="DDL"): ctx.sql_with_options(sql, options=options) @@ -618,7 +621,7 @@ def test_sql_with_options_no_dml(ctx): ctx.register_dataset(table_name, dataset) sql = f'INSERT INTO "{table_name}" VALUES (1, 2), (2, 3);' ctx.sql(sql) - options = SQLOptions().with_allow_dml(False) + options = SQLOptions().with_allow_dml(allow=False) with pytest.raises(Exception, match="DML"): ctx.sql_with_options(sql, options=options) @@ -626,6 +629,6 @@ def test_sql_with_options_no_dml(ctx): def test_sql_with_options_no_statements(ctx): sql = "SET time zone = 1;" ctx.sql(sql) - options = SQLOptions().with_allow_statements(False) + options = SQLOptions().with_allow_statements(allow=False) with pytest.raises(Exception, match="SetVariable"): ctx.sql_with_options(sql, options=options) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index c636e896a..d084f12dd 100644 --- 
a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -339,7 +339,7 @@ def test_join(): # Verify we don't make a breaking change to pre-43.0.0 # where users would pass join_keys as a positional argument - df2 = df.join(df1, (["a"], ["a"]), how="inner") # type: ignore + df2 = df.join(df1, (["a"], ["a"]), how="inner") df2.show() df2 = df2.sort(column("l.a")) table = pa.Table.from_batches(df2.collect()) @@ -375,17 +375,17 @@ def test_join_invalid_params(): with pytest.raises( ValueError, match=r"`left_on` or `right_on` should not provided with `on`" ): - df2 = df.join(df1, on="a", how="inner", right_on="test") # type: ignore + df2 = df.join(df1, on="a", how="inner", right_on="test") with pytest.raises( ValueError, match=r"`left_on` and `right_on` should both be provided." ): - df2 = df.join(df1, left_on="a", how="inner") # type: ignore + df2 = df.join(df1, left_on="a", how="inner") with pytest.raises( ValueError, match=r"either `on` or `left_on` and `right_on` should be provided." 
): - df2 = df.join(df1, how="inner") # type: ignore + df2 = df.join(df1, how="inner") def test_join_on(): @@ -567,7 +567,7 @@ def test_distinct(): ] -@pytest.mark.parametrize("name,expr,result", data_test_window_functions) +@pytest.mark.parametrize(("name", "expr", "result"), data_test_window_functions) def test_window_functions(partitioned_df, name, expr, result): df = partitioned_df.select( column("a"), column("b"), column("c"), f.alias(expr, name) @@ -731,7 +731,7 @@ def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() expected = ( - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" # noqa: E501 + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" ) assert expected == plan.display() @@ -756,7 +756,7 @@ def test_execution_plan(aggregate_df): ctx = SessionContext() rows_returned = 0 - for idx in range(0, plan.partition_count): + for idx in range(plan.partition_count): stream = ctx.execute(plan, idx) try: batch = stream.next() @@ -885,7 +885,7 @@ def test_union_distinct(ctx): ) df_c = ctx.create_dataframe([[batch]]).sort(column("a")) - df_a_u_b = df_a.union(df_b, True).sort(column("a")) + df_a_u_b = df_a.union(df_b, distinct=True).sort(column("a")) assert df_c.collect() == df_a_u_b.collect() assert df_c.collect() == df_a_u_b.collect() @@ -954,8 +954,6 @@ def test_to_arrow_table(df): def test_execute_stream(df): stream = df.execute_stream() - for s in stream: - print(type(s)) assert all(batch is not None for batch in stream) assert not list(stream) # after one iteration the generator must be exhausted @@ -969,7 +967,7 @@ def test_execute_stream_to_arrow_table(df, schema): (batch.to_pyarrow() for batch in stream), schema=df.schema() ) else: - pyarrow_table = pa.Table.from_batches((batch.to_pyarrow() for batch in stream)) + pyarrow_table = pa.Table.from_batches(batch.to_pyarrow() for batch in stream) assert isinstance(pyarrow_table, pa.Table) assert pyarrow_table.shape == (3, 3) @@ 
-1033,7 +1031,7 @@ def test_describe(df): } -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_csv(ctx, df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1046,7 +1044,7 @@ def test_write_csv(ctx, df, tmp_path, path_to_str): assert result == expected -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_json(ctx, df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1059,7 +1057,7 @@ def test_write_json(ctx, df, tmp_path, path_to_str): assert result == expected -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_parquet(df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1071,7 +1069,7 @@ def test_write_parquet(df, tmp_path, path_to_str): @pytest.mark.parametrize( - "compression, compression_level", + ("compression", "compression_level"), [("gzip", 6), ("brotli", 7), ("zstd", 15)], ) def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @@ -1082,7 +1080,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): ) # test that the actual compression scheme is the one written - for root, dirs, files in os.walk(path): + for _root, _dirs, files in os.walk(path): for file in files: if file.endswith(".parquet"): metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() @@ -1097,7 +1095,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @pytest.mark.parametrize( - "compression, compression_level", + ("compression", "compression_level"), [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], ) def test_write_compressed_parquet_wrong_compression_level( @@ -1152,7 +1150,7 @@ def test_dataframe_export(df) -> None: table = pa.table(df, schema=desired_schema) assert 
table.num_columns == 1 assert table.num_rows == 3 - for i in range(0, 3): + for i in range(3): assert table[0][i].as_py() is None # Expect an error when we cannot convert schema @@ -1186,8 +1184,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: result = df.to_pydict() assert result["a"] == [1, 2, 3] - assert result["string_col"] == ["string data" for _i in range(0, 3)] - assert result["new_col"] == [3 for _i in range(0, 3)] + assert result["string_col"] == ["string data" for _i in range(3)] + assert result["new_col"] == [3 for _i in range(3)] def test_dataframe_repr_html(df) -> None: diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 354c7e180..926e69845 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -85,18 +85,14 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - # TODO: Upstream now has expressions for skip and fetch - # REF: https://github.com/apache/datafusion/pull/12836 - # assert plan.skip() == 0 + assert "Skip: None" in str(plan) df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") plan = df.logical_plan() plan = plan.to_variant() assert isinstance(plan, Limit) - # TODO: Upstream now has expressions for skip and fetch - # REF: https://github.com/apache/datafusion/pull/12836 - # assert plan.skip() == 5 + assert "Skip: Some(Literal(Int64(5)))" in str(plan) def test_aggregate_query(test_ctx): @@ -165,6 +161,7 @@ def traverse_logical_plan(plan): res = traverse_logical_plan(input_plan) if res is not None: return res + return None ctx = SessionContext() data = {"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]} @@ -176,7 +173,7 @@ def traverse_logical_plan(plan): assert variant.expr().to_variant().qualified_name() == "table1.name" assert ( str(variant.list()) - == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' + == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' # noqa: E501 ) assert not 
variant.negated() diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index fca05bb8f..ed88a16e3 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. import math -from datetime import datetime +from datetime import datetime, timezone import numpy as np import pyarrow as pa @@ -25,6 +25,8 @@ np.seterr(invalid="ignore") +DEFAULT_TZ = timezone.utc + @pytest.fixture def df(): @@ -37,9 +39,9 @@ def df(): pa.array(["hello ", " world ", " !"], type=pa.string_view()), pa.array( [ - datetime(2022, 12, 31), - datetime(2027, 6, 26), - datetime(2020, 7, 2), + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), ] ), pa.array([False, True, True]), @@ -221,12 +223,12 @@ def py_indexof(arr, v): def py_arr_remove(arr, v, n=None): new_arr = arr[:] found = 0 - while found != n: - try: + try: + while found != n: new_arr.remove(v) found += 1 - except ValueError: - break + except ValueError: + pass return new_arr @@ -234,13 +236,13 @@ def py_arr_remove(arr, v, n=None): def py_arr_replace(arr, from_, to, n=None): new_arr = arr[:] found = 0 - while found != n: - try: + try: + while found != n: idx = new_arr.index(from_) new_arr[idx] = to found += 1 - except ValueError: - break + except ValueError: + pass return new_arr @@ -268,266 +270,266 @@ def py_flatten(arr): @pytest.mark.parametrize( ("stmt", "py_expr"), [ - [ + ( lambda col: f.array_append(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_push_back(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_append(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_push_back(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in 
data], - ], - [ + ), + ( lambda col: f.array_concat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.array_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.list_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.list_concat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.array_dims(col), lambda data: [[len(r)] for r in data], - ], - [ + ), + ( lambda col: f.array_distinct(col), lambda data: [list(set(r)) for r in data], - ], - [ + ), + ( lambda col: f.list_distinct(col), lambda data: [list(set(r)) for r in data], - ], - [ + ), + ( lambda col: f.list_dims(col), lambda data: [[len(r)] for r in data], - ], - [ + ), + ( lambda col: f.array_element(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.array_empty(col), lambda data: [len(r) == 0 for r in data], - ], - [ + ), + ( lambda col: f.empty(col), lambda data: [len(r) == 0 for r in data], - ], - [ + ), + ( lambda col: f.array_extract(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.list_element(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.list_extract(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.array_length(col), lambda data: [len(r) for r in data], - ], - [ + ), + ( lambda col: f.list_length(col), lambda data: [len(r) for r in data], - ], - [ + ), + ( lambda col: f.array_has(col, literal(1.0)), lambda data: [1.0 in r for r in data], - ], - [ + ), + ( lambda col: f.array_has_all( col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) ), lambda data: [np.all([v in r for v in [1.0, 3.0, 5.0]]) for r in data], - ], - [ + ), + ( lambda col: f.array_has_any( col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) ), lambda data: [np.any([v in r for v in 
[1.0, 3.0, 5.0]]) for r in data], - ], - [ + ), + ( lambda col: f.array_position(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.array_indexof(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.list_position(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.list_indexof(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.array_positions(col, literal(1.0)), lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], - ], - [ + ), + ( lambda col: f.list_positions(col, literal(1.0)), lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], - ], - [ + ), + ( lambda col: f.array_ndims(col), lambda data: [np.array(r).ndim for r in data], - ], - [ + ), + ( lambda col: f.list_ndims(col), lambda data: [np.array(r).ndim for r in data], - ], - [ + ), + ( lambda col: f.array_prepend(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_push_front(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_prepend(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_push_front(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_pop_back(col), lambda data: [arr[:-1] for arr in data], - ], - [ + ), + ( lambda col: f.array_pop_front(col), lambda data: [arr[1:] for arr in data], - ], - [ + ), + ( lambda col: f.array_remove(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.array_remove_n(col, literal(3.0), literal(2)), lambda data: 
[py_arr_remove(arr, 3.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove_n(col, literal(3.0), literal(2)), lambda data: [py_arr_remove(arr, 3.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.array_remove_all(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove_all(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], - ], - [ + ), + ( lambda col: f.list_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], - ], - [ + ), + ( lambda col: f.array_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.array_replace_n(col, literal(3.0), literal(4.0), literal(1)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace_n(col, literal(3.0), literal(4.0), literal(2)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.array_replace_all(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace_all(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_sort(col, descending=True, null_first=True), lambda data: [np.sort(arr)[::-1] for arr in data], - ], - [ + ), + ( lambda col: f.list_sort(col, descending=False, null_first=False), lambda data: [np.sort(arr) for arr in data], - ], - [ + ), + ( lambda col: f.array_slice(col, literal(2), literal(4)), lambda data: [arr[1:4] for arr in data], - ], + ), pytest.param( lambda col: f.list_slice(col, literal(-1), literal(2)), 
lambda data: [arr[-1:2] for arr in data], ), - [ + ( lambda col: f.array_intersect(col, literal([3.0, 4.0])), lambda data: [np.intersect1d(arr, [3.0, 4.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_intersect(col, literal([3.0, 4.0])), lambda data: [np.intersect1d(arr, [3.0, 4.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_union(col, literal([12.0, 999.0])), lambda data: [np.union1d(arr, [12.0, 999.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_union(col, literal([12.0, 999.0])), lambda data: [np.union1d(arr, [12.0, 999.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_except(col, literal([3.0])), lambda data: [np.setdiff1d(arr, [3.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_except(col, literal([3.0])), lambda data: [np.setdiff1d(arr, [3.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_resize(col, literal(10), literal(0.0)), lambda data: [py_arr_resize(arr, 10, 0.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_resize(col, literal(10), literal(0.0)), lambda data: [py_arr_resize(arr, 10, 0.0) for arr in data], - ], - [ + ), + ( lambda col: f.range(literal(1), literal(5), literal(2)), lambda data: [np.arange(1, 5, 2)], - ], + ), ], ) def test_array_functions(stmt, py_expr): @@ -611,22 +613,22 @@ def test_make_array_functions(make_func): @pytest.mark.parametrize( ("stmt", "py_expr"), [ - [ + ( f.array_to_string(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.array_join(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.list_to_string(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.list_join(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], + ), ], ) def test_array_function_obj_tests(stmt, py_expr): @@ -640,7 +642,7 @@ def 
test_array_function_obj_tests(stmt, py_expr): @pytest.mark.parametrize( - "function, expected_result", + ("function", "expected_result"), [ ( f.ascii(column("a")), @@ -894,54 +896,72 @@ def test_temporal_functions(df): assert result.column(0) == pa.array([12, 6, 7], type=pa.int32()) assert result.column(1) == pa.array([2022, 2027, 2020], type=pa.int32()) assert result.column(2) == pa.array( - [datetime(2022, 12, 1), datetime(2027, 6, 1), datetime(2020, 7, 1)], - type=pa.timestamp("us"), + [ + datetime(2022, 12, 1, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 1, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 1, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(3) == pa.array( - [datetime(2022, 12, 31), datetime(2027, 6, 26), datetime(2020, 7, 2)], - type=pa.timestamp("us"), + [ + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(4) == pa.array( [ - datetime(2022, 12, 30, 23, 47, 30), - datetime(2027, 6, 25, 23, 47, 30), - datetime(2020, 7, 1, 23, 47, 30), + datetime(2022, 12, 30, 23, 47, 30, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 25, 23, 47, 30, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 1, 23, 47, 30, tzinfo=DEFAULT_TZ), ], - type=pa.timestamp("ns"), + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(5) == pa.array( - [datetime(2023, 1, 10, 20, 52, 54)] * 3, type=pa.timestamp("s") + [datetime(2023, 1, 10, 20, 52, 54, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("s"), ) assert result.column(6) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(7) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + [datetime(2023, 9, 7, 5, 6, 14, tzinfo=DEFAULT_TZ)] * 3, type=pa.timestamp("s") ) assert result.column(8) == 
pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + [datetime(2023, 9, 7, 5, 6, 14, 523000, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ms"), ) assert result.column(9) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("us"), ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) assert result.column(11) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(12) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + [datetime(2023, 9, 7, 5, 6, 14, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("s"), ) assert result.column(13) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + [datetime(2023, 9, 7, 5, 6, 14, 523000, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ms"), ) assert result.column(14) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("us"), ) assert result.column(15) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(16) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) @@ -1057,7 +1077,7 @@ def test_regr_funcs_sql_2(): @pytest.mark.parametrize( - "func, expected", + ("func", "expected"), [ pytest.param(f.regr_slope(column("c2"), column("c1")), [4.6], id="regr_slope"), pytest.param( @@ -1160,7 +1180,7 @@ def test_binary_string_functions(df): @pytest.mark.parametrize( - "python_datatype, name, 
expected", + ("python_datatype", "name", "expected"), [ pytest.param(bool, "e", pa.bool_(), id="bool"), pytest.param(int, "b", pa.int64(), id="int"), @@ -1179,7 +1199,7 @@ def test_cast(df, python_datatype, name: str, expected): @pytest.mark.parametrize( - "negated, low, high, expected", + ("negated", "low", "high", "expected"), [ pytest.param(False, 3, 5, {"filtered": [4, 5]}), pytest.param(False, 4, 5, {"filtered": [4, 5]}), diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 0c155cbde..9ef7ed89a 100644 --- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -169,14 +169,15 @@ def test_class_module_is_datafusion(): def test_import_from_functions_submodule(): - from datafusion.functions import abs, sin # noqa + from datafusion.functions import abs as df_abs + from datafusion.functions import sin - assert functions.abs is abs + assert functions.abs is df_abs assert functions.sin is sin msg = "cannot import name 'foobar' from 'datafusion.functions'" with pytest.raises(ImportError, match=msg): - from datafusion.functions import foobar # noqa + from datafusion.functions import foobar # noqa: F401 def test_classes_are_inheritable(): diff --git a/python/tests/test_input.py b/python/tests/test_input.py index 806471357..4663f6148 100644 --- a/python/tests/test_input.py +++ b/python/tests/test_input.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-import os +import pathlib from datafusion.input.location import LocationInputPlugin @@ -23,10 +23,10 @@ def test_location_input(): location_input = LocationInputPlugin() - cwd = os.getcwd() - input_file = cwd + "/testing/data/parquet/generated_simple_numerics/blogs.parquet" + cwd = pathlib.Path.cwd() + input_file = cwd / "testing/data/parquet/generated_simple_numerics/blogs.parquet" table_name = "blog" - tbl = location_input.build_table(input_file, table_name) - assert "blog" == tbl.name - assert 3 == len(tbl.columns) + tbl = location_input.build_table(str(input_file), table_name) + assert tbl.name == "blog" + assert len(tbl.columns) == 3 assert "blogs.parquet" in tbl.filepaths[0] diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 21ad188ee..7ca509689 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -14,8 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-import os -import pathlib +from pathlib import Path import pyarrow as pa from datafusion import column @@ -23,10 +22,10 @@ def test_read_json_global_ctx(ctx): - path = os.path.dirname(os.path.abspath(__file__)) + path = Path(__file__).parent.resolve() # Default - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = Path(path) / "data_test_context" / "data.json" df = read_json(test_data_path) result = df.collect() @@ -46,7 +45,7 @@ def test_read_json_global_ctx(ctx): assert result[0].schema == schema # File extension - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = Path(path) / "data_test_context" / "data.json" df = read_json(test_data_path, file_extension=".json") result = df.collect() @@ -59,7 +58,7 @@ def test_read_parquet_global(): parquet_df.show() assert parquet_df is not None - path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" + path = Path.cwd() / "parquet/data/alltypes_plain.parquet" parquet_df = read_parquet(path=path) assert parquet_df is not None @@ -90,6 +89,6 @@ def test_read_avro(): avro_df.show() assert avro_df is not None - path = pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro" + path = Path.cwd() / "testing/data/avro/alltypes_plain.avro" avro_df = read_avro(path=path) assert avro_df is not None diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 862f745bf..b6348e3a0 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import gzip -import os +from pathlib import Path import numpy as np import pyarrow as pa @@ -47,9 +47,8 @@ def test_register_csv(ctx, tmp_path): ) write_csv(table, path) - with open(path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with Path.open(path, "rb") as csv_file, gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(csv_file) ctx.register_csv("csv", path) ctx.register_csv("csv1", str(path)) @@ -158,7 +157,7 @@ def test_register_parquet(ctx, tmp_path): assert result.to_pydict() == {"cnt": [100]} -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_parquet_partitioned(ctx, tmp_path, path_to_str): dir_root = tmp_path / "dataset_parquet_partitioned" dir_root.mkdir(exist_ok=False) @@ -194,7 +193,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str): assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1} -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_dataset(ctx, tmp_path, path_to_str): path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) path = str(path) if path_to_str else path @@ -209,13 +208,15 @@ def test_register_dataset(ctx, tmp_path, path_to_str): def test_register_json(ctx, tmp_path): - path = os.path.dirname(os.path.abspath(__file__)) - test_data_path = os.path.join(path, "data_test_context", "data.json") + path = Path(__file__).parent.resolve() + test_data_path = Path(path) / "data_test_context" / "data.json" gzip_path = tmp_path / "data.json.gz" - with open(test_data_path, "rb") as json_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(json_file) + with ( + Path.open(test_data_path, "rb") as json_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(json_file) ctx.register_json("json", test_data_path) 
ctx.register_json("json1", str(test_data_path)) @@ -470,16 +471,18 @@ def test_simple_select(ctx, tmp_path, arr): # In DF 43.0.0 we now default to having BinaryView and StringView # so the array that is saved to the parquet is slightly different # than the array read. Convert to values for comparison. - if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray): + if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)): arr = arr.tolist() result = result.tolist() np.testing.assert_equal(result, arr) -@pytest.mark.parametrize("file_sort_order", (None, [[col("int").sort(True, True)]])) -@pytest.mark.parametrize("pass_schema", (True, False)) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize( + "file_sort_order", [None, [[col("int").sort(ascending=True, nulls_first=True)]]] +) +@pytest.mark.parametrize("pass_schema", [True, False]) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_listing_table( ctx, tmp_path, pass_schema, file_sort_order, path_to_str ): @@ -528,7 +531,7 @@ def test_register_listing_table( assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2} result = ctx.sql( - "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" + "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" # noqa: E501 ).collect() result = pa.Table.from_batches(result) diff --git a/python/tests/test_store.py b/python/tests/test_store.py index 53ffc3acf..ac9af98f3 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-import os +from pathlib import Path import pytest from datafusion import SessionContext @@ -23,17 +23,16 @@ @pytest.fixture def ctx(): - ctx = SessionContext() - return ctx + return SessionContext() def test_read_parquet(ctx): ctx.register_parquet( "test", - f"file://{os.getcwd()}/parquet/data/alltypes_plain.parquet", - [], - True, - ".parquet", + f"file://{Path.cwd()}/parquet/data/alltypes_plain.parquet", + table_partition_cols=[], + parquet_pruning=True, + file_extension=".parquet", ) df = ctx.sql("SELECT * FROM test") assert isinstance(df.collect(), list) diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index feada7cde..f367a447d 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -50,7 +50,7 @@ def test_substrait_serialization(ctx): substrait_plan = ss.Producer.to_substrait_plan(df.logical_plan(), ctx) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_substrait_file_serialization(ctx, tmp_path, path_to_str): batch = pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 97cf81f3c..453ff6f4f 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -17,8 +17,6 @@ from __future__ import annotations -from typing import List - import pyarrow as pa import pyarrow.compute as pc import pytest @@ -31,7 +29,7 @@ class Summarize(Accumulator): def __init__(self, initial_value: float = 0.0): self._sum = pa.scalar(initial_value) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: @@ -39,7 +37,7 @@ def update(self, values: pa.Array) -> None: # This breaks on `None` self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - def merge(self, states: List[pa.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: # Not nice since pyarrow 
scalars can't be summed yet. # This breaks on `None` self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) @@ -56,7 +54,7 @@ class MissingMethods(Accumulator): def __init__(self): self._sum = pa.scalar(0) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] @@ -86,7 +84,7 @@ def test_errors(df): "evaluate, merge, update)" ) with pytest.raises(Exception, match=msg): - accum = udaf( # noqa F841 + accum = udaf( # noqa: F841 MissingMethods, pa.int64(), pa.int64(), diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 2fea34aa3..3d6dcf9d8 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -298,7 +298,7 @@ def test_udwf_errors(df): ] -@pytest.mark.parametrize("name,expr,expected", data_test_udwf_functions) +@pytest.mark.parametrize(("name", "expr", "expected"), data_test_udwf_functions) def test_udwf_functions(df, name, expr, expected): df = df.select("a", "b", f.round(expr, lit(3)).alias(name)) diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index ac064ba95..d7f6f6e35 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -19,6 +19,7 @@ import datafusion.functions import datafusion.object_store import datafusion.substrait +import pytest # EnumType introduced in 3.11. 3.10 and prior it was called EnumMeta. 
try: @@ -41,10 +42,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: internal_attr = getattr(internal_obj, attr) wrapped_attr = getattr(wrapped_obj, attr) - if internal_attr is not None: - if wrapped_attr is None: - print("Missing attribute: ", attr) - assert False + if internal_attr is not None and wrapped_attr is None: + pytest.fail(f"Missing attribute: {attr}") if attr in ["__self__", "__class__"]: continue From 3dcf7c7e5c0af0eb3c5e3bdf9c6e33fd4541b070 Mon Sep 17 00:00:00 2001 From: jsai28 <54253219+jsai28@users.noreply.github.com> Date: Thu, 13 Mar 2025 04:09:03 -0600 Subject: [PATCH 021/206] feat/making global context accessible for users (#1060) * Rename _global_ctx to global_ctx * Add global context to python wrapper code * Update context.py * singleton for global context * formatting * remove udf from import * remove _global_instance * formatting * formatting * unnecessary test * fix test_io.py * ran ruff * ran ruff format --- python/datafusion/context.py | 12 +++++++ python/datafusion/io.py | 63 ++++++++++++++++-------------------- python/tests/test_context.py | 18 +++++++++++ src/context.rs | 2 +- 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 0ab1a908a..58ad9a943 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -496,6 +496,18 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) + @classmethod + def global_ctx(cls) -> SessionContext: + """Retrieve the global context as a `SessionContext` wrapper. + + Returns: + A `SessionContext` object that wraps the global `SessionContextInternal`. + """ + internal_ctx = SessionContextInternal.global_ctx() + wrapper = cls() + wrapper.ctx = internal_ctx + return wrapper + def enable_url_table(self) -> SessionContext: """Control if local files can be queried as tables. 
diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 3e39703e3..ef5ebf96f 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING +from datafusion.context import SessionContext from datafusion.dataframe import DataFrame -from ._internal import SessionContext as SessionContextInternal - if TYPE_CHECKING: import pathlib @@ -68,16 +67,14 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_parquet( - str(path), - table_partition_cols, - parquet_pruning, - file_extension, - skip_metadata, - schema, - file_sort_order, - ) + return SessionContext.global_ctx().read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, ) @@ -110,15 +107,13 @@ def read_json( """ if table_partition_cols is None: table_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_json( - str(path), - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) + return SessionContext.global_ctx().read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, ) @@ -161,17 +156,15 @@ def read_csv( path = [str(p) for p in path] if isinstance(path, list) else str(path) - return DataFrame( - SessionContextInternal._global_ctx().read_csv( - path, - schema, - has_header, - delimiter, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) + return SessionContext.global_ctx().read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, ) @@ -198,8 +191,6 @@ def read_avro( """ if file_partition_cols is None: file_partition_cols = [] - return DataFrame( - 
SessionContextInternal._global_ctx().read_avro( - str(path), schema, file_partition_cols, file_extension - ) + return SessionContext.global_ctx().read_avro( + str(path), schema, file_partition_cols, file_extension ) diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 7a0a7aa08..4a15ac9cf 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -632,3 +632,21 @@ def test_sql_with_options_no_statements(ctx): options = SQLOptions().with_allow_statements(allow=False) with pytest.raises(Exception, match="SetVariable"): ctx.sql_with_options(sql, options=options) + + +@pytest.fixture +def batch(): + return pa.RecordBatch.from_arrays( + [pa.array([4, 5, 6])], + names=["a"], + ) + + +def test_create_dataframe_with_global_ctx(batch): + ctx = SessionContext.global_ctx() + + df = ctx.create_dataframe([[batch]]) + + result = df.collect()[0].column(0) + + assert result == pa.array([4, 5, 6]) diff --git a/src/context.rs b/src/context.rs index 9ba87eb8a..0db0f4d7e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -308,7 +308,7 @@ impl PySessionContext { #[classmethod] #[pyo3(signature = ())] - fn _global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { + fn global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { Ok(Self { ctx: get_global_ctx().clone(), }) From 55141bad7c2270c14742e962d8bab1d4f1be27f5 Mon Sep 17 00:00:00 2001 From: Spaarsh <67336892+Spaarsh@users.noreply.github.com> Date: Fri, 14 Mar 2025 18:26:31 +0530 Subject: [PATCH 022/206] Renaming Internal Structs (#1059) * Renamed Expr to RawExpr * Fixed CI test for exported classes to include RawExpr as well * Fixed CI test for exported classes to check if Expr class covers RawExpr * Generalized Raw* class checking * fixes * fixes * fixed the CI test to not look for Raw classes in the datafusion module * Add additional text to unit test describing operation and ensure wrapped Raw classes are checked * New ruff rule on main * Resolve ruff errors --------- Co-authored-by: Tim Saucer 
--- python/datafusion/expr.py | 8 ++-- python/tests/test_wrapper_coverage.py | 55 +++++++++++++++++++-------- src/expr.rs | 2 +- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 702f75aed..77b6c272d 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -193,7 +193,7 @@ class Expr: :ref:`Expressions` in the online documentation for more information. """ - def __init__(self, expr: expr_internal.Expr) -> None: + def __init__(self, expr: expr_internal.RawExpr) -> None: """This constructor should not be called by the end user.""" self.expr = expr @@ -383,7 +383,7 @@ def literal(value: Any) -> Expr: value = pa.scalar(value, type=pa.string_view()) if not isinstance(value, pa.Scalar): value = pa.scalar(value) - return Expr(expr_internal.Expr.literal(value)) + return Expr(expr_internal.RawExpr.literal(value)) @staticmethod def string_literal(value: str) -> Expr: @@ -398,13 +398,13 @@ def string_literal(value: str) -> Expr: """ if isinstance(value, str): value = pa.scalar(value, type=pa.string()) - return Expr(expr_internal.Expr.literal(value)) + return Expr(expr_internal.RawExpr.literal(value)) return Expr.literal(value) @staticmethod def column(value: str) -> Expr: """Creates a new expression representing a column.""" - return Expr(expr_internal.Expr.column(value)) + return Expr(expr_internal.RawExpr.column(value)) def alias(self, name: str) -> Expr: """Assign a name to the expression.""" diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index d7f6f6e35..a2de2d32b 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -28,37 +28,62 @@ from enum import EnumMeta as EnumType -def missing_exports(internal_obj, wrapped_obj) -> None: - # Special case enums - just make sure they exist since dir() - # and other functions get overridden. 
+def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 + """ + Identify if any of the rust exposted structs or functions do not have wrappers. + + Special handling for: + - Raw* classes: Internal implementation details that shouldn't be exposed + - _global_ctx: Internal implementation detail + - __self__, __class__: Python special attributes + """ + # Special case enums - EnumType overrides a some of the internal functions, + # so check all of the values exist and move on if isinstance(wrapped_obj, EnumType): + expected_values = [v for v in dir(internal_obj) if not v.startswith("__")] + for value in expected_values: + assert value in dir(wrapped_obj) return - for attr in dir(internal_obj): - if attr in ["_global_ctx"]: - continue - assert attr in dir(wrapped_obj) + for internal_attr_name in dir(internal_obj): + wrapped_attr_name = internal_attr_name.removeprefix("Raw") + assert wrapped_attr_name in dir(wrapped_obj) - internal_attr = getattr(internal_obj, attr) - wrapped_attr = getattr(wrapped_obj, attr) + internal_attr = getattr(internal_obj, internal_attr_name) + wrapped_attr = getattr(wrapped_obj, wrapped_attr_name) - if internal_attr is not None and wrapped_attr is None: - pytest.fail(f"Missing attribute: {attr}") + # There are some auto generated attributes that can be None, such as + # __kwdefaults__ and __doc__. As long as these are None on the internal + # object, it's okay to skip them. However if they do exist on the internal + # object they must also exist on the wrapped object. 
+ if internal_attr is not None: + if wrapped_attr is None: + pytest.fail(f"Missing attribute: {internal_attr_name}") - if attr in ["__self__", "__class__"]: + if internal_attr_name in ["__self__", "__class__"]: continue + if isinstance(internal_attr, list): assert isinstance(wrapped_attr, list) + + # We have cases like __all__ that are a list and we want to be certain that + # every value in the list in the internal object is also in the wrapper list for val in internal_attr: - assert val in wrapped_attr + if isinstance(val, str) and val.startswith("Raw"): + assert val[3:] in wrapped_attr + else: + assert val in wrapped_attr elif hasattr(internal_attr, "__dict__"): + # Check all submodules recursively missing_exports(internal_attr, wrapped_attr) def test_datafusion_missing_exports() -> None: """Check for any missing python exports. - This test verifies that every exposed class, attribute, and function in - the internal (pyo3) module is also exposed in our python wrappers. + This test verifies that every exposed class, attribute, + and function in the internal (pyo3) module - datafusion._internal + is also exposed in our python wrappers - datafusion - + i.e., the ones exposed to the public. 
""" missing_exports(datafusion._internal, datafusion) diff --git a/src/expr.rs b/src/expr.rs index e750be6a4..d3c528eb4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -101,7 +101,7 @@ pub mod window; use sort_expr::{to_sort_expressions, PySortExpr}; /// A PyExpr that can be used on a DataFrame -#[pyclass(name = "Expr", module = "datafusion.expr", subclass)] +#[pyclass(name = "RawExpr", module = "datafusion.expr", subclass)] #[derive(Debug, Clone)] pub struct PyExpr { pub expr: Expr, From 4f457030f171a26d0c4cce4d55cf541519956fcc Mon Sep 17 00:00:00 2001 From: jsai28 <54253219+jsai28@users.noreply.github.com> Date: Sat, 15 Mar 2025 04:57:38 -0600 Subject: [PATCH 023/206] added pytest asyncio tests (#1063) --- pyproject.toml | 1 + python/tests/test_dataframe.py | 54 ++++++++++++++++++++++++++++++++++ uv.lock | 17 ++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 060e3b80a..a4ed18c4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,6 +150,7 @@ dev = [ "maturin>=1.8.1", "numpy>1.25.0", "pytest>=7.4.4", + "pytest-asyncio>=0.23.3", "ruff>=0.9.1", "toml>=0.10.2", "pygithub==2.5.0", diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index d084f12dd..384b17878 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -771,6 +771,16 @@ def test_execution_plan(aggregate_df): assert rows_returned == 5 +@pytest.mark.asyncio +async def test_async_iteration_of_df(aggregate_df): + rows_returned = 0 + async for batch in aggregate_df.execute_stream(): + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + + assert rows_returned == 5 + + def test_repartition(df): df.repartition(2) @@ -958,6 +968,18 @@ def test_execute_stream(df): assert not list(stream) # after one iteration the generator must be exhausted +@pytest.mark.asyncio +async def test_execute_stream_async(df): + stream = df.execute_stream() + batches = [batch async for batch in stream] + 
+ assert all(batch is not None for batch in batches) + + # After consuming all batches, the stream should be exhausted + remaining_batches = [batch async for batch in stream] + assert not remaining_batches + + @pytest.mark.parametrize("schema", [True, False]) def test_execute_stream_to_arrow_table(df, schema): stream = df.execute_stream() @@ -974,6 +996,25 @@ def test_execute_stream_to_arrow_table(df, schema): assert set(pyarrow_table.column_names) == {"a", "b", "c"} +@pytest.mark.asyncio +@pytest.mark.parametrize("schema", [True, False]) +async def test_execute_stream_to_arrow_table_async(df, schema): + stream = df.execute_stream() + + if schema: + pyarrow_table = pa.Table.from_batches( + [batch.to_pyarrow() async for batch in stream], schema=df.schema() + ) + else: + pyarrow_table = pa.Table.from_batches( + [batch.to_pyarrow() async for batch in stream] + ) + + assert isinstance(pyarrow_table, pa.Table) + assert pyarrow_table.shape == (3, 3) + assert set(pyarrow_table.column_names) == {"a", "b", "c"} + + def test_execute_stream_partitioned(df): streams = df.execute_stream_partitioned() assert all(batch is not None for stream in streams for batch in stream) @@ -982,6 +1023,19 @@ def test_execute_stream_partitioned(df): ) # after one iteration all generators must be exhausted +@pytest.mark.asyncio +async def test_execute_stream_partitioned_async(df): + streams = df.execute_stream_partitioned() + + for stream in streams: + batches = [batch async for batch in stream] + assert all(batch is not None for batch in batches) + + # Ensure the stream is exhausted after iteration + remaining_batches = [batch async for batch in stream] + assert not remaining_batches + + def test_empty_to_arrow_table(df): # Convert empty datafusion dataframe to pyarrow Table pyarrow_table = df.limit(0).to_arrow_table() diff --git a/uv.lock b/uv.lock index 619b92856..7e4bc4c6b 100644 --- a/uv.lock +++ b/uv.lock @@ -284,9 +284,11 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = 
"maturin" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "pygithub" }, { name = "pytest" }, + { name = "pytest-asyncio" }, { name = "ruff" }, { name = "toml" }, ] @@ -314,9 +316,10 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "maturin", specifier = ">=1.8.1" }, - { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "numpy", specifier = ">1.25.0" }, { name = "pygithub", specifier = "==2.5.0" }, { name = "pytest", specifier = ">=7.4.4" }, + { name = "pytest-asyncio", specifier = ">=0.23.3" }, { name = "ruff", specifier = ">=0.9.1" }, { name = "toml", specifier = ">=0.10.2" }, ] @@ -1079,6 +1082,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, ] +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 2f52688d76e84794343c17ffaf3002534ecfd716 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 
15 Mar 2025 19:00:50 +0800 Subject: [PATCH 024/206] Add decorator for udwf (#1061) * feat: Introduce create_udwf method for User-Defined Window Functions - Added `create_udwf` static method to `WindowUDF` class, allowing users to create User-Defined Window Functions (UDWF) as both a function and a decorator. - Updated type hinting for `_R` using `TypeAlias` for better clarity. - Enhanced documentation with usage examples for both function and decorator styles, improving usability and understanding. * refactor: Simplify UDWF test suite and introduce SimpleWindowCount evaluator - Removed multiple exponential smoothing classes to streamline the code. - Introduced SimpleWindowCount class for basic row counting functionality. - Updated test cases to validate the new SimpleWindowCount evaluator. - Refactored fixture and test functions for clarity and consistency. - Enhanced error handling in UDWF creation tests. * fix: Update type alias import to use typing_extensions for compatibility * Add udwf tests for multiple input types and decorator syntax * replace old def udwf * refactor: Simplify df fixture by passing ctx as an argument * refactor: Rename DataFrame fixtures and update test functions - Renamed `df` fixture to `complex_window_df` for clarity. - Renamed `simple_df` fixture to `count_window_df` to better reflect its purpose. - Updated test functions to use the new fixture names, enhancing readability and maintainability. * refactor: Update udwf calls in WindowUDF to use BiasedNumbers directly - Changed udwf1 to use BiasedNumbers instead of bias_10. - Added udwf2 to call udwf with bias_10. - Introduced udwf3 to demonstrate a lambda function returning BiasedNumbers(20). 
* feat: Add overloads for udwf function to support multiple input types and decorator syntax * refactor: Simplify udwf method signature by removing redundant type hints * refactor: Remove state_type from udwf method signature and update return type handling - Eliminated the state_type parameter from the udwf method to simplify the function signature. - Updated return type handling in the _function and _decorator methods to use a generic type _R for better type flexibility. - Enhanced the decorator to wrap the original function, allowing for improved argument handling and expression return. * refactor: Update volatility parameter type in udwf method signature to support Volatility enum * Fix ruff errors * fix C901 for def udwf * refactor: Update udwf method signature and simplify input handling - Changed the type hint for the return type in the _create_window_udf_decorator method to use pa.DataType directly instead of a TypeVar. - Simplified the handling of input types by removing redundant checks and directly using the input types list. - Removed unnecessary comments and cleaned up the code for better readability. - Updated the test for udwf to use parameterized tests for better coverage and maintainability. * refactor: Rename input_type to input_types in udwf method signature for clarity * refactor: Enhance typing in udf.py by introducing Protocol for WindowEvaluator and improving import organization * Revert "refactor: Enhance typing in udf.py by introducing Protocol for WindowEvaluator and improving import organization" This reverts commit 16dbe5f3fd88f42d0a304384b162009bd9e49a35. 
--- python/datafusion/udf.py | 123 +++++++++++++++++++++------ python/tests/test_udwf.py | 170 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 264 insertions(+), 29 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 603b7063d..e93a34ca5 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -621,6 +621,16 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udwf.__call__(*args_raw)) + @overload + @staticmethod + def udwf( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., WindowUDF]: ... + + @overload @staticmethod def udwf( func: Callable[[], WindowEvaluator], @@ -628,24 +638,31 @@ def udwf( return_type: pa.DataType, volatility: Volatility | str, name: Optional[str] = None, - ) -> WindowUDF: - """Create a new User-Defined Window Function. + ) -> WindowUDF: ... - If your :py:class:`WindowEvaluator` can be instantiated with no arguments, you - can simply pass it's type as ``func``. If you need to pass additional arguments - to it's constructor, you can define a lambda or a factory method. During runtime - the :py:class:`WindowEvaluator` will be constructed for every instance in - which this UDWF is used. The following examples are all valid. + @staticmethod + def udwf(*args: Any, **kwargs: Any): # noqa: D417 + """Create a new User-Defined Window Function (UDWF). - .. code-block:: python + This class can be used both as a **function** and as a **decorator**. + + Usage: + - **As a function**: Call `udwf(func, input_types, return_type, volatility, + name)`. + - **As a decorator**: Use `@udwf(input_types, return_type, volatility, + name)`. When using `udwf` as a decorator, **do not pass `func` + explicitly**. 
+ **Function example:** + ``` import pyarrow as pa class BiasedNumbers(WindowEvaluator): def __init__(self, start: int = 0) -> None: self.start = start - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + def evaluate_all(self, values: list[pa.Array], + num_rows: int) -> pa.Array: return pa.array([self.start + i for i in range(num_rows)]) def bias_10() -> BiasedNumbers: @@ -655,35 +672,93 @@ def bias_10() -> BiasedNumbers: udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") + ``` + + **Decorator example:** + ``` + @udwf(pa.int64(), pa.int64(), "immutable") + def biased_numbers() -> BiasedNumbers: + return BiasedNumbers(10) + ``` + Args: - func: A callable to create the window function. - input_types: The data types of the arguments to ``func``. + func: **Only needed when calling as a function. Skip this argument when + using `udwf` as a decorator.** + input_types: The data types of the arguments. return_type: The data type of the return value. volatility: See :py:class:`Volatility` for allowed values. - arguments: A list of arguments to pass in to the __init__ method for accum. name: A descriptive name for the function. Returns: - A user-defined window function. - """ # noqa: W505, E501 + A user-defined window function that can be used in window function calls. 
+ """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return WindowUDF._create_window_udf(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return WindowUDF._create_window_udf_decorator(*args, **kwargs) + + @staticmethod + def _create_window_udf( + func: Callable[[], WindowEvaluator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> WindowUDF: + """Create a WindowUDF instance from function arguments.""" if not callable(func): msg = "`func` must be callable." raise TypeError(msg) if not isinstance(func(), WindowEvaluator): msg = "`func` must implement the abstract base class WindowEvaluator" raise TypeError(msg) - if name is None: - name = func().__class__.__qualname__.lower() - if isinstance(input_types, pa.DataType): - input_types = [input_types] - return WindowUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, + + name = name or func.__qualname__.lower() + input_types = ( + [input_types] if isinstance(input_types, pa.DataType) else input_types ) + return WindowUDF(name, func, input_types, return_type, volatility) + + @staticmethod + def _get_default_name(func: Callable) -> str: + """Get the default name for a function based on its attributes.""" + if hasattr(func, "__qualname__"): + return func.__qualname__.lower() + return func.__class__.__name__.lower() + + @staticmethod + def _normalize_input_types( + input_types: pa.DataType | list[pa.DataType], + ) -> list[pa.DataType]: + """Convert a single DataType to a list if needed.""" + if isinstance(input_types, pa.DataType): + return [input_types] + return input_types + + @staticmethod + def _create_window_udf_decorator( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[[Callable[[], 
WindowEvaluator]], Callable[..., Expr]]: + """Create a decorator for a WindowUDF.""" + + def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]: + udwf_caller = WindowUDF._create_window_udf( + func, input_types, return_type, volatility, name + ) + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Expr: + return udwf_caller(*args, **kwargs) + + return wrapper + + return decorator + # Convenience exports so we can import instead of treating as # variables at the package root diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 3d6dcf9d8..4190e7d64 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -162,14 +162,27 @@ def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: return pa.array(results) +class SimpleWindowCount(WindowEvaluator): + """A simple window evaluator that counts rows.""" + + def __init__(self, base: int = 0) -> None: + self.base = base + + def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + return pa.array([self.base + i for i in range(num_rows)]) + + class NotSubclassOfWindowEvaluator: pass @pytest.fixture -def df(): - ctx = SessionContext() +def ctx(): + return SessionContext() + +@pytest.fixture +def complex_window_df(ctx): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [ @@ -182,7 +195,17 @@ def df(): return ctx.create_dataframe([[batch]]) -def test_udwf_errors(df): +@pytest.fixture +def count_window_df(ctx): + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]], name="test_table") + + +def test_udwf_errors(complex_window_df): with pytest.raises(TypeError): udwf( NotSubclassOfWindowEvaluator, @@ -192,6 +215,103 @@ def test_udwf_errors(df): ) +def test_udwf_errors_with_message(): + """Test error cases for UDWF creation.""" + with 
pytest.raises( + TypeError, match="`func` must implement the abstract base class WindowEvaluator" + ): + udwf( + NotSubclassOfWindowEvaluator, pa.int64(), pa.int64(), volatility="immutable" + ) + + +def test_udwf_basic_usage(count_window_df): + """Test basic UDWF usage with a simple counting window function.""" + simple_count = udwf( + SimpleWindowCount, pa.int64(), pa.int64(), volatility="immutable" + ) + + df = count_window_df.select( + simple_count(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_with_args(count_window_df): + """Test UDWF with constructor arguments.""" + count_base10 = udwf( + lambda: SimpleWindowCount(10), pa.int64(), pa.int64(), volatility="immutable" + ) + + df = count_window_df.select( + count_base10(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([10, 11, 12]) + + +def test_udwf_decorator_basic(count_window_df): + """Test UDWF used as a decorator.""" + + @udwf([pa.int64()], pa.int64(), "immutable") + def window_count() -> WindowEvaluator: + return SimpleWindowCount() + + df = count_window_df.select( + window_count(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_decorator_with_args(count_window_df): + """Test UDWF decorator with constructor arguments.""" + + @udwf([pa.int64()], pa.int64(), "immutable") + def window_count_base10() -> WindowEvaluator: + return SimpleWindowCount(10) + + df = count_window_df.select( + window_count_base10(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([10, 11, 12]) + + +def test_register_udwf(ctx, count_window_df): 
+ """Test registering and using UDWF in SQL context.""" + window_count = udwf( + SimpleWindowCount, + [pa.int64()], + pa.int64(), + volatility="immutable", + name="window_count", + ) + + ctx.register_udwf(window_count) + result = ctx.sql( + """ + SELECT window_count(a) + OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING) FROM test_table + """ + ).collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + smooth_default = udwf( ExponentialSmoothDefault, pa.float64(), @@ -299,10 +419,50 @@ def test_udwf_errors(df): @pytest.mark.parametrize(("name", "expr", "expected"), data_test_udwf_functions) -def test_udwf_functions(df, name, expr, expected): - df = df.select("a", "b", f.round(expr, lit(3)).alias(name)) +def test_udwf_functions(complex_window_df, name, expr, expected): + df = complex_window_df.select("a", "b", f.round(expr, lit(3)).alias(name)) # execute and collect the first (and only) batch result = df.sort(column("a")).select(column(name)).collect()[0] assert result.column(0) == pa.array(expected) + + +@pytest.mark.parametrize( + "udwf_func", + [ + udwf(SimpleWindowCount, pa.int64(), pa.int64(), "immutable"), + udwf(SimpleWindowCount, [pa.int64()], pa.int64(), "immutable"), + udwf([pa.int64()], pa.int64(), "immutable")(lambda: SimpleWindowCount()), + udwf(pa.int64(), pa.int64(), "immutable")(lambda: SimpleWindowCount()), + ], +) +def test_udwf_overloads(udwf_func, count_window_df): + df = count_window_df.select( + udwf_func(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_named_function(ctx, count_window_df): + """Test UDWF with explicit name parameter.""" + window_count = udwf( + SimpleWindowCount, + pa.int64(), + pa.int64(), + volatility="immutable", + name="my_custom_counter", + ) + + ctx.register_udwf(window_count) + result = ctx.sql( + """ + SELECT my_custom_counter(a) + OVER (ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING) FROM test_table""" + ).collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) From 7c1c08f8617ac97a2568eb0664e9d4ee30fceba9 Mon Sep 17 00:00:00 2001 From: Nirnay Roy <32942494+nirnayroy@users.noreply.github.com> Date: Sat, 15 Mar 2025 17:05:05 +0530 Subject: [PATCH 025/206] feat: expose regex_count function (#1066) * Added wrapper for regex_count function * fix comment --------- Co-authored-by: Nirnay Roy --- python/datafusion/functions.py | 18 ++++++++++++++++++ python/tests/test_functions.py | 4 ++++ src/functions.rs | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0cc7434cf..26bac149c 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -217,6 +217,7 @@ "random", "range", "rank", + "regexp_count", "regexp_like", "regexp_match", "regexp_replace", @@ -779,6 +780,23 @@ def regexp_replace( return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) +def regexp_count( + string: Expr, pattern: Expr, start: Expr, flags: Expr | None = None +) -> Expr: + """Returns the number of matches in a string. + + Optional start position (the first position is 1) to search for the regular + expression. 
+ """ + if flags is not None: + flags = flags.expr + if start is not None: + start = start.expr + else: + start = Expr.expr + return Expr(f.regexp_count(string.expr, pattern.expr, start, flags)) + + def repeat(string: Expr, n: Expr) -> Expr: """Repeats the ``string`` to ``n`` times.""" return Expr(f.repeat(string.expr, n.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index ed88a16e3..161e1e3bb 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr): f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), pa.array(["H-o", "W-d", "!"]), ), + ( + f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)), + pa.array([1, 1, 0], type=pa.int64()), + ), ], ) def test_string_functions(df, function, expected_result): diff --git a/src/functions.rs b/src/functions.rs index 6a8abb18d..8fac239b4 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -173,6 +173,25 @@ fn regexp_replace( ) .into()) } + +#[pyfunction] +#[pyo3(signature = (string, pattern, start, flags=None))] +/// Returns the number of matches found in the string. 
+fn regexp_count( + string: PyExpr, + pattern: PyExpr, + start: Option, + flags: Option, +) -> PyResult { + Ok(functions::expr_fn::regexp_count( + string.expr, + pattern.expr, + start.map(|x| x.expr), + flags.map(|x| x.expr), + ) + .into()) +} + /// Creates a new Sort Expr #[pyfunction] fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { @@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(power))?; m.add_wrapped(wrap_pyfunction!(radians))?; m.add_wrapped(wrap_pyfunction!(random))?; + m.add_wrapped(wrap_pyfunction!(regexp_count))?; m.add_wrapped(wrap_pyfunction!(regexp_like))?; m.add_wrapped(wrap_pyfunction!(regexp_match))?; m.add_wrapped(wrap_pyfunction!(regexp_replace))?; From b8dd97bc8eefcfecfa8dcc864c4898c654b236a9 Mon Sep 17 00:00:00 2001 From: Spaarsh <67336892+Spaarsh@users.noreply.github.com> Date: Mon, 17 Mar 2025 20:08:16 +0530 Subject: [PATCH 026/206] Add additional ruff suggestions (#1062) * Enabled ruff rule PT001 and ANN204 * Enabled ruff rule B008 * Enabled ruff rule EM101 * Enabled ruff rule PLR1714 * Enabled ruff rule ANN201 * Enabled ruff rule C400 * Enabled ruff rule B904 * Enabled ruff rule UP006 * Enabled ruff rule RUF012 * Enabled ruff rule FBT003 * Enabled ruff rule C416 * Enabled ruff rule SIM102 * Enabled ruff rule PGH003 * Enabled ruff rule PERF401 * Enabled ruff rule EM102 * Enabled ruff rule SIM108 * Enabled ruff rule ICN001 * Enabled ruff rule ICN001 * implemented reviews * Update pyproject.toml to ignore `SIM102` * Enabled ruff rule PLW2901 * Enabled ruff rule RET503 * Fixed failing ruff tests --- benchmarks/db-benchmark/groupby-datafusion.py | 24 ++-- benchmarks/db-benchmark/join-datafusion.py | 5 +- benchmarks/tpch/tpch.py | 7 +- dev/release/generate-changelog.py | 6 +- docs/source/conf.py | 4 +- examples/create-context.py | 12 +- examples/python-udaf.py | 36 +++-- examples/python-udf-comparisons.py | 9 +- examples/python-udf.py | 12 +- 
examples/query-pyarrow-data.py | 10 +- examples/sql-using-python-udaf.py | 2 +- examples/tpch/_tests.py | 4 +- examples/tpch/convert_data_to_parquet.py | 134 +++++++++--------- examples/tpch/q08_market_share.py | 2 +- examples/tpch/q19_discounted_revenue.py | 4 +- .../tpch/q21_suppliers_kept_orders_waiting.py | 2 +- pyproject.toml | 20 --- python/datafusion/__init__.py | 8 +- python/datafusion/catalog.py | 4 +- python/datafusion/context.py | 51 +++---- python/datafusion/dataframe.py | 55 +++---- python/datafusion/expr.py | 31 ++-- python/datafusion/functions.py | 9 +- python/tests/test_functions.py | 2 +- python/tests/test_wrapper_coverage.py | 7 +- 25 files changed, 213 insertions(+), 247 deletions(-) diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py index 04bf7a149..f9e8d638b 100644 --- a/benchmarks/db-benchmark/groupby-datafusion.py +++ b/benchmarks/db-benchmark/groupby-datafusion.py @@ -20,7 +20,7 @@ import timeit import datafusion as df -import pyarrow +import pyarrow as pa from datafusion import ( RuntimeEnvBuilder, SessionConfig, @@ -37,7 +37,7 @@ exec(open("./_helpers/helpers.py").read()) -def ans_shape(batches): +def ans_shape(batches) -> tuple[int, int]: rows, cols = 0, 0 for batch in batches: rows += batch.num_rows @@ -48,7 +48,7 @@ def ans_shape(batches): return rows, cols -def execute(df): +def execute(df) -> list: print(df.execution_plan().display_indent()) return df.collect() @@ -68,14 +68,14 @@ def execute(df): src_grp = os.path.join("data", data_name + ".csv") print("loading dataset %s" % src_grp, flush=True) -schema = pyarrow.schema( +schema = pa.schema( [ - ("id4", pyarrow.int32()), - ("id5", pyarrow.int32()), - ("id6", pyarrow.int32()), - ("v1", pyarrow.int32()), - ("v2", pyarrow.int32()), - ("v3", pyarrow.float64()), + ("id4", pa.int32()), + ("id5", pa.int32()), + ("id6", pa.int32()), + ("v1", pa.int32()), + ("v2", pa.int32()), + ("v3", pa.float64()), ] ) @@ -93,8 +93,8 @@ def 
execute(df): ) config = ( SessionConfig() - .with_repartition_joins(False) - .with_repartition_aggregations(False) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) .set("datafusion.execution.coalesce_batches", "false") ) ctx = SessionContext(config, runtime) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index b45ebf632..039868031 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -29,7 +29,7 @@ exec(open("./_helpers/helpers.py").read()) -def ans_shape(batches): +def ans_shape(batches) -> tuple[int, int]: rows, cols = 0, 0 for batch in batches: rows += batch.num_rows @@ -57,7 +57,8 @@ def ans_shape(batches): os.path.join("data", y_data_name[2] + ".csv"), ] if len(src_jn_y) != 3: - raise Exception("Something went wrong in preparing files used for join") + error_msg = "Something went wrong in preparing files used for join" + raise Exception(error_msg) print( "loading datasets " diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index bfb9ac398..2d1bbae5b 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -21,7 +21,7 @@ from datafusion import SessionContext -def bench(data_path, query_path): +def bench(data_path, query_path) -> None: with open("results.csv", "w") as results: # register tables start = time.time() @@ -68,10 +68,7 @@ def bench(data_path, query_path): with open(f"{query_path}/q{query}.sql") as f: text = f.read() tmp = text.split(";") - queries = [] - for str in tmp: - if len(str.strip()) > 0: - queries.append(str.strip()) + queries = [s.strip() for s in tmp if len(s.strip()) > 0] try: start = time.time() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index e30e2def2..d86736773 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -24,7 +24,7 @@ from github import Github -def print_pulls(repo_name, title, 
pulls): +def print_pulls(repo_name, title, pulls) -> None: if len(pulls) > 0: print(f"**{title}:**") print() @@ -34,7 +34,7 @@ def print_pulls(repo_name, title, pulls): print() -def generate_changelog(repo, repo_name, tag1, tag2, version): +def generate_changelog(repo, repo_name, tag1, tag2, version) -> None: # get a list of commits between two tags print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) comparison = repo.compare(tag1, tag2) @@ -154,7 +154,7 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): ) -def cli(args=None): +def cli(args=None) -> None: """Process command line arguments.""" if not args: args = sys.argv[1:] diff --git a/docs/source/conf.py b/docs/source/conf.py index c82a189e0..0be03d81d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ autoapi_python_class_content = "both" -def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 +def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa: ARG001 skip_contents = [ # Re-exports ("class", "datafusion.DataFrame"), @@ -93,7 +93,7 @@ def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 return skip -def setup(sphinx): +def setup(sphinx) -> None: sphinx.connect("autoapi-skip-member", autoapi_skip_member_fn) diff --git a/examples/create-context.py b/examples/create-context.py index 760c8513e..0026d6162 100644 --- a/examples/create-context.py +++ b/examples/create-context.py @@ -25,14 +25,14 @@ runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() - .with_create_default_catalog_and_schema(True) + .with_create_default_catalog_and_schema(enabled=True) .with_default_catalog_and_schema("foo", "bar") .with_target_partitions(8) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - .with_parquet_pruning(False) + 
.with_information_schema(enabled=True) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) + .with_repartition_windows(enabled=False) + .with_parquet_pruning(enabled=False) .set("datafusion.execution.parquet.pushdown_filters", "true") ) ctx = SessionContext(config, runtime) diff --git a/examples/python-udaf.py b/examples/python-udaf.py index 538f69571..6655edb0a 100644 --- a/examples/python-udaf.py +++ b/examples/python-udaf.py @@ -16,7 +16,7 @@ # under the License. import datafusion -import pyarrow +import pyarrow as pa import pyarrow.compute from datafusion import Accumulator, col, udaf @@ -26,25 +26,21 @@ class MyAccumulator(Accumulator): Interface of a user-defined accumulation. """ - def __init__(self): - self._sum = pyarrow.scalar(0.0) + def __init__(self) -> None: + self._sum = pa.scalar(0.0) - def update(self, values: pyarrow.Array) -> None: + def update(self, values: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(values).as_py() - ) + self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(values).as_py()) - def merge(self, states: pyarrow.Array) -> None: + def merge(self, states: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(states).as_py() - ) + self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(states).as_py()) - def state(self) -> pyarrow.Array: - return pyarrow.array([self._sum.as_py()]) + def state(self) -> pa.Array: + return pa.array([self._sum.as_py()]) - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: return self._sum @@ -52,17 +48,17 @@ def evaluate(self) -> pyarrow.Scalar: ctx = datafusion.SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) my_udaf = udaf( MyAccumulator, - pyarrow.float64(), - pyarrow.float64(), - [pyarrow.float64()], + pa.float64(), + pa.float64(), + [pa.float64()], "stable", ) @@ -70,4 +66,4 @@ def evaluate(self) -> pyarrow.Scalar: result = df.collect()[0] -assert result.column(0) == pyarrow.array([6.0]) +assert result.column(0) == pa.array([6.0]) diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py index c5d5ec8dd..eb0825011 100644 --- a/examples/python-udf-comparisons.py +++ b/examples/python-udf-comparisons.py @@ -112,8 +112,8 @@ def is_of_interest_impl( returnflag_arr: pa.Array, ) -> pa.Array: result = [] - for idx, partkey in enumerate(partkey_arr): - partkey = partkey.as_py() + for idx, partkey_val in enumerate(partkey_arr): + partkey = partkey_val.as_py() suppkey = suppkey_arr[idx].as_py() returnflag = returnflag_arr[idx].as_py() value = (partkey, suppkey, returnflag) @@ -162,10 +162,7 @@ def udf_using_pyarrow_compute_impl( resultant_arr = pc.and_(filtered_partkey_arr, filtered_suppkey_arr) resultant_arr = pc.and_(resultant_arr, filtered_returnflag_arr) - if results is None: - results = resultant_arr - else: - results = pc.or_(results, 
resultant_arr) + results = resultant_arr if results is None else pc.or_(results, resultant_arr) return results diff --git a/examples/python-udf.py b/examples/python-udf.py index fb2bc253e..1c08acd1a 100644 --- a/examples/python-udf.py +++ b/examples/python-udf.py @@ -15,23 +15,23 @@ # specific language governing permissions and limitations # under the License. -import pyarrow +import pyarrow as pa from datafusion import SessionContext, udf from datafusion import functions as f -def is_null(array: pyarrow.Array) -> pyarrow.Array: +def is_null(array: pa.Array) -> pa.Array: return array.is_null() -is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), "stable") +is_null_arr = udf(is_null, [pa.int64()], pa.bool_(), "stable") # create a context ctx = SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) @@ -40,4 +40,4 @@ def is_null(array: pyarrow.Array) -> pyarrow.Array: result = df.collect()[0] -assert result.column(0) == pyarrow.array([False] * 3) +assert result.column(0) == pa.array([False] * 3) diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py index e3456fb5b..9cfe8a62b 100644 --- a/examples/query-pyarrow-data.py +++ b/examples/query-pyarrow-data.py @@ -16,15 +16,15 @@ # under the License. 
import datafusion -import pyarrow +import pyarrow as pa from datafusion import col # create a context ctx = datafusion.SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) @@ -38,5 +38,5 @@ # execute and collect the first (and only) batch result = df.collect()[0] -assert result.column(0) == pyarrow.array([5, 7, 9]) -assert result.column(1) == pyarrow.array([-3, -3, -3]) +assert result.column(0) == pa.array([5, 7, 9]) +assert result.column(1) == pa.array([-3, -3, -3]) diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py index 60ab8d134..32ce38900 100644 --- a/examples/sql-using-python-udaf.py +++ b/examples/sql-using-python-udaf.py @@ -25,7 +25,7 @@ class MyAccumulator(Accumulator): Interface of a user-defined accumulation. 
""" - def __init__(self): + def __init__(self) -> None: self._sum = pa.scalar(0.0) def update(self, values: pa.Array) -> None: diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 2be4dfabd..80ff80244 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -91,7 +91,7 @@ def check_q17(df): ("q22_global_sales_opportunity", "q22"), ], ) -def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): +def test_tpch_query_vs_answer_file(query_code: str, answer_file: str) -> None: module = import_module(query_code) df: DataFrame = module.df @@ -122,3 +122,5 @@ def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): assert df.join(df_expected, on=cols, how="anti").count() == 0 assert df.count() == df_expected.count() + + return None diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index 73097fac5..fd0fcca49 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py @@ -25,112 +25,112 @@ import os import datafusion -import pyarrow +import pyarrow as pa ctx = datafusion.SessionContext() all_schemas = {} all_schemas["customer"] = [ - ("C_CUSTKEY", pyarrow.int64()), - ("C_NAME", pyarrow.string()), - ("C_ADDRESS", pyarrow.string()), - ("C_NATIONKEY", pyarrow.int64()), - ("C_PHONE", pyarrow.string()), - ("C_ACCTBAL", pyarrow.decimal128(15, 2)), - ("C_MKTSEGMENT", pyarrow.string()), - ("C_COMMENT", pyarrow.string()), + ("C_CUSTKEY", pa.int64()), + ("C_NAME", pa.string()), + ("C_ADDRESS", pa.string()), + ("C_NATIONKEY", pa.int64()), + ("C_PHONE", pa.string()), + ("C_ACCTBAL", pa.decimal128(15, 2)), + ("C_MKTSEGMENT", pa.string()), + ("C_COMMENT", pa.string()), ] all_schemas["lineitem"] = [ - ("L_ORDERKEY", pyarrow.int64()), - ("L_PARTKEY", pyarrow.int64()), - ("L_SUPPKEY", pyarrow.int64()), - ("L_LINENUMBER", pyarrow.int32()), - ("L_QUANTITY", pyarrow.decimal128(15, 2)), - ("L_EXTENDEDPRICE", pyarrow.decimal128(15, 2)), - ("L_DISCOUNT", 
pyarrow.decimal128(15, 2)), - ("L_TAX", pyarrow.decimal128(15, 2)), - ("L_RETURNFLAG", pyarrow.string()), - ("L_LINESTATUS", pyarrow.string()), - ("L_SHIPDATE", pyarrow.date32()), - ("L_COMMITDATE", pyarrow.date32()), - ("L_RECEIPTDATE", pyarrow.date32()), - ("L_SHIPINSTRUCT", pyarrow.string()), - ("L_SHIPMODE", pyarrow.string()), - ("L_COMMENT", pyarrow.string()), + ("L_ORDERKEY", pa.int64()), + ("L_PARTKEY", pa.int64()), + ("L_SUPPKEY", pa.int64()), + ("L_LINENUMBER", pa.int32()), + ("L_QUANTITY", pa.decimal128(15, 2)), + ("L_EXTENDEDPRICE", pa.decimal128(15, 2)), + ("L_DISCOUNT", pa.decimal128(15, 2)), + ("L_TAX", pa.decimal128(15, 2)), + ("L_RETURNFLAG", pa.string()), + ("L_LINESTATUS", pa.string()), + ("L_SHIPDATE", pa.date32()), + ("L_COMMITDATE", pa.date32()), + ("L_RECEIPTDATE", pa.date32()), + ("L_SHIPINSTRUCT", pa.string()), + ("L_SHIPMODE", pa.string()), + ("L_COMMENT", pa.string()), ] all_schemas["nation"] = [ - ("N_NATIONKEY", pyarrow.int64()), - ("N_NAME", pyarrow.string()), - ("N_REGIONKEY", pyarrow.int64()), - ("N_COMMENT", pyarrow.string()), + ("N_NATIONKEY", pa.int64()), + ("N_NAME", pa.string()), + ("N_REGIONKEY", pa.int64()), + ("N_COMMENT", pa.string()), ] all_schemas["orders"] = [ - ("O_ORDERKEY", pyarrow.int64()), - ("O_CUSTKEY", pyarrow.int64()), - ("O_ORDERSTATUS", pyarrow.string()), - ("O_TOTALPRICE", pyarrow.decimal128(15, 2)), - ("O_ORDERDATE", pyarrow.date32()), - ("O_ORDERPRIORITY", pyarrow.string()), - ("O_CLERK", pyarrow.string()), - ("O_SHIPPRIORITY", pyarrow.int32()), - ("O_COMMENT", pyarrow.string()), + ("O_ORDERKEY", pa.int64()), + ("O_CUSTKEY", pa.int64()), + ("O_ORDERSTATUS", pa.string()), + ("O_TOTALPRICE", pa.decimal128(15, 2)), + ("O_ORDERDATE", pa.date32()), + ("O_ORDERPRIORITY", pa.string()), + ("O_CLERK", pa.string()), + ("O_SHIPPRIORITY", pa.int32()), + ("O_COMMENT", pa.string()), ] all_schemas["part"] = [ - ("P_PARTKEY", pyarrow.int64()), - ("P_NAME", pyarrow.string()), - ("P_MFGR", pyarrow.string()), - ("P_BRAND", 
pyarrow.string()), - ("P_TYPE", pyarrow.string()), - ("P_SIZE", pyarrow.int32()), - ("P_CONTAINER", pyarrow.string()), - ("P_RETAILPRICE", pyarrow.decimal128(15, 2)), - ("P_COMMENT", pyarrow.string()), + ("P_PARTKEY", pa.int64()), + ("P_NAME", pa.string()), + ("P_MFGR", pa.string()), + ("P_BRAND", pa.string()), + ("P_TYPE", pa.string()), + ("P_SIZE", pa.int32()), + ("P_CONTAINER", pa.string()), + ("P_RETAILPRICE", pa.decimal128(15, 2)), + ("P_COMMENT", pa.string()), ] all_schemas["partsupp"] = [ - ("PS_PARTKEY", pyarrow.int64()), - ("PS_SUPPKEY", pyarrow.int64()), - ("PS_AVAILQTY", pyarrow.int32()), - ("PS_SUPPLYCOST", pyarrow.decimal128(15, 2)), - ("PS_COMMENT", pyarrow.string()), + ("PS_PARTKEY", pa.int64()), + ("PS_SUPPKEY", pa.int64()), + ("PS_AVAILQTY", pa.int32()), + ("PS_SUPPLYCOST", pa.decimal128(15, 2)), + ("PS_COMMENT", pa.string()), ] all_schemas["region"] = [ - ("r_REGIONKEY", pyarrow.int64()), - ("r_NAME", pyarrow.string()), - ("r_COMMENT", pyarrow.string()), + ("r_REGIONKEY", pa.int64()), + ("r_NAME", pa.string()), + ("r_COMMENT", pa.string()), ] all_schemas["supplier"] = [ - ("S_SUPPKEY", pyarrow.int64()), - ("S_NAME", pyarrow.string()), - ("S_ADDRESS", pyarrow.string()), - ("S_NATIONKEY", pyarrow.int32()), - ("S_PHONE", pyarrow.string()), - ("S_ACCTBAL", pyarrow.decimal128(15, 2)), - ("S_COMMENT", pyarrow.string()), + ("S_SUPPKEY", pa.int64()), + ("S_NAME", pa.string()), + ("S_ADDRESS", pa.string()), + ("S_NATIONKEY", pa.int32()), + ("S_PHONE", pa.string()), + ("S_ACCTBAL", pa.decimal128(15, 2)), + ("S_COMMENT", pa.string()), ] curr_dir = os.path.dirname(os.path.abspath(__file__)) -for filename, curr_schema in all_schemas.items(): +for filename, curr_schema_val in all_schemas.items(): # For convenience, go ahead and convert the schema column names to lowercase - curr_schema = [(s[0].lower(), s[1]) for s in curr_schema] + curr_schema = [(s[0].lower(), s[1]) for s in curr_schema_val] # Pre-collect the output columns so we can ignore the null field we 
add # in to handle the trailing | in the file output_cols = [r[0] for r in curr_schema] - curr_schema = [pyarrow.field(r[0], r[1], nullable=False) for r in curr_schema] + curr_schema = [pa.field(r[0], r[1], nullable=False) for r in curr_schema] # Trailing | requires extra field for in processing - curr_schema.append(("some_null", pyarrow.null())) + curr_schema.append(("some_null", pa.null())) - schema = pyarrow.schema(curr_schema) + schema = pa.schema(curr_schema) source_file = os.path.abspath( os.path.join(curr_dir, f"../../benchmarks/tpch/data/{filename}.csv") diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index d46df30f2..4bf50efba 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -150,7 +150,7 @@ df = df.with_column( "national_volume", F.case(col("s_suppkey").is_null()) - .when(lit(False), col("volume")) + .when(lit(value=False), col("volume")) .otherwise(lit(0.0)), ) diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index 2b87e1120..bd492aac0 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -89,8 +89,8 @@ def is_of_interest( same number of rows in the output. 
""" result = [] - for idx, brand in enumerate(brand_arr): - brand = brand.as_py() + for idx, brand_val in enumerate(brand_arr): + brand = brand_val.as_py() if brand in items_of_interest: values_of_interest = items_of_interest[brand] diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 9bbaad779..619c4406b 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -65,7 +65,7 @@ df = df.with_column( "failed_supp", F.case(col("l_receiptdate") > col("l_commitdate")) - .when(lit(True), col("l_suppkey")) + .when(lit(value=True), col("l_suppkey")) .end(), ) diff --git a/pyproject.toml b/pyproject.toml index a4ed18c4c..d86b657ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,37 +80,17 @@ ignore = [ "TD003", # Allow TODO lines "UP007", # Disallowing Union is pedantic # TODO: Enable all of the following, but this PR is getting too large already - "PT001", - "ANN204", - "B008", - "EM101", "PLR0913", - "PLR1714", - "ANN201", - "C400", "TRY003", - "B904", - "UP006", - "RUF012", - "FBT003", - "C416", - "SIM102", - "PGH003", "PLR2004", - "PERF401", "PD901", - "EM102", "ERA001", - "SIM108", - "ICN001", "ANN001", "ANN202", "PTH", "N812", "INP001", "DTZ007", - "PLW2901", - "RET503", "RUF015", "A005", "TC001", diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 286e5dc31..d871fdb71 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -92,17 +92,17 @@ ] -def column(value: str): +def column(value: str) -> Expr: """Create a column expression.""" return Expr.column(value) -def col(value: str): +def col(value: str) -> Expr: """Create a column expression.""" return Expr.column(value) -def literal(value): +def literal(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) @@ -120,6 +120,6 @@ def str_lit(value): return string_literal(value) -def lit(value): +def 
lit(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 0560f4704..6c3f188cc 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -24,7 +24,7 @@ import datafusion._internal as df_internal if TYPE_CHECKING: - import pyarrow + import pyarrow as pa class Catalog: @@ -67,7 +67,7 @@ def __init__(self, table: df_internal.Table) -> None: self.table = table @property - def schema(self) -> pyarrow.Schema: + def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" return self.table.schema diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 58ad9a943..1429a4975 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -40,9 +40,9 @@ if TYPE_CHECKING: import pathlib - import pandas - import polars - import pyarrow + import pandas as pd + import polars as pl + import pyarrow as pa from datafusion.plan import ExecutionPlan, LogicalPlan @@ -537,7 +537,7 @@ def register_listing_table( path: str | pathlib.Path, table_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".parquet", - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> None: """Register multiple files as a single table. @@ -606,14 +606,14 @@ def sql_with_options(self, query: str, options: SQLOptions) -> DataFrame: def create_dataframe( self, - partitions: list[list[pyarrow.RecordBatch]], + partitions: list[list[pa.RecordBatch]], name: str | None = None, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, ) -> DataFrame: """Create and return a dataframe using the provided partitions. Args: - partitions: :py:class:`pyarrow.RecordBatch` partitions to register. + partitions: :py:class:`pa.RecordBatch` partitions to register. name: Resultant dataframe name. 
schema: Schema for the partitions. @@ -684,16 +684,14 @@ def from_arrow( return DataFrame(self.ctx.from_arrow(data, name)) @deprecated("Use ``from_arrow`` instead.") - def from_arrow_table( - self, data: pyarrow.Table, name: str | None = None - ) -> DataFrame: + def from_arrow_table(self, data: pa.Table, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow table. This is an alias for :py:func:`from_arrow`. """ return self.from_arrow(data, name) - def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFrame: + def from_pandas(self, data: pd.DataFrame, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Pandas DataFrame. Args: @@ -705,7 +703,7 @@ def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFr """ return DataFrame(self.ctx.from_pandas(data, name)) - def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFrame: + def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Polars DataFrame. Args: @@ -719,7 +717,7 @@ def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFr # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 # is the discussion on how we arrived at adding register_view - def register_view(self, name: str, df: DataFrame): + def register_view(self, name: str, df: DataFrame) -> None: """Register a :py:class: `~datafusion.detaframe.DataFrame` as a view. Args: @@ -755,7 +753,7 @@ def register_table_provider( self.ctx.register_table_provider(name, provider) def register_record_batches( - self, name: str, partitions: list[list[pyarrow.RecordBatch]] + self, name: str, partitions: list[list[pa.RecordBatch]] ) -> None: """Register record batches as a table. 
@@ -776,7 +774,7 @@ def register_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -817,7 +815,7 @@ def register_csv( self, name: str, path: str | pathlib.Path | list[str | pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -843,10 +841,7 @@ def register_csv( selected for data input. file_compression_type: File compression type. """ - if isinstance(path, list): - path = [str(p) for p in path] - else: - path = str(path) + path = [str(p) for p in path] if isinstance(path, list) else str(path) self.ctx.register_csv( name, @@ -863,7 +858,7 @@ def register_json( self, name: str, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -901,7 +896,7 @@ def register_avro( self, name: str, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_extension: str = ".avro", table_partition_cols: list[tuple[str, str]] | None = None, ) -> None: @@ -923,8 +918,8 @@ def register_avro( name, str(path), schema, file_extension, table_partition_cols ) - def register_dataset(self, name: str, dataset: pyarrow.dataset.Dataset) -> None: - """Register a :py:class:`pyarrow.dataset.Dataset` as a table. + def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None: + """Register a :py:class:`pa.dataset.Dataset` as a table. Args: name: Name of the table to register. 
@@ -975,7 +970,7 @@ def session_id(self) -> str: def read_json( self, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -1012,7 +1007,7 @@ def read_json( def read_csv( self, path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -1065,7 +1060,7 @@ def read_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -1110,7 +1105,7 @@ def read_parquet( def read_avro( self, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".avro", ) -> DataFrame: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index d1c71c2bb..26fe8f453 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -26,10 +26,8 @@ TYPE_CHECKING, Any, Iterable, - List, Literal, Optional, - Type, Union, overload, ) @@ -75,7 +73,7 @@ class Compression(Enum): LZ4_RAW = "lz4_raw" @classmethod - def from_str(cls: Type[Compression], value: str) -> Compression: + def from_str(cls: type[Compression], value: str) -> Compression: """Convert a string to a Compression enum value. 
Args: @@ -89,11 +87,13 @@ def from_str(cls: Type[Compression], value: str) -> Compression: """ try: return cls(value.lower()) - except ValueError: + except ValueError as err: valid_values = str([item.value for item in Compression]) - raise ValueError( - f"{value} is not a valid Compression. Valid values are: {valid_values}" - ) + error_msg = f""" + {value} is not a valid Compression. + Valid values are: {valid_values} + """ + raise ValueError(error_msg) from err def get_default_level(self) -> Optional[int]: """Get the default compression level for the compression type. @@ -132,7 +132,7 @@ def into_view(self) -> pa.Table: """Convert DataFrame as a ViewTable which can be used in register_table.""" return self.df.into_view() - def __getitem__(self, key: str | List[str]) -> DataFrame: + def __getitem__(self, key: str | list[str]) -> DataFrame: """Return a new :py:class`DataFrame` with the specified column or columns. Args: @@ -287,8 +287,7 @@ def _simplify_expression( if isinstance(expr, Expr): expr_list.append(expr.expr) elif isinstance(expr, Iterable): - for inner_expr in expr: - expr_list.append(inner_expr.expr) + expr_list.extend(inner_expr.expr for inner_expr in expr) else: raise NotImplementedError if named_exprs: @@ -513,10 +512,15 @@ def join( # This check is to prevent breaking API changes where users prior to # DF 43.0.0 would pass the join_keys as a positional argument instead # of a keyword argument. 
- if isinstance(on, tuple) and len(on) == 2: - if isinstance(on[0], list) and isinstance(on[1], list): - join_keys = on # type: ignore - on = None + if ( + isinstance(on, tuple) + and len(on) == 2 + and isinstance(on[0], list) + and isinstance(on[1], list) + ): + # We know this is safe because we've checked the types + join_keys = on # type: ignore[assignment] + on = None if join_keys is not None: warnings.warn( @@ -529,18 +533,17 @@ def join( if on is not None: if left_on is not None or right_on is not None: - raise ValueError( - "`left_on` or `right_on` should not provided with `on`" - ) + error_msg = "`left_on` or `right_on` should not provided with `on`" + raise ValueError(error_msg) left_on = on right_on = on elif left_on is not None or right_on is not None: if left_on is None or right_on is None: - raise ValueError("`left_on` and `right_on` should both be provided.") + error_msg = "`left_on` and `right_on` should both be provided." + raise ValueError(error_msg) else: - raise ValueError( - "either `on` or `left_on` and `right_on` should be provided." - ) + error_msg = "either `on` or `left_on` and `right_on` should be provided." + raise ValueError(error_msg) if isinstance(left_on, str): left_on = [left_on] if isinstance(right_on, str): @@ -726,9 +729,11 @@ def write_parquet( if isinstance(compression, str): compression = Compression.from_str(compression) - if compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD}: - if compression_level is None: - compression_level = compression.get_default_level() + if ( + compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD} + and compression_level is None + ): + compression_level = compression.get_default_level() self.df.write_parquet(str(path), compression.value, compression_level) @@ -824,7 +829,7 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram Returns: A DataFrame with the columns expanded. 
""" - columns = [c for c in columns] + columns = list(columns) return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls)) def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any: diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 77b6c272d..2697d8143 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -22,7 +22,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Type +from typing import TYPE_CHECKING, Any, ClassVar, Optional import pyarrow as pa @@ -176,7 +176,7 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: """Helper function to return a default Sort if an Expr is provided.""" if isinstance(e, SortExpr): return e.raw_sort - return SortExpr(e, True, True).raw_sort + return SortExpr(e, ascending=True, nulls_first=True).raw_sort def sort_list_to_raw_sort_list( @@ -439,24 +439,21 @@ def fill_null(self, value: Any | Expr | None = None) -> Expr: value = Expr.literal(value) return Expr(functions_internal.nvl(self.expr, value.expr)) - _to_pyarrow_types = { + _to_pyarrow_types: ClassVar[dict[type, pa.DataType]] = { float: pa.float64(), int: pa.int64(), str: pa.string(), bool: pa.bool_(), } - def cast( - self, to: pa.DataType[Any] | Type[float] | Type[int] | Type[str] | Type[bool] - ) -> Expr: + def cast(self, to: pa.DataType[Any] | type[float | int | str | bool]) -> Expr: """Cast to a new data type.""" if not isinstance(to, pa.DataType): try: to = self._to_pyarrow_types[to] - except KeyError: - raise TypeError( - "Expected instance of pyarrow.DataType or builtins.type" - ) + except KeyError as err: + error_msg = "Expected instance of pyarrow.DataType or builtins.type" + raise TypeError(error_msg) from err return Expr(self.expr.cast(to)) @@ -565,9 +562,7 @@ def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: set parameters for either window or aggregate functions. 
If used on any other type of expression, an error will be generated when ``build()`` is called. """ - return ExprFuncBuilder( - self.expr.partition_by(list(e.expr for e in partition_by)) - ) + return ExprFuncBuilder(self.expr.partition_by([e.expr for e in partition_by])) def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: """Set the frame fora window function. @@ -610,7 +605,7 @@ def over(self, window: Window) -> Expr: class ExprFuncBuilder: - def __init__(self, builder: expr_internal.ExprFuncBuilder): + def __init__(self, builder: expr_internal.ExprFuncBuilder) -> None: self.builder = builder def order_by(self, *exprs: Expr) -> ExprFuncBuilder: @@ -638,7 +633,7 @@ def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: """Set partitioning for window functions.""" return ExprFuncBuilder( - self.builder.partition_by(list(e.expr for e in partition_by)) + self.builder.partition_by([e.expr for e in partition_by]) ) def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: @@ -693,11 +688,11 @@ def __init__( """ if not isinstance(start_bound, pa.Scalar) and start_bound is not None: start_bound = pa.scalar(start_bound) - if units == "rows" or units == "groups": + if units in ("rows", "groups"): start_bound = start_bound.cast(pa.uint64()) if not isinstance(end_bound, pa.Scalar) and end_bound is not None: end_bound = pa.scalar(end_bound) - if units == "rows" or units == "groups": + if units in ("rows", "groups"): end_bound = end_bound.cast(pa.uint64()) self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound) @@ -709,7 +704,7 @@ def get_lower_bound(self) -> WindowFrameBound: """Returns starting bound.""" return WindowFrameBound(self.window_frame.get_lower_bound()) - def get_upper_bound(self): + def get_upper_bound(self) -> WindowFrameBound: """Returns end bound.""" return WindowFrameBound(self.window_frame.get_upper_bound()) diff --git 
a/python/datafusion/functions.py b/python/datafusion/functions.py index 26bac149c..5cf914e16 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -790,10 +790,7 @@ def regexp_count( """ if flags is not None: flags = flags.expr - if start is not None: - start = start.expr - else: - start = Expr.expr + start = start.expr if start is not None else Expr.expr return Expr(f.regexp_count(string.expr, pattern.expr, start, flags)) @@ -817,13 +814,15 @@ def right(string: Expr, n: Expr) -> Expr: return Expr(f.right(string.expr, n.expr)) -def round(value: Expr, decimal_places: Expr = Expr.literal(0)) -> Expr: +def round(value: Expr, decimal_places: Expr | None = None) -> Expr: """Round the argument to the nearest integer. If the optional ``decimal_places`` is specified, round to the nearest number of decimal places. You can specify a negative number of decimal places. For example ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. """ + if decimal_places is None: + decimal_places = Expr.literal(0) return Expr(f.round(value.expr, decimal_places.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 161e1e3bb..37f2075f5 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -81,7 +81,7 @@ def test_literal(df): literal("1"), literal("OK"), literal(3.14), - literal(True), + literal(value=True), literal(b"hello world"), ) result = df.collect() diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index a2de2d32b..926a65961 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -28,7 +28,7 @@ from enum import EnumMeta as EnumType -def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 +def missing_exports(internal_obj, wrapped_obj) -> None: """ Identify if any of the rust exposted structs or functions do not have wrappers. 
@@ -56,9 +56,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: # noqa: C901 # __kwdefaults__ and __doc__. As long as these are None on the internal # object, it's okay to skip them. However if they do exist on the internal # object they must also exist on the wrapped object. - if internal_attr is not None: - if wrapped_attr is None: - pytest.fail(f"Missing attribute: {internal_attr_name}") + if internal_attr is not None and wrapped_attr is None: + pytest.fail(f"Missing attribute: {internal_attr_name}") if internal_attr_name in ["__self__", "__class__"]: continue From 42982dad27ad03e7e9395d4c3ae3064c2b489434 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 22 Mar 2025 10:14:55 -0400 Subject: [PATCH 027/206] Improve collection during repr and repr_html (#1036) * Improve table readout of a dataframe in jupyter notebooks by making the table scrollable and displaying the first record batch up to 2MB * Add option to only display a portion of a cell data and the user can click on a button to toggle showing more or less * We cannot expect that the first non-empy batch is sufficient for our 2MB limit, so switch over to collecting until we run out or use up the size * Update python unit test to allow the additional formatting data to exist and only check the table contents * Combining collection for repr and repr_html into one function * Small clippy suggestion * Collect was occuring twice on repr * Switch to execute_stream_partitioned --- python/tests/test_dataframe.py | 23 ++-- src/dataframe.rs | 240 ++++++++++++++++++++++++++++----- src/utils.rs | 2 +- 3 files changed, 225 insertions(+), 40 deletions(-) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 384b17878..718ebf69d 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import os +import re from typing import Any import pyarrow as pa @@ -1245,13 +1246,17 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: def test_dataframe_repr_html(df) -> None: output = df._repr_html_() - ref_html = """
- - - - -
abc
148
255
368
- """ + # Since we've added a fair bit of processing to the html output, lets just verify + # the values we are expecting in the table exist. Use regex and ignore everything + # between the and . We also don't want the closing > on the + # td and th segments because that is where the formatting data is written. - # Ignore whitespace just to make this test look cleaner - assert output.replace(" ", "") == ref_html.replace(" ", "") + headers = ["a", "b", "c"] + headers = [f"{v}" for v in headers] + header_pattern = "(.*?)".join(headers) + assert len(re.findall(header_pattern, output, re.DOTALL)) == 1 + + body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] + body_lines = [f"{v}" for inner in body_data for v in inner] + body_pattern = "(.*?)".join(body_lines) + assert len(re.findall(body_pattern, output, re.DOTALL)) == 1 diff --git a/src/dataframe.rs b/src/dataframe.rs index 243e2e14f..be10b8c28 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -31,9 +31,11 @@ use datafusion::common::UnnestOptions; use datafusion::config::{CsvOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::datasource::TableProvider; +use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; +use futures::{StreamExt, TryStreamExt}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; @@ -70,6 +72,9 @@ impl PyTableProvider { PyTable::new(table_provider) } } +const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB +const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20; +const MAX_LENGTH_CELL_WITHOUT_MINIMIZE: usize = 25; /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. 
@@ -111,56 +116,151 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; - let batches_as_string = pretty::pretty_format_batches(&batches); - match batches_as_string { - Ok(batch) => Ok(format!("DataFrame()\n{batch}")), - Err(err) => Ok(format!("Error: {:?}", err.to_string())), + let (batches, has_more) = wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10), + )?; + if batches.is_empty() { + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } - } - fn _repr_html_(&self, py: Python) -> PyDataFusionResult { - let mut html_str = "\n".to_string(); + let batches_as_displ = + pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?; + + let additional_str = match has_more { + true => "\nData truncated.", + false => "", + }; - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; + Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}")) + } + fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + let (batches, has_more) = wait_for_future( + py, + collect_record_batches_to_display( + self.df.as_ref().clone(), + MIN_TABLE_ROWS_TO_DISPLAY, + usize::MAX, + ), + )?; if batches.is_empty() { - html_str.push_str("
\n"); - return Ok(html_str); + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } + let table_uuid = uuid::Uuid::new_v4().to_string(); + + let mut html_str = " + + +
+ + \n".to_string(); + let schema = batches[0].schema(); let mut header = Vec::new(); for field in schema.fields() { - header.push(format!("", field.name())); } let header_str = header.join(""); - html_str.push_str(&format!("{}\n", header_str)); - - for batch in batches { - let formatters = batch - .columns() - .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) - .map(|c| { - c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) - }) - .collect::, _>>()?; - - for row in 0..batch.num_rows() { + html_str.push_str(&format!("{}\n", header_str)); + + let batch_formatters = batches + .iter() + .map(|batch| { + batch + .columns() + .iter() + .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) + .map(|c| { + c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) + }) + .collect::, _>>() + }) + .collect::, _>>()?; + + let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); + + // We need to build up row by row for html + let mut table_row = 0; + for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { + for batch_row in 0..num_rows_in_batch { + table_row += 1; let mut cells = Vec::new(); - for formatter in &formatters { - cells.push(format!("", formatter.value(row))); + for (col, formatter) in batch_formatter.iter().enumerate() { + let cell_data = formatter.value(batch_row).to_string(); + // From testing, primitive data types do not typically get larger than 21 characters + if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { + let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; + cells.push(format!(" + ")); + } else { + cells.push(format!("", formatter.value(batch_row))); + } } let row_str = cells.join(""); html_str.push_str(&format!("{}\n", row_str)); } } + html_str.push_str("
{}", field.name())); + header.push(format!("{}
{} +
+ {short_cell_data} + {cell_data} + +
+
{}
\n"); + + html_str.push_str(" + + "); - html_str.push_str("\n"); + if has_more { + html_str.push_str("Data truncated due to size."); + } Ok(html_str) } @@ -771,3 +871,83 @@ fn record_batch_into_schema( RecordBatch::try_new(schema, data_arrays) } + +/// This is a helper function to return the first non-empty record batch from executing a DataFrame. +/// It additionally returns a bool, which indicates if there are more record batches available. +/// We do this so we can determine if we should indicate to the user that the data has been +/// truncated. This collects until we have achived both of these two conditions +/// +/// - We have collected our minimum number of rows +/// - We have reached our limit, either data size or maximum number of rows +/// +/// Otherwise it will return when the stream has exhausted. If you want a specific number of +/// rows, set min_rows == max_rows. +async fn collect_record_batches_to_display( + df: DataFrame, + min_rows: usize, + max_rows: usize, +) -> Result<(Vec, bool), DataFusionError> { + let partitioned_stream = df.execute_stream_partitioned().await?; + let mut stream = futures::stream::iter(partitioned_stream).flatten(); + let mut size_estimate_so_far = 0; + let mut rows_so_far = 0; + let mut record_batches = Vec::default(); + let mut has_more = false; + + while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows) + || rows_so_far < min_rows + { + let mut rb = match stream.next().await { + None => { + break; + } + Some(Ok(r)) => r, + Some(Err(e)) => return Err(e), + }; + + let mut rows_in_rb = rb.num_rows(); + if rows_in_rb > 0 { + size_estimate_so_far += rb.get_array_memory_size(); + + if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY { + let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32; + let total_rows = rows_in_rb + rows_so_far; + + let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize; + if reduced_row_num < min_rows { + reduced_row_num = 
min_rows.min(total_rows); + } + + let limited_rows_this_rb = reduced_row_num - rows_so_far; + if limited_rows_this_rb < rows_in_rb { + rows_in_rb = limited_rows_this_rb; + rb = rb.slice(0, limited_rows_this_rb); + has_more = true; + } + } + + if rows_in_rb + rows_so_far > max_rows { + rb = rb.slice(0, max_rows - rows_so_far); + has_more = true; + } + + rows_so_far += rb.num_rows(); + record_batches.push(rb); + } + } + + if record_batches.is_empty() { + return Ok((Vec::default(), false)); + } + + if !has_more { + // Data was not already truncated, so check to see if more record batches remain + has_more = match stream.try_next().await { + Ok(None) => false, // reached end + Ok(Some(_)) => true, + Err(_) => false, // Stream disconnected + }; + } + + Ok((record_batches, has_more)) +} diff --git a/src/utils.rs b/src/utils.rs index 999aad755..3487de21b 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -42,7 +42,7 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { #[inline] pub(crate) fn get_global_ctx() -> &'static SessionContext { static CTX: OnceLock = OnceLock::new(); - CTX.get_or_init(|| SessionContext::new()) + CTX.get_or_init(SessionContext::new) } /// Utility to collect rust futures with GIL released From d0315ffa704aba467f769f444208b7ce26d83037 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 22 Mar 2025 14:37:24 -0400 Subject: [PATCH 028/206] feat: Update DataFusion dependency to 46 (#1079) * Update DataFusion dependency to 46 * There was an update upstream in the exec but it is not a breaking change and only needs unit test updates --- Cargo.lock | 296 +++++++++++++++++++-------------- Cargo.toml | 18 +- python/tests/test_dataframe.py | 3 +- src/expr.rs | 39 +++-- src/expr/aggregate.rs | 10 +- src/expr/aggregate_expr.rs | 11 +- src/expr/window.rs | 24 ++- src/functions.rs | 34 ++-- 8 files changed, 252 insertions(+), 183 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c7f2bf3c..3a4915f23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" +checksum = "84ef243634a39fb6e9d1710737e7a5ef96c9bacabd2326859ff889bc9ef755e5" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" +checksum = "8f420c6aef51dad2e4a96ce29c0ec90ad84880bdb60b321c74c652a6be07b93f" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" +checksum = "24bda5ff6461a4ff9739959b3d57b377f45e3f878f7be1a4f28137c0a8f339fa" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" +checksum = "bc6ed265c73f134a583d02c3cab5e16afab9446d8048ede8707e31f85fad58a0" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" +checksum = "01c648572391edcef10e5fd458db70ba27ed6f71bcaee04397d0cfb100b34f8b" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = 
"arrow-csv" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" +checksum = "a02fb265a6d8011a7d3ad1a36f25816ad0a3bb04cb8e9fe7929c165b98c0cbcd" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" +checksum = "5f2cebf504bb6a92a134a87fff98f01b14fbb3a93ecf7aef90cd0f888c5fffa4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" +checksum = "8e6405b287671c88846e7751f7291f717b164911474cabac6d3d8614d5aa7374" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" +checksum = "5329bf9e7390cbb6b117ddd4d82e94c5362ea4cab5095697139429f36a38350c" dependencies = [ "arrow-array", "arrow-buffer", @@ -319,16 +319,18 @@ dependencies = [ "half", "indexmap", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" +checksum = "e103c13d4b80da28339c1d7aa23dd85bd59f42158acc45d39eeb6770627909ce" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +341,9 @@ dependencies = [ [[package]] name = 
"arrow-row" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" +checksum = "170549a11b8534f3097a0619cfe89c42812345dc998bcf81128fc700b84345b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +354,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" +checksum = "a5c53775bba63f319189f366d2b86e9a8889373eb198f07d8544938fc9f8ed9a" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" +checksum = "0a99003b2eb562b8d9c99dfb672306f15e94b20d3734179d596895703e821dcf" dependencies = [ "ahash", "arrow-array", @@ -375,9 +377,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" +checksum = "90fdb130ee8325f4cd8262e19bb6baa3cbcef2b2573c4bee8c6fda7ea08199d7" dependencies = [ "arrow-array", "arrow-buffer", @@ -535,9 +537,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.5" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" +checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" dependencies = [ "arrayref", "arrayvec", @@ -649,15 +651,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.40" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets", + "windows-link", ] [[package]] @@ -864,30 +866,32 @@ dependencies = [ [[package]] name = "datafusion" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" dependencies = [ "apache-avro", "arrow", - "arrow-array", "arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", "bzip2 0.5.1", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -896,7 +900,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "num-traits", @@ -908,7 +911,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -917,9 +919,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" dependencies = [ "arrow", "async-trait", @@ -933,22 +935,40 @@ dependencies = [ "itertools 0.14.0", "log", "parking_lot", - 
"sqlparser", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" dependencies = [ "ahash", "apache-avro", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64 0.22.1", "half", "hashbrown 0.14.5", @@ -966,25 +986,59 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.5.1", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + 
"log", + "object_store", + "rand", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + [[package]] name = "datafusion-doc" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" [[package]] name = "datafusion-execution" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" dependencies = [ "arrow", "dashmap", @@ -1001,9 +1055,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" dependencies = [ "arrow", "chrono", @@ -1022,26 +1076,25 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" dependencies = [ "arrow", "datafusion-common", + "indexmap", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" +checksum = "d740dd9f32a4f4ed1b907e6934201bb059efe6c877532512c661771d973c7b21" dependencies = [ "abi_stable", "arrow", - "arrow-array", - "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1055,9 +1108,9 @@ 
dependencies = [ [[package]] name = "datafusion-functions" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" dependencies = [ "arrow", "arrow-buffer", @@ -1071,7 +1124,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -1085,14 +1137,12 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1108,9 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" dependencies = [ "ahash", "arrow", @@ -1121,15 +1171,12 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" dependencies = [ "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1145,9 +1192,9 @@ dependencies = [ [[package]] name = 
"datafusion-functions-table" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" dependencies = [ "arrow", "async-trait", @@ -1161,9 +1208,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1178,9 +1225,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1188,9 +1235,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" dependencies = [ "datafusion-expr", "quote", @@ -1199,9 +1246,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" dependencies = [ "arrow", "chrono", @@ -1218,15 +1265,12 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1243,13 +1287,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1258,12 +1301,11 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1271,23 +1313,19 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", 
"async-trait", @@ -1312,9 +1350,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" +checksum = "6f6ef4c6eb52370cb48639e25e2331a415aac0b2b0a0a472b36e26603bdf184f" dependencies = [ "arrow", "chrono", @@ -1328,9 +1366,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" +checksum = "5faf4a9bbb0d0a305fea8a6db21ba863286b53e53a212e687d2774028dd6f03f" dependencies = [ "arrow", "datafusion-common", @@ -1362,13 +1400,11 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1381,11 +1417,10 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944" +checksum = "2c2be3226a683e02cff65181e66e62eba9f812ed0e9b7ec8fe11ac8dabf1a73f" dependencies = [ - "arrow-buffer", "async-recursion", "async-trait", "chrono", @@ -1395,6 +1430,7 @@ dependencies = [ "pbjson-types", "prost", "substrait", + "tokio", "url", ] @@ -1472,9 +1508,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" dependencies = [ "crc32fast", "miniz_oxide", @@ -2117,9 +2153,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libflate" @@ -2447,9 +2483,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "94243778210509a5a5e9e012872127180c155d73a9cd6e2df9243d213e81e100" dependencies = [ "ahash", "arrow-array", @@ -2479,7 +2515,6 @@ dependencies = [ "tokio", "twox-hash", "zstd", - "zstd-sys", ] [[package]] @@ -3401,11 +3436,12 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -3466,9 +3502,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.52.3" +version = "0.53.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756" +checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b" dependencies = [ "heck", "pbjson", @@ -3922,12 +3958,14 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.13.1" +version = "1.16.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "getrandom 0.3.1", + "js-sys", "serde", + "wasm-bindgen", ] [[package]] @@ -4114,6 +4152,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-registry" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 50967a219..8afabdd82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,24 +34,24 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.43", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} -arrow = { version = "54", features = ["pyarrow"] } -datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "45.0.0", optional = true } -datafusion-proto = { version = "45.0.0" } -datafusion-ffi = { version = "45.0.0" } -prost = "0.13" # keep in line with `datafusion-substrait` +arrow = { version = "54.2.1", features = ["pyarrow"] } +datafusion = { version = "46.0.1", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "46.0.1", optional = true } +datafusion-proto = { version = "46.0.1" } +datafusion-ffi = { version = "46.0.1" } +prost = "0.13.1" # keep in line with `datafusion-substrait` uuid = { version = "1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = 
["local_dynamic_tls"] } -async-trait = "0.1" +async-trait = "0.1.73" futures = "0.3" object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } url = "2" [build-dependencies] -prost-types = "0.13" # keep in line with `datafusion-substrait` +prost-types = "0.13.1" # keep in line with `datafusion-substrait` pyo3-build-config = "0.23" [lib] diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 718ebf69d..eda13930d 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -753,7 +753,8 @@ def test_execution_plan(aggregate_df): assert "AggregateExec:" in indent assert "CoalesceBatchesExec:" in indent assert "RepartitionExec:" in indent - assert "CsvExec:" in indent + assert "DataSourceExec:" in indent + assert "file_type=csv" in indent ctx = SessionContext() rows_returned = 0 diff --git a/src/expr.rs b/src/expr.rs index d3c528eb4..561170289 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use datafusion::logical_expr::expr::{AggregateFunctionParams, WindowFunctionParams}; use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, @@ -172,6 +173,7 @@ impl PyExpr { Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) } + #[allow(deprecated)] Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", qualifier, options @@ -332,7 +334,6 @@ impl PyExpr { | Expr::AggregateFunction { .. } | Expr::WindowFunction { .. } | Expr::InList { .. } - | Expr::Wildcard { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } | Expr::GroupingSet(..) 
@@ -346,6 +347,10 @@ impl PyExpr { | Expr::Unnest(_) | Expr::IsNotUnknown(_) => RexType::Call, Expr::ScalarSubquery(..) => RexType::ScalarSubquery, + #[allow(deprecated)] + Expr::Wildcard { .. } => { + return Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) + } }) } @@ -394,11 +399,15 @@ impl PyExpr { | Expr::InSubquery(InSubquery { expr, .. }) => Ok(vec![PyExpr::from(*expr.clone())]), // Expr variants containing a collection of Expr(s) for operands - Expr::AggregateFunction(AggregateFunction { args, .. }) + Expr::AggregateFunction(AggregateFunction { + params: AggregateFunctionParams { args, .. }, + .. + }) | Expr::ScalarFunction(ScalarFunction { args, .. }) - | Expr::WindowFunction(WindowFunction { args, .. }) => { - Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) - } + | Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()), // Expr(s) that require more specific processing Expr::Case(Case { @@ -465,13 +474,17 @@ impl PyExpr { Expr::GroupingSet(..) | Expr::Unnest(_) | Expr::OuterReferenceColumn(_, _) - | Expr::Wildcard { .. } | Expr::ScalarSubquery(..) | Expr::Placeholder { .. } | Expr::Exists { .. } => Err(py_runtime_err(format!( "Unimplemented Expr type: {}", self.expr ))), + + #[allow(deprecated)] + Expr::Wildcard { .. } => { + Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) + } } } @@ -575,7 +588,7 @@ impl PyExpr { Expr::AggregateFunction(agg_fn) => { let window_fn = Expr::WindowFunction(WindowFunction::new( WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()), - agg_fn.args.clone(), + agg_fn.params.args.clone(), )); add_builder_fns_to_window( @@ -663,16 +676,8 @@ impl PyExpr { /// Create a [Field] representing an [Expr], given an input [LogicalPlan] to resolve against pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> PyDataFusionResult> { - match expr { - Expr::Wildcard { .. 
} => { - // Since * could be any of the valid column names just return the first one - Ok(Arc::new(input_plan.schema().field(0).clone())) - } - _ => { - let fields = exprlist_to_fields(&[expr.clone()], input_plan)?; - Ok(fields[0].1.clone()) - } - } + let fields = exprlist_to_fields(&[expr.clone()], input_plan)?; + Ok(fields[0].1.clone()) } fn _types(expr: &Expr) -> PyResult { match expr { diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 8fc9da5b0..a99d83d23 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::common::DataFusionError; -use datafusion::logical_expr::expr::{AggregateFunction, Alias}; +use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias}; use datafusion::logical_expr::logical_plan::Aggregate; use datafusion::logical_expr::Expr; use pyo3::{prelude::*, IntoPyObjectExt}; @@ -126,9 +126,11 @@ impl PyAggregate { match expr { // TODO: This Alias logic seems to be returning some strange results that we should investigate Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()), - Expr::AggregateFunction(AggregateFunction { func: _, args, .. }) => { - Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()) - } + Expr::AggregateFunction(AggregateFunction { + func: _, + params: AggregateFunctionParams { args, .. }, + .. 
+ }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()), _ => Err(py_type_err( "Encountered a non Aggregate type in aggregation_arguments", )), diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs index 09471097f..c09f116e3 100644 --- a/src/expr/aggregate_expr.rs +++ b/src/expr/aggregate_expr.rs @@ -40,7 +40,13 @@ impl From for PyAggregateFunction { impl Display for PyAggregateFunction { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let args: Vec = self.aggr.args.iter().map(|expr| expr.to_string()).collect(); + let args: Vec = self + .aggr + .params + .args + .iter() + .map(|expr| expr.to_string()) + .collect(); write!(f, "{}({})", self.aggr.func.name(), args.join(", ")) } } @@ -54,12 +60,13 @@ impl PyAggregateFunction { /// is this a distinct aggregate such as `COUNT(DISTINCT expr)` fn is_distinct(&self) -> bool { - self.aggr.distinct + self.aggr.params.distinct } /// Get the arguments to the aggregate function fn args(&self) -> Vec { self.aggr + .params .args .iter() .map(|expr| PyExpr::from(expr.clone())) diff --git a/src/expr/window.rs b/src/expr/window.rs index 13deaec25..c5467bf94 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::common::{DataFusionError, ScalarValue}; -use datafusion::logical_expr::expr::WindowFunction; +use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; @@ -118,7 +118,10 @@ impl PyWindowExpr { /// Returns order by columns in a window function expression pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { order_by, .. }) => py_sort_expr_list(&order_by), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { order_by, .. }, + .. 
+ }) => py_sort_expr_list(&order_by), other => Err(not_window_function_err(other)), } } @@ -126,9 +129,10 @@ impl PyWindowExpr { /// Return partition by columns in a window function expression pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { partition_by, .. }) => { - py_expr_list(&partition_by) - } + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { partition_by, .. }, + .. + }) => py_expr_list(&partition_by), other => Err(not_window_function_err(other)), } } @@ -136,7 +140,10 @@ impl PyWindowExpr { /// Return input args for window function pub fn get_args(&self, expr: PyExpr) -> PyResult> { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { args, .. }) => py_expr_list(&args), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => py_expr_list(&args), other => Err(not_window_function_err(other)), } } @@ -152,7 +159,10 @@ impl PyWindowExpr { /// Returns a Pywindow frame for a given window function expression pub fn get_frame(&self, expr: PyExpr) -> Option { match expr.expr.unalias() { - Expr::WindowFunction(WindowFunction { window_frame, .. }) => Some(window_frame.into()), + Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { window_frame, .. }, + .. 
+ }) => Some(window_frame.into()), _ => None, } } diff --git a/src/functions.rs b/src/functions.rs index 8fac239b4..9c406b95a 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -17,6 +17,7 @@ use datafusion::functions_aggregate::all_default_aggregate_functions; use datafusion::functions_window::all_default_window_functions; +use datafusion::logical_expr::expr::WindowFunctionParams; use datafusion::logical_expr::ExprFunctionExt; use datafusion::logical_expr::WindowFrame; use pyo3::{prelude::*, wrap_pyfunction}; @@ -215,10 +216,7 @@ fn alias(expr: PyExpr, name: &str) -> PyResult { #[pyfunction] fn col(name: &str) -> PyResult { Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Column(Column { - relation: None, - name: name.to_string(), - }), + expr: datafusion::logical_expr::Expr::Column(Column::new_unqualified(name)), }) } @@ -333,19 +331,21 @@ fn window( Ok(PyExpr { expr: datafusion::logical_expr::Expr::WindowFunction(WindowFunction { fun, - args: args.into_iter().map(|x| x.expr).collect::>(), - partition_by: partition_by - .unwrap_or_default() - .into_iter() - .map(|x| x.expr) - .collect::>(), - order_by: order_by - .unwrap_or_default() - .into_iter() - .map(|x| x.into()) - .collect::>(), - window_frame, - null_treatment: None, + params: WindowFunctionParams { + args: args.into_iter().map(|x| x.expr).collect::>(), + partition_by: partition_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + order_by: order_by + .unwrap_or_default() + .into_iter() + .map(|x| x.into()) + .collect::>(), + window_frame, + null_treatment: None, + }, }), }) } From 583e1e9420906c99b1fbdf57c0138f1e67548008 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 30 Mar 2025 08:44:55 -0400 Subject: [PATCH 029/206] Update changelog and version number (#1089) --- Cargo.lock | 2 +- Cargo.toml | 2 +- dev/changelog/46.0.0.md | 73 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 dev/changelog/46.0.0.md 
diff --git a/Cargo.lock b/Cargo.lock index 3a4915f23..f90038c50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1377,7 +1377,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "45.2.0" +version = "46.0.0" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 8afabdd82..bc8639d4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "45.2.0" +version = "46.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/46.0.0.md b/dev/changelog/46.0.0.md new file mode 100644 index 000000000..3e5768099 --- /dev/null +++ b/dev/changelog/46.0.0.md @@ -0,0 +1,73 @@ + + +# Apache DataFusion Python 46.0.0 Changelog + +This release consists of 21 commits from 11 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: reads using global ctx [#982](https://github.com/apache/datafusion-python/pull/982) (ion-elgreco) +- feat: Implementation of udf and udaf decorator [#1040](https://github.com/apache/datafusion-python/pull/1040) (CrystalZhou0529) +- feat: expose regex_count function [#1066](https://github.com/apache/datafusion-python/pull/1066) (nirnayroy) +- feat: Update DataFusion dependency to 46 [#1079](https://github.com/apache/datafusion-python/pull/1079) (timsaucer) + +**Fixed bugs:** + +- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) +- fix: type checking [#993](https://github.com/apache/datafusion-python/pull/993) (chenkovsky) + +**Other:** + +- [infra] Fail Clippy on rust build warnings [#1029](https://github.com/apache/datafusion-python/pull/1029) (kevinjqliu) +- Add user documentation for the FFI approach [#1031](https://github.com/apache/datafusion-python/pull/1031) (timsaucer) +- build(deps): bump arrow from 54.1.0 to 54.2.0 
[#1035](https://github.com/apache/datafusion-python/pull/1035) (dependabot[bot]) +- Chore: Release datafusion-python 45 [#1024](https://github.com/apache/datafusion-python/pull/1024) (timsaucer) +- Enable Dataframe to be converted into views which can be used in register_table [#1016](https://github.com/apache/datafusion-python/pull/1016) (kosiew) +- Add ruff check for missing futures import [#1052](https://github.com/apache/datafusion-python/pull/1052) (timsaucer) +- Enable take comments to assign issues to users [#1058](https://github.com/apache/datafusion-python/pull/1058) (timsaucer) +- Update python min version to 3.9 [#1043](https://github.com/apache/datafusion-python/pull/1043) (kevinjqliu) +- feat/improve ruff test coverage [#1055](https://github.com/apache/datafusion-python/pull/1055) (timsaucer) +- feat/making global context accessible for users [#1060](https://github.com/apache/datafusion-python/pull/1060) (jsai28) +- Renaming Internal Structs [#1059](https://github.com/apache/datafusion-python/pull/1059) (Spaarsh) +- test: add pytest asyncio tests [#1063](https://github.com/apache/datafusion-python/pull/1063) (jsai28) +- Add decorator for udwf [#1061](https://github.com/apache/datafusion-python/pull/1061) (kosiew) +- Add additional ruff suggestions [#1062](https://github.com/apache/datafusion-python/pull/1062) (Spaarsh) +- Improve collection during repr and repr_html [#1036](https://github.com/apache/datafusion-python/pull/1036) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 7 Tim Saucer + 2 Kevin Liu + 2 Spaarsh + 2 jsai28 + 2 kosiew + 1 Chen Chongchen + 1 Chongchen Chen + 1 Crystal Zhou + 1 Ion Koutsouris + 1 Nirnay Roy + 1 dependabot[bot] +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ From ffafb59e1b1b7f49f4ba4507b28ba1cecfb0225a Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Sun, 30 Mar 2025 20:45:15 +0800 Subject: [PATCH 030/206] feat: support unparser (#1088) * support unparser * add license * add export * format * format --- python/datafusion/__init__.py | 3 +- python/datafusion/unparser.py | 80 +++++++++++++++++++++++++++++++++++ python/tests/test_unparser.py | 33 +++++++++++++++ src/lib.rs | 5 +++ src/unparser/dialect.rs | 63 +++++++++++++++++++++++++++ src/unparser/mod.rs | 66 +++++++++++++++++++++++++++++ 6 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 python/datafusion/unparser.py create mode 100644 python/tests/test_unparser.py create mode 100644 src/unparser/dialect.rs create mode 100644 src/unparser/mod.rs diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index d871fdb71..ecf5545bc 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,7 +26,7 @@ except ImportError: import importlib_metadata -from . import functions, object_store, substrait +from . import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. from ._internal import Config @@ -89,6 +89,7 @@ "udaf", "udf", "udwf", + "unparser", ] diff --git a/python/datafusion/unparser.py b/python/datafusion/unparser.py new file mode 100644 index 000000000..7ca5b9190 --- /dev/null +++ b/python/datafusion/unparser.py @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""This module provides support for unparsing datafusion plans to SQL. + +For additional information about unparsing, see https://docs.rs/datafusion-sql/latest/datafusion_sql/unparser/index.html +""" + +from ._internal import unparser as unparser_internal +from .plan import LogicalPlan + + +class Dialect: + """DataFusion unparser dialect.""" + + def __init__(self, dialect: unparser_internal.Dialect) -> None: + """This constructor is not typically called by the end user.""" + self.dialect = dialect + + @staticmethod + def default() -> "Dialect": + """Create a new default dialect.""" + return Dialect(unparser_internal.Dialect.default()) + + @staticmethod + def mysql() -> "Dialect": + """Create a new MySQL dialect.""" + return Dialect(unparser_internal.Dialect.mysql()) + + @staticmethod + def postgres() -> "Dialect": + """Create a new PostgreSQL dialect.""" + return Dialect(unparser_internal.Dialect.postgres()) + + @staticmethod + def sqlite() -> "Dialect": + """Create a new SQLite dialect.""" + return Dialect(unparser_internal.Dialect.sqlite()) + + @staticmethod + def duckdb() -> "Dialect": + """Create a new DuckDB dialect.""" + return Dialect(unparser_internal.Dialect.duckdb()) + + +class Unparser: + """DataFusion unparser.""" + + def __init__(self, dialect: Dialect) -> None: + """This constructor is not typically called by the end user.""" + self.unparser = unparser_internal.Unparser(dialect.dialect) + + def plan_to_sql(self, plan: LogicalPlan) -> str: + """Convert a logical plan to a SQL string.""" + return self.unparser.plan_to_sql(plan._raw_plan) + + def
with_pretty(self, pretty: bool) -> "Unparser": + """Set the pretty flag.""" + self.unparser = self.unparser.with_pretty(pretty) + return self + + +__all__ = [ + "Dialect", + "Unparser", +] diff --git a/python/tests/test_unparser.py b/python/tests/test_unparser.py new file mode 100644 index 000000000..c4e05780c --- /dev/null +++ b/python/tests/test_unparser.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datafusion.context import SessionContext +from datafusion.unparser import Dialect, Unparser + + +def test_unparser(): + ctx = SessionContext() + df = ctx.sql("SELECT 1") + for dialect in [ + Dialect.mysql(), + Dialect.postgres(), + Dialect.sqlite(), + Dialect.duckdb(), + ]: + unparser = Unparser(dialect) + sql = unparser.plan_to_sql(df.logical_plan()) + assert sql == "SELECT 1" diff --git a/src/lib.rs b/src/lib.rs index ce93ff0c3..6eeda0878 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,6 +52,7 @@ pub mod pyarrow_util; mod record_batch; pub mod sql; pub mod store; +pub mod unparser; #[cfg(feature = "substrait")] pub mod substrait; @@ -103,6 +104,10 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { expr::init_module(&expr)?; m.add_submodule(&expr)?; + let unparser = PyModule::new(py, "unparser")?; + unparser::init_module(&unparser)?; + m.add_submodule(&unparser)?; + // Register the functions as a submodule let funcs = PyModule::new(py, "functions")?; functions::init_module(&funcs)?; diff --git a/src/unparser/dialect.rs b/src/unparser/dialect.rs new file mode 100644 index 000000000..caeef9949 --- /dev/null +++ b/src/unparser/dialect.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use datafusion::sql::unparser::dialect::{ + DefaultDialect, Dialect, DuckDBDialect, MySqlDialect, PostgreSqlDialect, SqliteDialect, +}; +use pyo3::prelude::*; + +#[pyclass(name = "Dialect", module = "datafusion.unparser", subclass)] +#[derive(Clone)] +pub struct PyDialect { + pub dialect: Arc, +} + +#[pymethods] +impl PyDialect { + #[staticmethod] + pub fn default() -> Self { + Self { + dialect: Arc::new(DefaultDialect {}), + } + } + #[staticmethod] + pub fn postgres() -> Self { + Self { + dialect: Arc::new(PostgreSqlDialect {}), + } + } + #[staticmethod] + pub fn mysql() -> Self { + Self { + dialect: Arc::new(MySqlDialect {}), + } + } + #[staticmethod] + pub fn sqlite() -> Self { + Self { + dialect: Arc::new(SqliteDialect {}), + } + } + #[staticmethod] + pub fn duckdb() -> Self { + Self { + dialect: Arc::new(DuckDBDialect::new()), + } + } +} diff --git a/src/unparser/mod.rs b/src/unparser/mod.rs new file mode 100644 index 000000000..b4b0fed10 --- /dev/null +++ b/src/unparser/mod.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +mod dialect; + +use std::sync::Arc; + +use datafusion::sql::unparser::{dialect::Dialect, Unparser}; +use dialect::PyDialect; +use pyo3::{exceptions::PyValueError, prelude::*}; + +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "Unparser", module = "datafusion.unparser", subclass)] +#[derive(Clone)] +pub struct PyUnparser { + dialect: Arc, + pretty: bool, +} + +#[pymethods] +impl PyUnparser { + #[new] + pub fn new(dialect: PyDialect) -> Self { + Self { + dialect: dialect.dialect.clone(), + pretty: false, + } + } + + pub fn plan_to_sql(&self, plan: &PyLogicalPlan) -> PyResult { + let mut unparser = Unparser::new(self.dialect.as_ref()); + unparser = unparser.with_pretty(self.pretty); + let sql = unparser + .plan_to_sql(&plan.plan()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(sql.to_string()) + } + + pub fn with_pretty(&self, pretty: bool) -> Self { + Self { + dialect: self.dialect.clone(), + pretty, + } + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + Ok(()) +} From 09b929a65c27ce8c58563d4def8d79b426ae47e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20Sch=C3=A4fer?= <33159547+floscha@users.noreply.github.com> Date: Sun, 30 Mar 2025 14:45:49 +0200 Subject: [PATCH 031/206] Documentation updates: mention correct dataset on basics page (#1081) * Documentation updates: mention correct dataset on basics page * Update docs/source/user-guide/basics.rst Co-authored-by: Kevin Liu * Make download hint more concise --------- Co-authored-by: Kevin Liu --- docs/source/user-guide/basics.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index f37378a41..6636c0c6a 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -20,8 +20,8 @@ Concepts ======== -In this section, we will cover a basic example to introduce a few key concepts. 
We will use the same -source file as described in the :ref:`Introduction `, the Pokemon data set. +In this section, we will cover a basic example to introduce a few key concepts. We will use the +2021 Yellow Taxi Trip Records (`download <https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet>`_), from the `TLC Trip Record Data <https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page>`_. .. ipython:: python From 818975b5c43021fed109ebba3cb99d744e8f036a Mon Sep 17 00:00:00 2001 From: kosiew Date: Mon, 21 Apr 2025 19:51:25 +0800 Subject: [PATCH 032/206] Add Configurable HTML Table Formatter for DataFusion DataFrames in Python (#1100) * feat: add configurable HTML formatter for DataFrames * fix: update schema iteration in DataFrameHtmlFormatter to use correct format * refactor: remove unused constant MAX_LENGTH_CELL_WITHOUT_MINIMIZE in PyTableProvider * refactor: improve HTML rendering structure in DataFrameHtmlFormatter - Added List import to typing for type hints. - Refactored format_html method to modularize HTML component generation. - Created separate methods for building HTML header, table container, header, body, expandable cells, regular cells, and footer for better readability and maintainability. - Updated table_uuid generation to use f-string for consistency. - Ensured all HTML components are returned as lists for efficient joining. * doc: enhance docstrings for DataFrameHtmlFormatter methods to clarify usage * refactor: enhance DataFrameHtmlFormatter with customizable cell and header styles - Added methods `get_cell_style()` and `get_header_style()` to allow subclasses to customize the CSS styles for table cells and headers. - Updated `_build_table_header()` and `_build_regular_cell()` methods to utilize the new styling methods for improved maintainability. - Introduced a registry for custom type formatters in `DataFrameHtmlFormatter` to enable flexible formatting of cell values based on their types.
- Enhanced `_format_cell_value()` to check for registered formatters before defaulting to string conversion, improving extensibility. * refactor: enhance DataFrameHtmlFormatter with custom cell and header builders - Introduced CellFormatter and StyleProvider protocols for better extensibility. - Added DefaultStyleProvider class with default CSS styles for cells and headers. - Updated DataFrameHtmlFormatter to support custom cell and header builders. - Refactored methods to utilize the new style provider for consistent styling. - Improved documentation for methods and classes to clarify usage and customization options. * doc: expand module docstring for DataFrameHtmlFormatter with usage examples and customization options * refactor: streamline HTML formatter by removing extensive docstring examples and enhancing cell formatting methods - Removed lengthy examples from the docstring of DataFrameHtmlFormatter to improve readability. - Added methods for extracting and formatting cell values, enhancing the clarity and maintainability of the code. - Updated cell building methods to utilize the new formatting logic, ensuring consistent application of styles and behaviors. - Introduced a reset fixture for tests to ensure the formatter is returned to default settings after each test case. - Added tests for HTML formatter configuration, custom style providers, type formatters, custom cell builders, and complex customizations to ensure robust functionality. 
* refactor: improve cell rendering logic in DataFrameHtmlFormatter by utilizing raw values for custom cell builders and optimizing expandable cell creation * refactor: enhance HTML representation in DataFrame by integrating latest formatter and improving cell value formatting logic * refactor: improve HTML formatting logic in DataFrame by separating data collection and schema retrieval for clarity refactor: enhance reset_formatter fixture to preserve original formatter configuration during tests * refactor: add debug utilities for HTML formatter integration testing and enhance debugging output in DataFrameHtmlFormatter * refactor: implement HTML formatter patch for DataFrame and enhance value retrieval in cell formatting * fix: correct typo in file extension check for parquet files in test_write_compressed_parquet * test: add test for DataFrame._repr_html_ to validate HTML output structure * refactor: remove monkeypatch for DataFrame._repr_html_ and associated logic * refactor: simplify _repr_html_ method in DataFrame to directly call internal representation * refactor: remove debug utilities for HTML formatter integration in DataFrame * refactor: remove debug print statements from DataFrameHtmlFormatter and add HTML formatter integration tests - Removed debug print statements from format_html, _build_table_body, and get_formatter methods in DataFrameHtmlFormatter to clean up the code. - Introduced a new debug_utils.py file containing a function to check HTML formatter integration. - Updated __init__.py to include configure_formatter for easier access. - Enhanced DataFrame class to include a docstring for _repr_html_ method. - Added comprehensive tests for HTML formatter configuration, custom style providers, type formatters, and cell/header builders in test_dataframe.py. * refactor: streamline imports and enhance HTML formatter integration in tests - Removed redundant import of `configure_formatter` in `__init__.py`. 
- Added `configure_formatter` to `__all__` in `__init__.py` for better module exposure. - Cleaned up import statements in `html_formatter.py` for clarity. - Consolidated import statements in `test_dataframe.py` for improved readability. - Simplified the `reset_formatter` fixture by removing unnecessary imports and comments. * refactor: remove redundant imports and debug print statements in HTML formatter tests * refactor: add reset_formatter function to reset global HTML formatter state - Implemented reset_formatter to create a new default DataFrame HTML formatter and update the global reference. - Added clean_formatter_state fixture in tests to ensure a fresh formatter state for each test case. - Updated test cases to use clean_formatter_state instead of the previous reset_formatter implementation. * refactor: enhance DataFrameHtmlFormatter initialization with parameter validation * test: add custom cell builder test for HTML formatter with value-based styling * test: enhance DataFrame HTML representation tests for structure and values * feat: enhance DataFrameHtmlFormatter with shared styles support and reset functionality - Added `use_shared_styles` parameter to control loading of styles/scripts. - Implemented logic to conditionally include styles based on `use_shared_styles`. - Updated the constructor to validate `use_shared_styles` as a boolean. - Introduced `reset_styles_loaded_state` function to reset the styles loaded state. - Modified `reset_formatter` to reset the `_styles_loaded` flag. 
* refactor: update footer comment in DataFrameHtmlFormatter to clarify content * test: enhance HTML representation test to accommodate span-wrapped values * docs: add usage examples to formatter functions in html_formatter.py * test: add HTML formatter tests for shared styles functionality * feat: add method to check if styles are loaded and enhance schema validation in DataFrameHtmlFormatter * refactor: streamline custom cell builder in HTML formatter tests for clarity and maintainability * fix ruff errors * chore: update license header in html_formatter.py for compliance * refactor: improve HTML formatter tests by updating import statements and enhancing regex patterns for body data * fix clippy errors --- python/datafusion/__init__.py | 2 + python/datafusion/html_formatter.py | 647 ++++++++++++++++++++++++++++ python/tests/test_dataframe.py | 396 ++++++++++++++++- src/dataframe.rs | 130 +----- 4 files changed, 1061 insertions(+), 114 deletions(-) create mode 100644 python/datafusion/html_formatter.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index ecf5545bc..60d0d61b4 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -45,6 +45,7 @@ Expr, WindowFrame, ) +from .html_formatter import configure_formatter from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream @@ -76,6 +77,7 @@ "col", "column", "common", + "configure_formatter", "expr", "functions", "lit", diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py new file mode 100644 index 000000000..a50e14fd5 --- /dev/null +++ b/python/datafusion/html_formatter.py @@ -0,0 +1,647 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""HTML formatting utilities for DataFusion DataFrames.""" + +from __future__ import annotations + +from typing import ( + Any, + Callable, + Optional, + Protocol, + runtime_checkable, +) + + +@runtime_checkable +class CellFormatter(Protocol): + """Protocol for cell value formatters.""" + + def __call__(self, value: Any) -> str: + """Format a cell value to string representation.""" + ... + + +@runtime_checkable +class StyleProvider(Protocol): + """Protocol for HTML style providers.""" + + def get_cell_style(self) -> str: + """Get the CSS style for table cells.""" + ... + + def get_header_style(self) -> str: + """Get the CSS style for header cells.""" + ... + + +class DefaultStyleProvider: + """Default implementation of StyleProvider.""" + + def get_cell_style(self) -> str: + """Get the CSS style for table cells. + + Returns: + CSS style string + """ + return ( + "border: 1px solid black; padding: 8px; text-align: left; " + "white-space: nowrap;" + ) + + def get_header_style(self) -> str: + """Get the CSS style for header cells. + + Returns: + CSS style string + """ + return ( + "border: 1px solid black; padding: 8px; text-align: left; " + "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; " + "max-width: fit-content;" + ) + + +class DataFrameHtmlFormatter: + """Configurable HTML formatter for DataFusion DataFrames. 
+ + This class handles the HTML rendering of DataFrames for display in + Jupyter notebooks and other rich display contexts. + + This class supports extension through composition. Key extension points: + - Provide a custom StyleProvider for styling cells and headers + - Register custom formatters for specific types + - Provide custom cell builders for specialized cell rendering + + Args: + max_cell_length: Maximum characters to display in a cell before truncation + max_width: Maximum width of the HTML table in pixels + max_height: Maximum height of the HTML table in pixels + enable_cell_expansion: Whether to add expand/collapse buttons for long cell + values + custom_css: Additional CSS to include in the HTML output + show_truncation_message: Whether to display a message when data is truncated + style_provider: Custom provider for cell and header styles + use_shared_styles: Whether to load styles and scripts only once per notebook + session + """ + + # Class variable to track if styles have been loaded in the notebook + _styles_loaded = False + + def __init__( + self, + max_cell_length: int = 25, + max_width: int = 1000, + max_height: int = 300, + enable_cell_expansion: bool = True, + custom_css: Optional[str] = None, + show_truncation_message: bool = True, + style_provider: Optional[StyleProvider] = None, + use_shared_styles: bool = True, + ) -> None: + """Initialize the HTML formatter. + + Parameters + ---------- + max_cell_length : int, default 25 + Maximum length of cell content before truncation. + max_width : int, default 1000 + Maximum width of the displayed table in pixels. + max_height : int, default 300 + Maximum height of the displayed table in pixels. + enable_cell_expansion : bool, default True + Whether to allow cells to expand when clicked. + custom_css : str, optional + Custom CSS to apply to the HTML table. + show_truncation_message : bool, default True + Whether to show a message indicating that content has been truncated. 
+ style_provider : StyleProvider, optional + Provider of CSS styles for the HTML table. If None, DefaultStyleProvider + is used. + use_shared_styles : bool, default True + Whether to use shared styles across multiple tables. + + Raises: + ------ + ValueError + If max_cell_length, max_width, or max_height is not a positive integer. + TypeError + If enable_cell_expansion, show_truncation_message, or use_shared_styles is + not a boolean, + or if custom_css is provided but is not a string, + or if style_provider is provided but does not implement the StyleProvider + protocol. + """ + # Validate numeric parameters + + if not isinstance(max_cell_length, int) or max_cell_length <= 0: + msg = "max_cell_length must be a positive integer" + raise ValueError(msg) + if not isinstance(max_width, int) or max_width <= 0: + msg = "max_width must be a positive integer" + raise ValueError(msg) + if not isinstance(max_height, int) or max_height <= 0: + msg = "max_height must be a positive integer" + raise ValueError(msg) + + # Validate boolean parameters + if not isinstance(enable_cell_expansion, bool): + msg = "enable_cell_expansion must be a boolean" + raise TypeError(msg) + if not isinstance(show_truncation_message, bool): + msg = "show_truncation_message must be a boolean" + raise TypeError(msg) + if not isinstance(use_shared_styles, bool): + msg = "use_shared_styles must be a boolean" + raise TypeError(msg) + + # Validate custom_css + if custom_css is not None and not isinstance(custom_css, str): + msg = "custom_css must be None or a string" + raise TypeError(msg) + + # Validate style_provider + if style_provider is not None and not isinstance(style_provider, StyleProvider): + msg = "style_provider must implement the StyleProvider protocol" + raise TypeError(msg) + + self.max_cell_length = max_cell_length + self.max_width = max_width + self.max_height = max_height + self.enable_cell_expansion = enable_cell_expansion + self.custom_css = custom_css + self.show_truncation_message = 
show_truncation_message + self.style_provider = style_provider or DefaultStyleProvider() + self.use_shared_styles = use_shared_styles + # Registry for custom type formatters + self._type_formatters: dict[type, CellFormatter] = {} + # Custom cell builders + self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None + self._custom_header_builder: Optional[Callable[[Any], str]] = None + + def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: + """Register a custom formatter for a specific data type. + + Args: + type_class: The type to register a formatter for + formatter: Function that takes a value of the given type and returns + a formatted string + """ + self._type_formatters[type_class] = formatter + + def set_custom_cell_builder( + self, builder: Callable[[Any, int, int, str], str] + ) -> None: + """Set a custom cell builder function. + + Args: + builder: Function that takes (value, row, col, table_id) and returns HTML + """ + self._custom_cell_builder = builder + + def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: + """Set a custom header builder function. + + Args: + builder: Function that takes a field and returns HTML + """ + self._custom_header_builder = builder + + @classmethod + def is_styles_loaded(cls) -> bool: + """Check if HTML styles have been loaded in the current session. + + This method is primarily intended for debugging UI rendering issues + related to style loading. + + Returns: + True if styles have been loaded, False otherwise + + Example: + >>> from datafusion.html_formatter import DataFrameHtmlFormatter + >>> DataFrameHtmlFormatter.is_styles_loaded() + False + """ + return cls._styles_loaded + + def format_html( + self, + batches: list, + schema: Any, + has_more: bool = False, + table_uuid: str | None = None, + ) -> str: + """Format record batches as HTML. 
+ + This method is used by DataFrame's _repr_html_ implementation and can be + called directly when custom HTML rendering is needed. + + Args: + batches: List of Arrow RecordBatch objects + schema: Arrow Schema object + has_more: Whether there are more batches not shown + table_uuid: Unique ID for the table, used for JavaScript interactions + + Returns: + HTML string representation of the data + + Raises: + TypeError: If schema is invalid and no batches are provided + """ + if not batches: + return "No data to display" + + # Validate schema + if schema is None or not hasattr(schema, "__iter__"): + msg = "Schema must be provided" + raise TypeError(msg) + + # Generate a unique ID if none provided + table_uuid = table_uuid or f"df-{id(batches)}" + + # Build HTML components + html = [] + + # Only include styles and scripts if: + # 1. Not using shared styles, OR + # 2. Using shared styles but they haven't been loaded yet + include_styles = ( + not self.use_shared_styles or not DataFrameHtmlFormatter._styles_loaded + ) + + if include_styles: + html.extend(self._build_html_header()) + # If we're using shared styles, mark them as loaded + if self.use_shared_styles: + DataFrameHtmlFormatter._styles_loaded = True + + html.extend(self._build_table_container_start()) + + # Add table header and body + html.extend(self._build_table_header(schema)) + html.extend(self._build_table_body(batches, table_uuid)) + + html.append("") + html.append("") + + # Add footer (JavaScript and messages) + if include_styles and self.enable_cell_expansion: + html.append(self._get_javascript()) + + # Always add truncation message if needed (independent of styles) + if has_more and self.show_truncation_message: + html.append("
Data truncated due to size.
") + + return "\n".join(html) + + def _build_html_header(self) -> list[str]: + """Build the HTML header with CSS styles.""" + html = [] + html.append("") + return html + + def _build_table_container_start(self) -> list[str]: + """Build the opening tags for the table container.""" + html = [] + html.append( + f'
' + ) + html.append('') + return html + + def _build_table_header(self, schema: Any) -> list[str]: + """Build the HTML table header with column names.""" + html = [] + html.append("") + html.append("") + for field in schema: + if self._custom_header_builder: + html.append(self._custom_header_builder(field)) + else: + html.append( + f"" + ) + html.append("") + html.append("") + return html + + def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: + """Build the HTML table body with data rows.""" + html = [] + html.append("") + + row_count = 0 + for batch in batches: + for row_idx in range(batch.num_rows): + row_count += 1 + html.append("") + + for col_idx, column in enumerate(batch.columns): + # Get the raw value from the column + raw_value = self._get_cell_value(column, row_idx) + + # Always check for type formatters first to format the value + formatted_value = self._format_cell_value(raw_value) + + # Then apply either custom cell builder or standard cell formatting + if self._custom_cell_builder: + # Pass both the raw value and formatted value to let the + # builder decide + cell_html = self._custom_cell_builder( + raw_value, row_count, col_idx, table_uuid + ) + html.append(cell_html) + else: + # Standard cell formatting with formatted value + if ( + len(str(raw_value)) > self.max_cell_length + and self.enable_cell_expansion + ): + cell_html = self._build_expandable_cell( + formatted_value, row_count, col_idx, table_uuid + ) + else: + cell_html = self._build_regular_cell(formatted_value) + html.append(cell_html) + + html.append("") + + html.append("") + return html + + def _get_cell_value(self, column: Any, row_idx: int) -> Any: + """Extract a cell value from a column. 
+ + Args: + column: Arrow array + row_idx: Row index + + Returns: + The raw cell value + """ + try: + value = column[row_idx] + + if hasattr(value, "as_py"): + return value.as_py() + except (AttributeError, TypeError): + pass + else: + return value + + def _format_cell_value(self, value: Any) -> str: + """Format a cell value for display. + + Uses registered type formatters if available. + + Args: + value: The cell value to format + + Returns: + Formatted cell value as string + """ + # Check for custom type formatters + for type_cls, formatter in self._type_formatters.items(): + if isinstance(value, type_cls): + return formatter(value) + + # If no formatter matched, return string representation + return str(value) + + def _build_expandable_cell( + self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str + ) -> str: + """Build an expandable cell for long content.""" + short_value = str(formatted_value)[: self.max_cell_length] + return ( + f"" + ) + + def _build_regular_cell(self, formatted_value: str) -> str: + """Build a regular table cell.""" + return ( + f"" + ) + + def _build_html_footer(self, has_more: bool) -> list[str]: + """Build the HTML footer with JavaScript and messages.""" + html = [] + + # Add JavaScript for interactivity only if cell expansion is enabled + # and we're not using the shared styles approach + if self.enable_cell_expansion and not self.use_shared_styles: + html.append(self._get_javascript()) + + # Add truncation message if needed + if has_more and self.show_truncation_message: + html.append("
Data truncated due to size.
") + + return html + + def _get_default_css(self) -> str: + """Get default CSS styles for the HTML table.""" + return """ + .expandable-container { + display: inline-block; + max-width: 200px; + } + .expandable { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + display: block; + } + .full-text { + display: none; + white-space: normal; + } + .expand-btn { + cursor: pointer; + color: blue; + text-decoration: underline; + border: none; + background: none; + font-size: inherit; + display: block; + margin-top: 5px; + } + """ + + def _get_javascript(self) -> str: + """Get JavaScript code for interactive elements.""" + return """ + + """ + + +class FormatterManager: + """Manager class for the global DataFrame HTML formatter instance.""" + + _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() + + @classmethod + def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. + + Args: + formatter: The formatter instance to use globally + """ + cls._default_formatter = formatter + _refresh_formatter_reference() + + @classmethod + def get_formatter(cls) -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + Returns: + The global HTML formatter instance + """ + return cls._default_formatter + + +def get_formatter() -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + This function is used by the DataFrame._repr_html_ implementation to access + the shared formatter instance. It can also be used directly when custom + HTML rendering is needed. + + Returns: + The global HTML formatter instance + + Example: + >>> from datafusion.html_formatter import get_formatter + >>> formatter = get_formatter() + >>> formatter.max_cell_length = 50 # Increase cell length + """ + return FormatterManager.get_formatter() + + +def set_formatter(formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. 
+ + Args: + formatter: The formatter instance to use globally + + Example: + >>> from datafusion.html_formatter import get_formatter, set_formatter + >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) + >>> set_formatter(custom_formatter) + """ + FormatterManager.set_formatter(formatter) + + +def configure_formatter(**kwargs: Any) -> None: + """Configure the global DataFrame HTML formatter. + + This function creates a new formatter with the provided configuration + and sets it as the global formatter for all DataFrames. + + Args: + **kwargs: Formatter configuration parameters like max_cell_length, + max_width, max_height, enable_cell_expansion, etc. + + Example: + >>> from datafusion.html_formatter import configure_formatter + >>> configure_formatter( + ... max_cell_length=50, + ... max_height=500, + ... enable_cell_expansion=True, + ... use_shared_styles=True + ... ) + """ + set_formatter(DataFrameHtmlFormatter(**kwargs)) + + +def reset_formatter() -> None: + """Reset the global DataFrame HTML formatter to default settings. + + This function creates a new formatter with default configuration + and sets it as the global formatter for all DataFrames. + + Example: + >>> from datafusion.html_formatter import reset_formatter + >>> reset_formatter() # Reset formatter to default settings + """ + formatter = DataFrameHtmlFormatter() + # Reset the styles_loaded flag to ensure styles will be reloaded + DataFrameHtmlFormatter._styles_loaded = False + set_formatter(formatter) + + +def reset_styles_loaded_state() -> None: + """Reset the styles loaded state to force reloading of styles. + + This can be useful when switching between notebook sessions or + when styles need to be refreshed. 
+ + Example: + >>> from datafusion.html_formatter import reset_styles_loaded_state + >>> reset_styles_loaded_state() # Force styles to reload in next render + """ + DataFrameHtmlFormatter._styles_loaded = False + + +def _refresh_formatter_reference() -> None: + """Refresh formatter reference in any modules using it. + + This helps ensure that changes to the formatter are reflected in existing + DataFrames that might be caching the formatter reference. + """ + # This is a no-op but signals modules to refresh their reference diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index eda13930d..464b884db 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -28,8 +28,17 @@ column, literal, ) -from datafusion import functions as f +from datafusion import ( + functions as f, +) from datafusion.expr import Window +from datafusion.html_formatter import ( + DataFrameHtmlFormatter, + configure_formatter, + get_formatter, + reset_formatter, + reset_styles_loaded_state, +) from pyarrow.csv import write_csv @@ -102,6 +111,12 @@ def partitioned_df(): return ctx.create_dataframe([[batch]]) +@pytest.fixture +def clean_formatter_state(): + """Reset the HTML formatter after each test.""" + reset_formatter() + + def test_select(df): df_1 = df.select( column("a") + column("b"), @@ -656,6 +671,252 @@ def test_window_frame_defaults_match_postgres(partitioned_df): assert df_2.sort(col_a).to_pydict() == expected +def test_html_formatter_configuration(df, clean_formatter_state): + """Test configuring the HTML formatter with different options.""" + # Configure with custom settings + configure_formatter( + max_cell_length=5, + max_width=500, + max_height=200, + enable_cell_expansion=False, + ) + + html_output = df._repr_html_() + + # Verify our configuration was applied + assert "max-height: 200px" in html_output + assert "max-width: 500px" in html_output + # With cell expansion disabled, we shouldn't see expandable-container elements + 
assert "expandable-container" not in html_output + + +def test_html_formatter_custom_style_provider(df, clean_formatter_state): + """Test using custom style providers with the HTML formatter.""" + + class CustomStyleProvider: + def get_cell_style(self) -> str: + return ( + "background-color: #f5f5f5; color: #333; padding: 8px; border: " + "1px solid #ddd;" + ) + + def get_header_style(self) -> str: + return ( + "background-color: #4285f4; color: white; font-weight: bold; " + "padding: 10px; border: 1px solid #3367d6;" + ) + + # Configure with custom style provider + configure_formatter(style_provider=CustomStyleProvider()) + + html_output = df._repr_html_() + + # Verify our custom styles were applied + assert "background-color: #4285f4" in html_output + assert "color: white" in html_output + assert "background-color: #f5f5f5" in html_output + + +def test_html_formatter_type_formatters(df, clean_formatter_state): + """Test registering custom type formatters for specific data types.""" + + # Get current formatter and register custom formatters + formatter = get_formatter() + + # Format integers with color based on value + # Using int as the type for the formatter will work since we convert + # Arrow scalar values to Python native types in _get_cell_value + def format_int(value): + return f' 2 else "blue"}">{value}' + + formatter.register_formatter(int, format_int) + + html_output = df._repr_html_() + + # Our test dataframe has values 1,2,3 so we should see: + assert '1' in html_output + + +def test_html_formatter_custom_cell_builder(df, clean_formatter_state): + """Test using a custom cell builder function.""" + + # Create a custom cell builder with distinct styling for different value ranges + def custom_cell_builder(value, row, col, table_id): + try: + num_value = int(value) + if num_value > 5: # Values > 5 get green background with indicator + return ( + '' + ) + if num_value < 3: # Values < 3 get blue background with indicator + return ( + '' + ) + except 
(ValueError, TypeError): + pass + + # Default styling for other cells (3, 4, 5) + return f'' + + # Set our custom cell builder + formatter = get_formatter() + formatter.set_custom_cell_builder(custom_cell_builder) + + html_output = df._repr_html_() + + # Extract cells with specific styling using regex + low_cells = re.findall( + r'', html_output + ) + mid_cells = re.findall( + r'', html_output + ) + high_cells = re.findall( + r'', html_output + ) + + # Sort the extracted values for consistent comparison + low_cells = sorted(map(int, low_cells)) + mid_cells = sorted(map(int, mid_cells)) + high_cells = sorted(map(int, high_cells)) + + # Verify specific values have the correct styling applied + assert low_cells == [1, 2] # Values < 3 + assert mid_cells == [3, 4, 5, 5] # Values 3-5 + assert high_cells == [6, 8, 8] # Values > 5 + + # Verify the exact content with styling appears in the output + assert ( + '' + in html_output + ) + assert ( + '' + in html_output + ) + assert ( + '' in html_output + ) + assert ( + '' in html_output + ) + assert ( + '' + in html_output + ) + assert ( + '' + in html_output + ) + + # Count occurrences to ensure all cells are properly styled + assert html_output.count("-low") == 2 # Two low values (1, 2) + assert html_output.count("-mid") == 4 # Four mid values (3, 4, 5, 5) + assert html_output.count("-high") == 3 # Three high values (6, 8, 8) + + # Create a custom cell builder that changes background color based on value + def custom_cell_builder(value, row, col, table_id): + # Handle numeric values regardless of their exact type + try: + num_value = int(value) + if num_value > 5: # Values > 5 get green background + return f'' + if num_value < 3: # Values < 3 get light blue background + return f'' + except (ValueError, TypeError): + pass + + # Default styling for other cells + return f'' + + # Set our custom cell builder + formatter = get_formatter() + formatter.set_custom_cell_builder(custom_cell_builder) + + html_output = df._repr_html_() 
+ + # Verify our custom cell styling was applied + assert "background-color: #d3e9f0" in html_output # For values 1,2 + + +def test_html_formatter_custom_header_builder(df, clean_formatter_state): + """Test using a custom header builder function.""" + + # Create a custom header builder with tooltips + def custom_header_builder(field): + tooltips = { + "a": "Primary key column", + "b": "Secondary values", + "c": "Additional data", + } + tooltip = tooltips.get(field.name, "") + return ( + f'' + ) + + # Set our custom header builder + formatter = get_formatter() + formatter.set_custom_header_builder(custom_header_builder) + + html_output = df._repr_html_() + + # Verify our custom headers were applied + assert 'title="Primary key column"' in html_output + assert 'title="Secondary values"' in html_output + assert "background-color: #333; color: white" in html_output + + +def test_html_formatter_complex_customization(df, clean_formatter_state): + """Test combining multiple customization options together.""" + + # Create a dark mode style provider + class DarkModeStyleProvider: + def get_cell_style(self) -> str: + return ( + "background-color: #222; color: #eee; " + "padding: 8px; border: 1px solid #444;" + ) + + def get_header_style(self) -> str: + return ( + "background-color: #111; color: #fff; padding: 10px; " + "border: 1px solid #333;" + ) + + # Configure with dark mode style + configure_formatter( + max_cell_length=10, + style_provider=DarkModeStyleProvider(), + custom_css=""" + .datafusion-table { + font-family: monospace; + border-collapse: collapse; + } + .datafusion-table tr:hover td { + background-color: #444 !important; + } + """, + ) + + # Add type formatters for special formatting - now working with native int values + formatter = get_formatter() + formatter.register_formatter( + int, + lambda n: f'{n}', + ) + + html_output = df._repr_html_() + + # Verify our customizations were applied + assert "background-color: #222" in html_output + assert 
"background-color: #111" in html_output + assert ".datafusion-table" in html_output + assert "color: #5af" in html_output # Even numbers + + def test_get_dataframe(tmp_path): ctx = SessionContext() @@ -1244,7 +1505,10 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: assert result["new_col"] == [3 for _i in range(3)] -def test_dataframe_repr_html(df) -> None: +def test_dataframe_repr_html_structure(df) -> None: + """Test that DataFrame._repr_html_ produces expected HTML output structure.""" + import re + output = df._repr_html_() # Since we've added a fair bit of processing to the html output, lets just verify @@ -1255,9 +1519,131 @@ def test_dataframe_repr_html(df) -> None: headers = ["a", "b", "c"] headers = [f"{v}" for v in headers] header_pattern = "(.*?)".join(headers) - assert len(re.findall(header_pattern, output, re.DOTALL)) == 1 + header_matches = re.findall(header_pattern, output, re.DOTALL) + assert len(header_matches) == 1 + # Update the pattern to handle values that may be wrapped in spans body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] - body_lines = [f"{v}" for inner in body_data for v in inner] + + body_lines = [ + f"(?:]*?>)?{v}(?:)?" + for inner in body_data + for v in inner + ] body_pattern = "(.*?)".join(body_lines) - assert len(re.findall(body_pattern, output, re.DOTALL)) == 1 + + body_matches = re.findall(body_pattern, output, re.DOTALL) + + assert len(body_matches) == 1, "Expected pattern of values not found in HTML output" + + +def test_dataframe_repr_html_values(df): + """Test that DataFrame._repr_html_ contains the expected data values.""" + html = df._repr_html_() + assert html is not None + + # Create a more flexible pattern that handles values being wrapped in spans + # This pattern will match the sequence of values 1,4,8,2,5,5,3,6,8 regardless + # of formatting + pattern = re.compile( + r"]*?>(?:]*?>)?1(?:)?.*?" + r"]*?>(?:]*?>)?4(?:)?.*?" + r"]*?>(?:]*?>)?8(?:)?.*?" + r"]*?>(?:]*?>)?2(?:)?.*?" + r"]*?>(?:]*?>)?5(?:)?.*?" 
+ r"]*?>(?:]*?>)?5(?:)?.*?" + r"]*?>(?:]*?>)?3(?:)?.*?" + r"]*?>(?:]*?>)?6(?:)?.*?" + r"]*?>(?:]*?>)?8(?:)?", + re.DOTALL, + ) + + # Print debug info if the test fails + matches = re.findall(pattern, html) + if not matches: + print(f"HTML output snippet: {html[:500]}...") # noqa: T201 + + assert len(matches) > 0, "Expected pattern of values not found in HTML output" + + +def test_html_formatter_shared_styles(df, clean_formatter_state): + """Test that shared styles work correctly across multiple tables.""" + + # First, ensure we're using shared styles + configure_formatter(use_shared_styles=True) + + # Get HTML output for first table - should include styles + html_first = df._repr_html_() + + # Verify styles are included in first render + assert " + // Convert record batches to PyObject list + let py_batches = batches + .into_iter() + .map(|rb| rb.to_pyarrow(py)) + .collect::>>()?; -
-
" + f"{field.name}
" + f"
" + "" + "" + f"{formatted_value}" + f"" + f"
" + f"
{formatted_value}{value}-high{value}-low{value}-mid]*>(\d+)-low]*>(\d+)-mid]*>(\d+)-high1-low2-low3-mid4-mid6-high8-high{value}{value}{value}{field.name}
- \n".to_string(); + let py_schema = self.schema().into_pyobject(py)?; - let schema = batches[0].schema(); + // Get the Python formatter module and call format_html + let formatter_module = py.import("datafusion.html_formatter")?; + let get_formatter = formatter_module.getattr("get_formatter")?; + let formatter = get_formatter.call0()?; - let mut header = Vec::new(); - for field in schema.fields() { - header.push(format!("", field.name())); - } - let header_str = header.join(""); - html_str.push_str(&format!("{}\n", header_str)); - - let batch_formatters = batches - .iter() - .map(|batch| { - batch - .columns() - .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) - .map(|c| { - c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) - }) - .collect::, _>>() - }) - .collect::, _>>()?; - - let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); - - // We need to build up row by row for html - let mut table_row = 0; - for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { - for batch_row in 0..num_rows_in_batch { - table_row += 1; - let mut cells = Vec::new(); - for (col, formatter) in batch_formatter.iter().enumerate() { - let cell_data = formatter.value(batch_row).to_string(); - // From testing, primitive data types do not typically get larger than 21 characters - if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { - let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; - cells.push(format!(" - ")); - } else { - cells.push(format!("", formatter.value(batch_row))); - } - } - let row_str = cells.join(""); - html_str.push_str(&format!("{}\n", row_str)); - } - } - html_str.push_str("
{}
-
- {short_cell_data} - {cell_data} - -
-
{}
\n"); - - html_str.push_str(" - - "); + // Call format_html method on the formatter + let kwargs = pyo3::types::PyDict::new(py); + let py_batches_list = PyList::new(py, py_batches.as_slice())?; + kwargs.set_item("batches", py_batches_list)?; + kwargs.set_item("schema", py_schema)?; + kwargs.set_item("has_more", has_more)?; + kwargs.set_item("table_uuid", table_uuid)?; - if has_more { - html_str.push_str("Data truncated due to size."); - } + let html_result = formatter.call_method("format_html", (), Some(&kwargs))?; + let html_str: String = html_result.extract()?; Ok(html_str) } @@ -835,7 +747,7 @@ fn record_batch_into_schema( ) -> Result { let schema = Arc::new(schema.clone()); let base_schema = record_batch.schema(); - if base_schema.fields().len() == 0 { + if base_schema.fields().is_empty() { // Nothing to project return Ok(RecordBatch::new_empty(schema)); } From d0d14f6e1584f9569cbf2e36c8a7abc7c70fd903 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 24 Apr 2025 09:38:38 -0400 Subject: [PATCH 033/206] feat: update datafusion dependency 47 (#1107) * Update cargo to use DF47 release candidate * Need to be explicit for collection of Expr due to change in dataframe API * Add missing enum variant * Add missing enum variants * The interface for last_value of aggregates upstream changed * Cargo fmt * last value aggregate without ordering is ill defined * Clippy warning * Set datafusion version to 47 now that it is released --- Cargo.lock | 600 +++++++++++++++++++------------ Cargo.toml | 24 +- python/tests/test_aggregation.py | 1 - src/dataframe.rs | 2 +- src/dataset_exec.rs | 4 +- src/expr.rs | 16 +- src/functions.rs | 37 +- 7 files changed, 415 insertions(+), 269 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f90038c50..b32d19d4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.3.0" +version = "55.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "84ef243634a39fb6e9d1710737e7a5ef96c9bacabd2326859ff889bc9ef755e5" +checksum = "3095aaf545942ff5abd46654534f15b03a90fba78299d661e045e5d587222f0d" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f420c6aef51dad2e4a96ce29c0ec90ad84880bdb60b321c74c652a6be07b93f" +checksum = "00752064ff47cee746e816ddb8450520c3a52cbad1e256f6fa861a35f86c45e7" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24bda5ff6461a4ff9739959b3d57b377f45e3f878f7be1a4f28137c0a8f339fa" +checksum = "cebfe926794fbc1f49ddd0cdaf898956ca9f6e79541efce62dabccfd81380472" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6ed265c73f134a583d02c3cab5e16afab9446d8048ede8707e31f85fad58a0" +checksum = "0303c7ec4cf1a2c60310fc4d6bbc3350cd051a17bf9e9c0a8e47b4db79277824" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01c648572391edcef10e5fd458db70ba27ed6f71bcaee04397d0cfb100b34f8b" +checksum = "335f769c5a218ea823d3760a743feba1ef7857cba114c01399a891c2fff34285" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a02fb265a6d8011a7d3ad1a36f25816ad0a3bb04cb8e9fe7929c165b98c0cbcd" +checksum = "510db7dfbb4d5761826516cc611d97b3a68835d0ece95b034a052601109c0b1b" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f2cebf504bb6a92a134a87fff98f01b14fbb3a93ecf7aef90cd0f888c5fffa4" +checksum = "e8affacf3351a24039ea24adab06f316ded523b6f8c3dbe28fbac5f18743451b" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6405b287671c88846e7751f7291f717b164911474cabac6d3d8614d5aa7374" +checksum = "69880a9e6934d9cba2b8630dd08a3463a91db8693b16b499d54026b6137af284" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5329bf9e7390cbb6b117ddd4d82e94c5362ea4cab5095697139429f36a38350c" +checksum = "d8dafd17a05449e31e0114d740530e0ada7379d7cb9c338fd65b09a8130960b0" dependencies = [ "arrow-array", "arrow-buffer", @@ -328,9 +328,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e103c13d4b80da28339c1d7aa23dd85bd59f42158acc45d39eeb6770627909ce" +checksum = "895644523af4e17502d42c3cb6b27cb820f0cb77954c22d75c23a85247c849e1" dependencies = [ "arrow-array", "arrow-buffer", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170549a11b8534f3097a0619cfe89c42812345dc998bcf81128fc700b84345b8" +checksum = 
"9be8a2a4e5e7d9c822b2b8095ecd77010576d824f654d347817640acfc97d229" dependencies = [ "arrow-array", "arrow-buffer", @@ -354,18 +354,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c53775bba63f319189f366d2b86e9a8889373eb198f07d8544938fc9f8ed9a" +checksum = "7450c76ab7c5a6805be3440dc2e2096010da58f7cab301fdc996a4ee3ee74e49" dependencies = [ - "bitflags 2.8.0", + "bitflags", ] [[package]] name = "arrow-select" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a99003b2eb562b8d9c99dfb672306f15e94b20d3734179d596895703e821dcf" +checksum = "aa5f5a93c75f46ef48e4001535e7b6c922eeb0aa20b73cf58d09e13d057490d8" dependencies = [ "ahash", "arrow-array", @@ -377,9 +377,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fdb130ee8325f4cd8262e19bb6baa3cbcef2b2573c4bee8c6fda7ea08199d7" +checksum = "6e7005d858d84b56428ba2a98a107fe88c0132c61793cf6b8232a1f9bfc0452b" dependencies = [ "arrow-array", "arrow-buffer", @@ -406,11 +406,11 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.18" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2 0.4.4", + "bzip2 0.5.2", "flate2", "futures-core", "memchr", @@ -438,18 +438,18 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -502,9 +502,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" dependencies = [ "autocfg", "libm", @@ -514,12 +514,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.8.0" @@ -537,9 +531,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" +checksum = "389a099b34312839e16420d499a9cad9650541715937ffbdd40d36f49e77eeb3" dependencies = [ "arrayref", "arrayvec", @@ -608,21 +602,20 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" dependencies = [ "bzip2-sys", ] [[package]] name = "bzip2-sys" -version = "0.1.12+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" 
dependencies = [ "cc", - "libc", "pkg-config", ] @@ -866,23 +859,26 @@ dependencies = [ [[package]] name = "datafusion" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" +checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" dependencies = [ - "apache-avro", "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.5.1", + "bzip2 0.5.2", "chrono", "datafusion-catalog", "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-avro", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", "datafusion-expr-common", @@ -897,12 +893,12 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "flate2", "futures", "itertools 0.14.0", "log", - "num-traits", "object_store", "parking_lot", "parquet", @@ -919,29 +915,35 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" +checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" dependencies = [ "arrow", "async-trait", "dashmap", "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-physical-expr", "datafusion-physical-plan", + "datafusion-session", "datafusion-sql", "futures", "itertools 0.14.0", "log", + "object_store", "parking_lot", + "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" dependencies = [ "arrow", "async-trait", @@ -953,6 +955,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-session", "futures", "log", "object_store", @@ -961,9 +964,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" +checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" dependencies = [ "ahash", "apache-avro", @@ -986,27 +989,27 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" +checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" dependencies = [ + "futures", "log", "tokio", ] [[package]] name = "datafusion-datasource" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.5.1", + "bzip2 0.5.2", "chrono", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", @@ -1014,13 +1017,16 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-session", "flate2", "futures", "glob", "itertools 0.14.0", "log", "object_store", + "parquet", "rand", + "tempfile", "tokio", "tokio-util", "url", @@ -1028,17 +1034,123 @@ dependencies 
= [ "zstd", ] +[[package]] +name = "datafusion-datasource-avro" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ea5111aab9d3f2a8bff570343cccb03ce4c203875ef5a566b7d6f1eb72559e" +dependencies = [ + "apache-avro", + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "num-traits", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet", + "rand", + "tokio", +] + [[package]] name = "datafusion-doc" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" +checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" [[package]] name = "datafusion-execution" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" +checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" dependencies = [ "arrow", "dashmap", @@ -1055,9 +1167,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" +checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" dependencies = [ "arrow", "chrono", @@ -1076,9 +1188,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" +checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" dependencies = [ "arrow", "datafusion-common", @@ 
-1089,12 +1201,13 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d740dd9f32a4f4ed1b907e6934201bb059efe6c877532512c661771d973c7b21" +checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" dependencies = [ "abi_stable", "arrow", + "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1108,9 +1221,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" +checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" dependencies = [ "arrow", "arrow-buffer", @@ -1137,9 +1250,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" +checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" dependencies = [ "ahash", "arrow", @@ -1158,9 +1271,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" +checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" dependencies = [ "ahash", "arrow", @@ -1171,9 +1284,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" +checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" dependencies = [ "arrow", "arrow-ord", 
@@ -1192,9 +1305,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" +checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" dependencies = [ "arrow", "async-trait", @@ -1208,9 +1321,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" +checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1225,9 +1338,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" +checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1235,20 +1348,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" +checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "datafusion-optimizer" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" +checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" dependencies = [ "arrow", "chrono", @@ 
-1265,9 +1378,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" +checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" dependencies = [ "ahash", "arrow", @@ -1287,9 +1400,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" +checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" dependencies = [ "ahash", "arrow", @@ -1301,9 +1414,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" +checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" dependencies = [ "arrow", "datafusion-common", @@ -1320,9 +1433,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" +checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" dependencies = [ "ahash", "arrow", @@ -1350,9 +1463,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f6ef4c6eb52370cb48639e25e2331a415aac0b2b0a0a472b36e26603bdf184f" +checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" dependencies = [ "arrow", "chrono", @@ -1366,9 +1479,9 @@ dependencies = [ [[package]] name = 
"datafusion-proto-common" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5faf4a9bbb0d0a305fea8a6db21ba863286b53e53a212e687d2774028dd6f03f" +checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" dependencies = [ "arrow", "datafusion-common", @@ -1398,11 +1511,35 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-session" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + [[package]] name = "datafusion-sql" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" +checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" dependencies = [ "arrow", "bigdecimal", @@ -1417,9 +1554,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "46.0.1" +version = "47.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c2be3226a683e02cff65181e66e62eba9f812ed0e9b7ec8fe11ac8dabf1a73f" +checksum = "061efc0937f0ce3abb37ed0d56cfa01dd0e654b90e408656d05e846c8b7599fe" dependencies = [ "async-recursion", "async-trait", @@ -1453,7 +1590,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1498,21 +1635,22 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = 
"24.12.23" +version = "25.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" dependencies = [ - "bitflags 1.3.2", + "bitflags", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", + "libz-rs-sys", "miniz_oxide", ] @@ -1593,7 +1731,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1703,9 +1841,9 @@ dependencies = [ [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1986,7 +2124,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2012,9 +2150,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -2207,6 +2345,15 @@ dependencies = [ "libc", ] +[[package]] +name = "libz-rs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" +dependencies = [ + "zlib-rs", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -2241,7 +2388,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ - "twox-hash", + "twox-hash 1.6.3", ] [[package]] @@ -2297,9 +2444,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.4" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -2407,19 +2554,22 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +checksum = "e9ce831b09395f933addbc56d894d889e4b226eba304d4e7adbab591e26daf1e" dependencies = [ "async-trait", "base64 0.22.1", "bytes", "chrono", + "form_urlencoded", "futures", + "http", + "http-body-util", "httparse", "humantime", "hyper", - "itertools 0.13.0", + "itertools 0.14.0", "md-5", "parking_lot", "percent-encoding", @@ -2430,7 +2580,8 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "snafu", + "serde_urlencoded", + "thiserror 2.0.11", "tokio", "tracing", "url", @@ -2483,9 +2634,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.3.0" +version = "55.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94243778210509a5a5e9e012872127180c155d73a9cd6e2df9243d213e81e100" +checksum = "cd31a8290ac5b19f09ad77ee7a1e6a541f1be7674ad410547d5f1eef6eef4a9c" dependencies = [ "ahash", "arrow-array", @@ -2513,7 +2664,7 @@ dependencies = [ 
"snap", "thrift", "tokio", - "twox-hash", + "twox-hash 2.1.0", "zstd", ] @@ -2658,12 +2809,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.29" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ "proc-macro2", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2692,7 +2843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -2701,7 +2852,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.98", + "syn 2.0.100", "tempfile", ] @@ -2712,10 +2863,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2747,9 +2898,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" +checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" dependencies = [ "cfg-if", "indoc", @@ -2765,9 +2916,9 @@ dependencies = [ [[package]] name = "pyo3-async-runtimes" -version = "0.23.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "977dc837525cfd22919ba6a831413854beb7c99a256c03bf8624ad707e45810e" +checksum = "dd0b83dc42f9d41f50d38180dad65f0c99763b65a3ff2a81bf351dd35a1df8bf" dependencies = [ "futures", "once_cell", @@ -2778,9 +2929,9 @@ dependencies = [ [[package]] name = 
"pyo3-build-config" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" +checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" dependencies = [ "once_cell", "target-lexicon", @@ -2788,9 +2939,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" +checksum = "05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" dependencies = [ "libc", "pyo3-build-config", @@ -2798,27 +2949,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" +checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "pyo3-macros-backend" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" +checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2891,9 +3042,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -2945,7 +3096,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2954,7 +3105,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.8.0", + "bitflags", ] [[package]] @@ -3104,7 +3255,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.8.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3198,9 +3349,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", "schemars_derive", @@ -3210,14 +3361,14 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3232,7 +3383,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.8.0", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -3251,9 +3402,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.25" +version = "1.0.26" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" dependencies = [ "serde", ] @@ -3266,9 +3417,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -3284,13 +3435,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3301,14 +3452,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "serde_json" -version = "1.0.138" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -3325,7 +3476,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3397,27 +3548,6 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" -[[package]] -name = "snafu" -version = "0.8.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "snap" version = "1.1.1" @@ -3436,9 +3566,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.54.0" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", "recursive", @@ -3453,7 +3583,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3497,14 +3627,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "substrait" -version = "0.53.2" +version = "0.55.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b" +checksum = "048fe52a3664881ccdfdc9bdb0f4e8805f3444ee64abf299d365c54f6a2ffabb" dependencies = [ "heck", "pbjson", @@ -3521,7 +3651,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.98", + "syn 2.0.100", "typify", "walkdir", ] @@ -3545,9 +3675,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", 
@@ -3571,14 +3701,14 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "target-lexicon" -version = "0.12.16" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] name = "tempfile" @@ -3620,7 +3750,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3631,7 +3761,7 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3681,9 +3811,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.43.0" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", @@ -3703,7 +3833,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3718,9 +3848,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" dependencies = [ "bytes", "futures-core", @@ -3775,7 +3905,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3818,6 +3948,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "twox-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" + [[package]] name = "typed-arena" version = "2.0.2" @@ -3841,7 +3977,7 @@ checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3875,7 +4011,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.98", + "syn 2.0.100", "thiserror 2.0.11", "unicode-ident", ] @@ -3893,7 +4029,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.98", + "syn 2.0.100", "typify-impl", ] @@ -4030,7 +4166,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -4065,7 +4201,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4276,7 +4412,7 @@ version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags 2.8.0", + "bitflags", ] [[package]] @@ -4320,7 +4456,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "synstructure", ] @@ -4342,7 +4478,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -4362,7 +4498,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 
2.0.100", "synstructure", ] @@ -4391,9 +4527,15 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] +[[package]] +name = "zlib-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index bc8639d4c..2c4188bb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,25 +34,25 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.43", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } -pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} -arrow = { version = "54.2.1", features = ["pyarrow"] } -datafusion = { version = "46.0.1", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "46.0.1", optional = true } -datafusion-proto = { version = "46.0.1" } -datafusion-ffi = { version = "46.0.1" } +tokio = { version = "1.44", features = ["macros", "rt", "rt-multi-thread", "sync"] } +pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] } +pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]} +arrow = { version = "55.0.0", features = ["pyarrow"] } +datafusion = { version = "47.0.0", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "47.0.0", optional = true } +datafusion-proto = { version = "47.0.0" } +datafusion-ffi = { version = "47.0.0" } prost = "0.13.1" # keep in line with `datafusion-substrait` -uuid = { version = "1.12", features = ["v4"] } +uuid = { version = "1.16", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } 
-async-trait = "0.1.73" +async-trait = "0.1.88" futures = "0.3" -object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } +object_store = { version = "0.12.0", features = ["aws", "gcp", "azure", "http"] } url = "2" [build-dependencies] prost-types = "0.13.1" # keep in line with `datafusion-substrait` -pyo3-build-config = "0.23" +pyo3-build-config = "0.24" [lib] name = "datafusion_python" diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py index 61b1c7d80..49dfb38cf 100644 --- a/python/tests/test_aggregation.py +++ b/python/tests/test_aggregation.py @@ -338,7 +338,6 @@ def test_bit_and_bool_fns(df, name, expr, result): ), [7, 9], ), - ("last_value", f.last_value(column("a")), [3, 6]), ( "last_value_ordered", f.last_value(column("a"), order_by=[column("a").sort(ascending=False)]), diff --git a/src/dataframe.rs b/src/dataframe.rs index 9b610b5d7..787f63520 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -216,7 +216,7 @@ impl PyDataFrame { #[pyo3(signature = (*args))] fn select(&self, args: Vec) -> PyDataFusionResult { - let expr = args.into_iter().map(|e| e.into()).collect(); + let expr: Vec = args.into_iter().map(|e| e.into()).collect(); let df = self.df.as_ref().clone().select(expr)?; Ok(Self::new(df)) } diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 445e4fe74..aab8d7566 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -275,7 +275,9 @@ impl DisplayAs for DatasetExec { Python::with_gil(|py| { let number_of_fragments = self.fragments.bind(py).len(); match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { + DisplayFormatType::Default + | DisplayFormatType::Verbose + | DisplayFormatType::TreeRender => { let projected_columns: Vec = self .schema .fields() diff --git a/src/expr.rs b/src/expr.rs index 561170289..fe0e76daa 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -714,9 +714,19 @@ impl PyExpr { | Operator::BitwiseXor | Operator::BitwiseAnd | Operator::BitwiseOr => 
DataTypeMap::map_from_arrow_type(&DataType::Binary), - Operator::AtArrow | Operator::ArrowAt => { - Err(py_type_err(format!("Unsupported expr: ${op}"))) - } + Operator::AtArrow + | Operator::ArrowAt + | Operator::Arrow + | Operator::LongArrow + | Operator::HashArrow + | Operator::HashLongArrow + | Operator::AtAt + | Operator::IntegerDivide + | Operator::HashMinus + | Operator::AtQuestion + | Operator::Question + | Operator::QuestionAnd + | Operator::QuestionPipe => Err(py_type_err(format!("Unsupported expr: ${op}"))), }, Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value), diff --git a/src/functions.rs b/src/functions.rs index 9c406b95a..476c2b80e 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -375,27 +375,6 @@ macro_rules! aggregate_function { }; } -macro_rules! aggregate_function_vec_args { - ($NAME: ident) => { - aggregate_function_vec_args!($NAME, expr); - }; - ($NAME: ident, $($arg:ident)*) => { - #[pyfunction] - #[pyo3(signature = ($($arg),*, distinct=None, filter=None, order_by=None, null_treatment=None))] - fn $NAME( - $($arg: PyExpr),*, - distinct: Option, - filter: Option, - order_by: Option>, - null_treatment: Option - ) -> PyDataFusionResult { - let agg_fn = functions_aggregate::expr_fn::$NAME(vec![$($arg.into()),*]); - - add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) - } - }; -} - /// Generates a [pyo3] wrapper for [datafusion::functions::expr_fn] /// /// These functions have explicit named arguments. 
@@ -698,8 +677,22 @@ pub fn approx_percentile_cont_with_weight( add_builder_fns_to_aggregate(agg_fn, None, filter, None, None) } -aggregate_function_vec_args!(last_value); +// We handle first_value explicitly because the signature expects an order_by +// https://github.com/apache/datafusion/issues/12376 +#[pyfunction] +#[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] +pub fn last_value( + expr: PyExpr, + distinct: Option, + filter: Option, + order_by: Option>, + null_treatment: Option, +) -> PyDataFusionResult { + // If we initialize the UDAF with order_by directly, then it gets over-written by the builder + let agg_fn = functions_aggregate::expr_fn::last_value(expr.expr, None); + add_builder_fns_to_aggregate(agg_fn, distinct, filter, order_by, null_treatment) +} // We handle first_value explicitly because the signature expects an order_by // https://github.com/apache/datafusion/issues/12376 #[pyfunction] From c9f15547cb8019068bbf2dc8eaf148d6eb42bd48 Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Fri, 25 Apr 2025 21:01:54 +0800 Subject: [PATCH 034/206] feat: alias with metadata (#1111) * feat: alias with metadata * fmt --- python/datafusion/expr.py | 14 +++++++++++--- python/datafusion/functions.py | 15 ++++++++++++--- python/tests/test_expr.py | 5 +++++ python/tests/test_functions.py | 5 +++++ src/expr.rs | 6 ++++-- src/functions.rs | 9 +++++++-- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 2697d8143..01e1f3ded 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -406,9 +406,17 @@ def column(value: str) -> Expr: """Creates a new expression representing a column.""" return Expr(expr_internal.RawExpr.column(value)) - def alias(self, name: str) -> Expr: - """Assign a name to the expression.""" - return Expr(self.expr.alias(name)) + def alias(self, name: str, metadata: Optional[dict[str, str]] = None) -> Expr: + """Assign 
a name to the expression. + + Args: + name: The name to assign to the expression. + metadata: Optional metadata to attach to the expression. + + Returns: + A new expression with the assigned name. + """ + return Expr(self.expr.alias(name, metadata)) def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 5cf914e16..f430cdf4b 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -372,9 +372,18 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) -def alias(expr: Expr, name: str) -> Expr: - """Creates an alias expression.""" - return Expr(f.alias(expr.expr, name)) +def alias(expr: Expr, name: str, metadata: Optional[dict[str, str]] = None) -> Expr: + """Creates an alias expression with an optional metadata dictionary. 
+ + Args: + expr: The expression to alias + name: The alias name + metadata: Optional metadata to attach to the column + + Returns: + An expression with the given alias + """ + return Expr(f.alias(expr.expr, name, metadata)) def col(name: str) -> Expr: diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 926e69845..dcf75f021 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -247,3 +247,8 @@ def test_fill_null(df): assert result.column(0) == pa.array([1, 2, 100]) assert result.column(1) == pa.array([4, 25, 6]) assert result.column(2) == pa.array([1234, 1234, 8]) + + +def test_alias_with_metadata(df): + df = df.select(col("a").alias("b", {"key": "value"})) + assert df.schema().field("b").metadata == {b"key": b"value"} diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 37f2075f5..90cf01f7e 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -1231,3 +1231,8 @@ def test_between_default(df): actual = df.collect()[0].to_pydict() assert actual == expected + + +def test_alias_with_metadata(df): + df = df.select(f.alias(f.col("a"), "b", {"key": "value"})) + assert df.schema().field("b").metadata == {b"key": b"value"} diff --git a/src/expr.rs b/src/expr.rs index fe0e76daa..7d4aa8798 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -22,6 +22,7 @@ use datafusion::logical_expr::{ }; use pyo3::IntoPyObjectExt; use pyo3::{basic::CompareOp, prelude::*}; +use std::collections::HashMap; use std::convert::{From, Into}; use std::sync::Arc; use window::PyWindowFrame; @@ -275,8 +276,9 @@ impl PyExpr { } /// assign a name to the PyExpr - pub fn alias(&self, name: &str) -> PyExpr { - self.expr.clone().alias(name).into() + #[pyo3(signature = (name, metadata=None))] + pub fn alias(&self, name: &str, metadata: Option>) -> PyExpr { + self.expr.clone().alias_with_metadata(name, metadata).into() } /// Create a sort PyExpr from an existing PyExpr. 
diff --git a/src/functions.rs b/src/functions.rs index 476c2b80e..caa79b8ad 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; + use datafusion::functions_aggregate::all_default_aggregate_functions; use datafusion::functions_window::all_default_window_functions; use datafusion::logical_expr::expr::WindowFunctionParams; @@ -205,10 +207,13 @@ fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult /// Creates a new Alias Expr #[pyfunction] -fn alias(expr: PyExpr, name: &str) -> PyResult { +#[pyo3(signature = (expr, name, metadata=None))] +fn alias(expr: PyExpr, name: &str, metadata: Option>) -> PyResult { let relation: Option = None; Ok(PyExpr { - expr: datafusion::logical_expr::Expr::Alias(Alias::new(expr.expr, relation, name)), + expr: datafusion::logical_expr::Expr::Alias( + Alias::new(expr.expr, relation, name).with_metadata(metadata), + ), }) } From 91b66351fb19d91b62e8db83444141743b106e43 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sun, 27 Apr 2025 21:41:01 +0800 Subject: [PATCH 035/206] Add DataFrame usage guide with HTML rendering customization options (#1108) * docs: enhance user guide with detailed DataFrame operations and examples * move /docs/source/api/dataframe.rst into user-guide * docs: remove DataFrame API documentation * docs: fix formatting inconsistencies in DataFrame user guide * Two minor corrections to documentation rendering --------- Co-authored-by: Tim Saucer --- docs/source/index.rst | 1 + docs/source/user-guide/basics.rst | 5 +- docs/source/user-guide/dataframe.rst | 179 +++++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 docs/source/user-guide/dataframe.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 558b2d572..c18793822 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -72,6 +72,7 @@ Example user-guide/introduction 
user-guide/basics user-guide/data-sources + user-guide/dataframe user-guide/common-operations/index user-guide/io/index user-guide/configuration diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index 6636c0c6a..2975d9a6b 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -21,7 +21,8 @@ Concepts ======== In this section, we will cover a basic example to introduce a few key concepts. We will use the -2021 Yellow Taxi Trip Records ([download](https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet)), from the [TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page). +2021 Yellow Taxi Trip Records (`download <https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet>`_), +from the `TLC Trip Record Data <https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page>`_. .. ipython:: python @@ -72,6 +73,8 @@ DataFrames are typically created by calling a method on :py:class:`~datafusion.c calling the transformation methods, such as :py:func:`~datafusion.dataframe.DataFrame.filter`, :py:func:`~datafusion.dataframe.DataFrame.select`, :py:func:`~datafusion.dataframe.DataFrame.aggregate`, and :py:func:`~datafusion.dataframe.DataFrame.limit` to build up a query definition. +For more details on working with DataFrames, including visualization options and conversion to other formats, see :doc:`dataframe`. + Expressions ----------- diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst new file mode 100644 index 000000000..a78fd8073 --- /dev/null +++ b/docs/source/user-guide/dataframe.rst @@ -0,0 +1,179 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +..
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +DataFrames +========== + +Overview +-------- + +DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. +It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust +and Arrow. + +A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation. +The actual execution happens when terminal operations like ``collect()`` or ``show()`` are called. + +Basic Usage +----------- + +.. code-block:: python + + import datafusion + from datafusion import col, lit + + # Create a context and register a data source + ctx = datafusion.SessionContext() + ctx.register_csv("my_table", "path/to/data.csv") + + # Create and manipulate a DataFrame + df = ctx.sql("SELECT * FROM my_table") + + # Or use the DataFrame API directly + df = (ctx.table("my_table") + .filter(col("age") > lit(25)) + .select([col("name"), col("age")])) + + # Execute and collect results + result = df.collect() + + # Display the first few rows + df.show() + +HTML Rendering +-------------- + +When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will +automatically display as formatted HTML tables, making it easier to visualize your data. + +The ``_repr_html_`` method is called automatically by Jupyter to render a DataFrame. This method +controls how DataFrames appear in notebook environments, providing a richer visualization than +plain text output. 
+ +Customizing HTML Rendering +-------------------------- + +You can customize how DataFrames are rendered in HTML by configuring the formatter: + +.. code-block:: python + + from datafusion.html_formatter import configure_formatter + + # Change the default styling + configure_formatter( + max_rows=50, # Maximum number of rows to display + max_width=None, # Maximum width in pixels (None for auto) + theme="light", # Theme: "light" or "dark" + precision=2, # Floating point precision + thousands_separator=",", # Separator for thousands + date_format="%Y-%m-%d", # Date format + truncate_width=20 # Max width for string columns before truncating + ) + +The formatter settings affect all DataFrames displayed after configuration. + +Custom Style Providers +---------------------- + +For advanced styling needs, you can create a custom style provider: + +.. code-block:: python + + from datafusion.html_formatter import StyleProvider, configure_formatter + + class MyStyleProvider(StyleProvider): + def get_table_styles(self): + return { + "table": "border-collapse: collapse; width: 100%;", + "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;", + "td": "border: 1px solid #ddd; padding: 8px;", + "tr:nth-child(even)": "background-color: #f2f2f2;", + } + + def get_value_styles(self, dtype, value): + """Return custom styles for specific values""" + if dtype == "float" and value < 0: + return "color: red;" + return None + + # Apply the custom style provider + configure_formatter(style_provider=MyStyleProvider()) + +Creating a Custom Formatter +--------------------------- + +For complete control over rendering, you can implement a custom formatter: + +.. code-block:: python + + from datafusion.html_formatter import Formatter, get_formatter + + class MyFormatter(Formatter): + def format_html(self, batches, schema, has_more=False, table_uuid=None): + # Create your custom HTML here + html = "
" + # ... formatting logic ... + html += "
" + return html + + # Set as the global formatter + configure_formatter(formatter_class=MyFormatter) + + # Or use the formatter just for specific operations + formatter = get_formatter() + custom_html = formatter.format_html(batches, schema) + +Managing Formatters +------------------- + +Reset to default formatting: + +.. code-block:: python + + from datafusion.html_formatter import reset_formatter + + # Reset to default settings + reset_formatter() + +Get the current formatter settings: + +.. code-block:: python + + from datafusion.html_formatter import get_formatter + + formatter = get_formatter() + print(formatter.max_rows) + print(formatter.theme) + +Contextual Formatting +--------------------- + +You can also use a context manager to temporarily change formatting settings: + +.. code-block:: python + + from datafusion.html_formatter import formatting_context + + # Default formatting + df.show() + + # Temporarily use different formatting + with formatting_context(max_rows=100, theme="dark"): + df.show() # Will use the temporary settings + + # Back to default formatting + df.show() From 00dea113eb85d54b758eb3451ea448c7b9263c1c Mon Sep 17 00:00:00 2001 From: deanm0000 <37878412+deanm0000@users.noreply.github.com> Date: Sun, 27 Apr 2025 10:14:54 -0400 Subject: [PATCH 036/206] Improve col class access using __getattr__ Co-authored-by: Tim Saucer --- python/datafusion/__init__.py | 12 ++-------- python/datafusion/col.py | 45 +++++++++++++++++++++++++++++++++++ python/tests/test_expr.py | 23 ++++++++++++++++++ 3 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 python/datafusion/col.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 60d0d61b4..15ceefbdb 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,6 +26,8 @@ except ImportError: import importlib_metadata +from datafusion.col import col, column + from . 
import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. @@ -95,16 +97,6 @@ ] -def column(value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - -def col(value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - def literal(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/col.py b/python/datafusion/col.py new file mode 100644 index 000000000..1141dc092 --- /dev/null +++ b/python/datafusion/col.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Col class.""" + +from datafusion.expr import Expr + + +class Col: + """Create a column expression. + + This helper class allows an extra syntax of creating columns using the __getattr__ + method. 
+ """ + + def __call__(self, value: str) -> Expr: + """Create a column expression.""" + return Expr.column(value) + + def __getattr__(self, value: str) -> Expr: + """Create a column using attribute syntax.""" + # For autocomplete to work with IPython + if value.startswith("__wrapped__"): + return getattr(type(self), value) + + return Expr.column(value) + + +col: Col = Col() +column: Col = Col() +__all__ = ["col", "column"] diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index dcf75f021..3651b60d6 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -249,6 +249,29 @@ def test_fill_null(df): assert result.column(2) == pa.array([1234, 1234, 8]) +def test_col_getattr(): + ctx = SessionContext() + data = { + "array_values": [[1, 2, 3], [4, 5], [6], []], + "struct_values": [ + {"name": "Alice", "age": 15}, + {"name": "Bob", "age": 14}, + {"name": "Charlie", "age": 13}, + {"name": None, "age": 12}, + ], + } + df = ctx.from_pydict(data, name="table1") + + names = df.select(col.struct_values["name"].alias("name")).collect() + names = [r.as_py() for rs in names for r in rs["name"]] + + array_values = df.select(col.array_values[1].alias("value")).collect() + array_values = [r.as_py() for rs in array_values for r in rs["value"]] + + assert names == ["Alice", "Bob", "Charlie", None] + assert array_values == [2, 5, None, None] + + def test_alias_with_metadata(df): df = df.select(col("a").alias("b", {"key": "value"})) assert df.schema().field("b").metadata == {b"key": b"value"} From 5a7f638286d2397bbce87e0e8197bebb46f26649 Mon Sep 17 00:00:00 2001 From: deanm0000 <37878412+deanm0000@users.noreply.github.com> Date: Sun, 27 Apr 2025 10:17:41 -0400 Subject: [PATCH 037/206] Add expression chaining of single parameter scalar functions --- python/datafusion/expr.py | 289 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 289 insertions(+) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 01e1f3ded..84e9d4ebb 100644 --- 
a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -24,6 +24,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Optional +import functions as F import pyarrow as pa try: @@ -611,6 +612,294 @@ def over(self, window: Window) -> Expr: ) ) + def asin(self) -> Expr: + """Returns the arc sine or inverse sine of a number.""" + return F.asin(self) + + def array_pop_back(self) -> Expr: + """Returns the array without the last element.""" + return F.array_pop_back(self) + + def reverse(self) -> Expr: + """Reverse the string argument.""" + return F.reverse(self) + + def bit_length(self) -> Expr: + """Returns the number of bits in the string argument.""" + return F.bit_length(self) + + def array_length(self) -> Expr: + """Returns the length of the array.""" + return F.array_length(self) + + def array_ndims(self) -> Expr: + """Returns the number of dimensions of the array.""" + return F.array_ndims(self) + + def to_hex(self) -> Expr: + """Converts an integer to a hexadecimal string.""" + return F.to_hex(self) + + def array_dims(self) -> Expr: + """Returns an array of the array's dimensions.""" + return F.array_dims(self) + + def from_unixtime(self) -> Expr: + """Converts an integer to RFC3339 timestamp format string.""" + return F.from_unixtime(self) + + def array_empty(self) -> Expr: + """Returns a boolean indicating whether the array is empty.""" + return F.array_empty(self) + + def sin(self) -> Expr: + """Returns the sine of the argument.""" + return F.sin(self) + + def log10(self) -> Expr: + """Base 10 logarithm of the argument.""" + return F.log10(self) + + def initcap(self) -> Expr: + """Set the initial letter of each word to capital. + + Converts the first letter of each word in ``string`` to uppercase and the remaining + characters to lowercase. + """ + return F.initcap(self) + + def list_distinct(self) -> Expr: + """Returns distinct values from the array after removing duplicates. + + This is an alias for :py:func:`array_distinct`. 
+ """ + return F.list_distinct(self) + + def iszero(self) -> Expr: + """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" + return F.iszero(self) + + def array_distinct(self) -> Expr: + """Returns distinct values from the array after removing duplicates.""" + return F.array_distinct(self) + + def arrow_typeof(self) -> Expr: + """Returns the Arrow type of the expression.""" + return F.arrow_typeof(self) + + def length(self) -> Expr: + """The number of characters in the ``string``.""" + return F.length(self) + + def lower(self) -> Expr: + """Converts a string to lowercase.""" + return F.lower(self) + + def acos(self) -> Expr: + """Returns the arc cosine or inverse cosine of a number. + + Returns: + -------- + Expr + A new expression representing the arc cosine of the input expression. + """ + return F.acos(self) + + def ascii(self) -> Expr: + """Returns the numeric code of the first character of the argument.""" + return F.ascii(self) + + def sha384(self) -> Expr: + """Computes the SHA-384 hash of a binary string.""" + return F.sha384(self) + + def isnan(self) -> Expr: + """Returns true if a given number is +NaN or -NaN otherwise returns false.""" + return F.isnan(self) + + def degrees(self) -> Expr: + """Converts the argument from radians to degrees.""" + return F.degrees(self) + + def cardinality(self) -> Expr: + """Returns the total number of elements in the array.""" + return F.cardinality(self) + + def sha224(self) -> Expr: + """Computes the SHA-224 hash of a binary string.""" + return F.sha224(self) + + def asinh(self) -> Expr: + """Returns inverse hyperbolic sine.""" + return F.asinh(self) + + def flatten(self) -> Expr: + """Flattens an array of arrays into a single array.""" + return F.flatten(self) + + def exp(self) -> Expr: + """Returns the exponential of the argument.""" + return F.exp(self) + + def abs(self) -> Expr: + """Return the absolute value of a given number. 
+ + Returns: + -------- + Expr + A new expression representing the absolute value of the input expression. + """ + return F.abs(self) + + def btrim(self) -> Expr: + """Removes all characters, spaces by default, from both sides of a string.""" + return F.btrim(self) + + def md5(self) -> Expr: + """Computes an MD5 128-bit checksum for a string expression.""" + return F.md5(self) + + def octet_length(self) -> Expr: + """Returns the number of bytes of a string.""" + return F.octet_length(self) + + def cosh(self) -> Expr: + """Returns the hyperbolic cosine of the argument.""" + return F.cosh(self) + + def radians(self) -> Expr: + """Converts the argument from degrees to radians.""" + return F.radians(self) + + def sqrt(self) -> Expr: + """Returns the square root of the argument.""" + return F.sqrt(self) + + def character_length(self) -> Expr: + """Returns the number of characters in the argument.""" + return F.character_length(self) + + def tanh(self) -> Expr: + """Returns the hyperbolic tangent of the argument.""" + return F.tanh(self) + + def atan(self) -> Expr: + """Returns inverse tangent of a number.""" + return F.atan(self) + + def rtrim(self) -> Expr: + """Removes all characters, spaces by default, from the end of a string.""" + return F.rtrim(self) + + def atanh(self) -> Expr: + """Returns inverse hyperbolic tangent.""" + return F.atanh(self) + + def list_dims(self) -> Expr: + """Returns an array of the array's dimensions. + + This is an alias for :py:func:`array_dims`. 
+ """ + return F.list_dims(self) + + def sha256(self) -> Expr: + """Computes the SHA-256 hash of a binary string.""" + return F.sha256(self) + + def factorial(self) -> Expr: + """Returns the factorial of the argument.""" + return F.factorial(self) + + def acosh(self) -> Expr: + """Returns inverse hyperbolic cosine.""" + return F.acosh(self) + + def floor(self) -> Expr: + """Returns the nearest integer less than or equal to the argument.""" + return F.floor(self) + + def ceil(self) -> Expr: + """Returns the nearest integer greater than or equal to argument.""" + return F.ceil(self) + + def list_length(self) -> Expr: + """Returns the length of the array. + + This is an alias for :py:func:`array_length`. + """ + return F.list_length(self) + + def upper(self) -> Expr: + """Converts a string to uppercase.""" + return F.upper(self) + + def chr(self) -> Expr: + """Converts the Unicode code point to a UTF8 character.""" + return F.chr(self) + + def ln(self) -> Expr: + """Returns the natural logarithm (base e) of the argument.""" + return F.ln(self) + + def tan(self) -> Expr: + """Returns the tangent of the argument.""" + return F.tan(self) + + def array_pop_front(self) -> Expr: + """Returns the array without the first element.""" + return F.array_pop_front(self) + + def cbrt(self) -> Expr: + """Returns the cube root of a number.""" + return F.cbrt(self) + + def sha512(self) -> Expr: + """Computes the SHA-512 hash of a binary string.""" + return F.sha512(self) + + def char_length(self) -> Expr: + """The number of characters in the ``string``.""" + return F.char_length(self) + + def list_ndims(self) -> Expr: + """Returns the number of dimensions of the array. + + This is an alias for :py:func:`array_ndims`. 
+ """ + return F.list_ndims(self) + + def trim(self) -> Expr: + """Removes all characters, spaces by default, from both sides of a string.""" + return F.trim(self) + + def cos(self) -> Expr: + """Returns the cosine of the argument.""" + return F.cos(self) + + def sinh(self) -> Expr: + """Returns the hyperbolic sine of the argument.""" + return F.sinh(self) + + def empty(self) -> Expr: + """This is an alias for :py:func:`array_empty`.""" + return F.empty(self) + + def ltrim(self) -> Expr: + """Removes all characters, spaces by default, from the beginning of a string.""" + return F.ltrim(self) + + def signum(self) -> Expr: + """Returns the sign of the argument (-1, 0, +1).""" + return F.signum(self) + + def log2(self) -> Expr: + """Base 2 logarithm of the argument.""" + return F.log2(self) + + def cot(self) -> Expr: + """Returns the cotangent of the argument.""" + return F.cot(self) + class ExprFuncBuilder: def __init__(self, builder: expr_internal.ExprFuncBuilder) -> None: From 10600fb8fc32eba43b0b0f198325b55c63f8223d Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Mon, 28 Apr 2025 21:25:59 +0800 Subject: [PATCH 038/206] fix: recursive import (#1117) * fix: recursive import * format * format --- python/datafusion/expr.py | 135 +++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 3 deletions(-) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 84e9d4ebb..3750eeb3f 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, Any, ClassVar, Optional -import functions as F import pyarrow as pa try: @@ -614,58 +613,84 @@ def over(self, window: Window) -> Expr: def asin(self) -> Expr: """Returns the arc sine or inverse sine of a number.""" + from . import functions as F + return F.asin(self) def array_pop_back(self) -> Expr: """Returns the array without the last element.""" + from . 
import functions as F + return F.array_pop_back(self) def reverse(self) -> Expr: """Reverse the string argument.""" + from . import functions as F + return F.reverse(self) def bit_length(self) -> Expr: """Returns the number of bits in the string argument.""" + from . import functions as F + return F.bit_length(self) def array_length(self) -> Expr: """Returns the length of the array.""" + from . import functions as F + return F.array_length(self) def array_ndims(self) -> Expr: """Returns the number of dimensions of the array.""" + from . import functions as F + return F.array_ndims(self) def to_hex(self) -> Expr: """Converts an integer to a hexadecimal string.""" + from . import functions as F + return F.to_hex(self) def array_dims(self) -> Expr: """Returns an array of the array's dimensions.""" + from . import functions as F + return F.array_dims(self) def from_unixtime(self) -> Expr: """Converts an integer to RFC3339 timestamp format string.""" + from . import functions as F + return F.from_unixtime(self) def array_empty(self) -> Expr: """Returns a boolean indicating whether the array is empty.""" + from . import functions as F + return F.array_empty(self) def sin(self) -> Expr: """Returns the sine of the argument.""" + from . import functions as F + return F.sin(self) def log10(self) -> Expr: """Base 10 logarithm of the argument.""" + from . import functions as F + return F.log10(self) def initcap(self) -> Expr: """Set the initial letter of each word to capital. - Converts the first letter of each word in ``string`` to uppercase and the remaining - characters to lowercase. + Converts the first letter of each word in ``string`` + to uppercase and the remaining characters to lowercase. """ + from . import functions as F + return F.initcap(self) def list_distinct(self) -> Expr: @@ -673,26 +698,38 @@ def list_distinct(self) -> Expr: This is an alias for :py:func:`array_distinct`. """ + from . 
import functions as F + return F.list_distinct(self) def iszero(self) -> Expr: """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" + from . import functions as F + return F.iszero(self) def array_distinct(self) -> Expr: """Returns distinct values from the array after removing duplicates.""" + from . import functions as F + return F.array_distinct(self) def arrow_typeof(self) -> Expr: """Returns the Arrow type of the expression.""" + from . import functions as F + return F.arrow_typeof(self) def length(self) -> Expr: """The number of characters in the ``string``.""" + from . import functions as F + return F.length(self) def lower(self) -> Expr: """Converts a string to lowercase.""" + from . import functions as F + return F.lower(self) def acos(self) -> Expr: @@ -703,42 +740,62 @@ def acos(self) -> Expr: Expr A new expression representing the arc cosine of the input expression. """ + from . import functions as F + return F.acos(self) def ascii(self) -> Expr: """Returns the numeric code of the first character of the argument.""" + from . import functions as F + return F.ascii(self) def sha384(self) -> Expr: """Computes the SHA-384 hash of a binary string.""" + from . import functions as F + return F.sha384(self) def isnan(self) -> Expr: """Returns true if a given number is +NaN or -NaN otherwise returns false.""" + from . import functions as F + return F.isnan(self) def degrees(self) -> Expr: """Converts the argument from radians to degrees.""" + from . import functions as F + return F.degrees(self) def cardinality(self) -> Expr: """Returns the total number of elements in the array.""" + from . import functions as F + return F.cardinality(self) def sha224(self) -> Expr: """Computes the SHA-224 hash of a binary string.""" + from . import functions as F + return F.sha224(self) def asinh(self) -> Expr: """Returns inverse hyperbolic sine.""" + from . 
import functions as F + return F.asinh(self) def flatten(self) -> Expr: """Flattens an array of arrays into a single array.""" + from . import functions as F + return F.flatten(self) def exp(self) -> Expr: """Returns the exponential of the argument.""" + from . import functions as F + return F.exp(self) def abs(self) -> Expr: @@ -749,50 +806,74 @@ def abs(self) -> Expr: Expr A new expression representing the absolute value of the input expression. """ + from . import functions as F + return F.abs(self) def btrim(self) -> Expr: """Removes all characters, spaces by default, from both sides of a string.""" + from . import functions as F + return F.btrim(self) def md5(self) -> Expr: """Computes an MD5 128-bit checksum for a string expression.""" + from . import functions as F + return F.md5(self) def octet_length(self) -> Expr: """Returns the number of bytes of a string.""" + from . import functions as F + return F.octet_length(self) def cosh(self) -> Expr: """Returns the hyperbolic cosine of the argument.""" + from . import functions as F + return F.cosh(self) def radians(self) -> Expr: """Converts the argument from degrees to radians.""" + from . import functions as F + return F.radians(self) def sqrt(self) -> Expr: """Returns the square root of the argument.""" + from . import functions as F + return F.sqrt(self) def character_length(self) -> Expr: """Returns the number of characters in the argument.""" + from . import functions as F + return F.character_length(self) def tanh(self) -> Expr: """Returns the hyperbolic tangent of the argument.""" + from . import functions as F + return F.tanh(self) def atan(self) -> Expr: """Returns inverse tangent of a number.""" + from . import functions as F + return F.atan(self) def rtrim(self) -> Expr: """Removes all characters, spaces by default, from the end of a string.""" + from . import functions as F + return F.rtrim(self) def atanh(self) -> Expr: """Returns inverse hyperbolic tangent.""" + from . 
import functions as F + return F.atanh(self) def list_dims(self) -> Expr: @@ -800,26 +881,38 @@ def list_dims(self) -> Expr: This is an alias for :py:func:`array_dims`. """ + from . import functions as F + return F.list_dims(self) def sha256(self) -> Expr: """Computes the SHA-256 hash of a binary string.""" + from . import functions as F + return F.sha256(self) def factorial(self) -> Expr: """Returns the factorial of the argument.""" + from . import functions as F + return F.factorial(self) def acosh(self) -> Expr: """Returns inverse hyperbolic cosine.""" + from . import functions as F + return F.acosh(self) def floor(self) -> Expr: """Returns the nearest integer less than or equal to the argument.""" + from . import functions as F + return F.floor(self) def ceil(self) -> Expr: """Returns the nearest integer greater than or equal to argument.""" + from . import functions as F + return F.ceil(self) def list_length(self) -> Expr: @@ -827,38 +920,56 @@ def list_length(self) -> Expr: This is an alias for :py:func:`array_length`. """ + from . import functions as F + return F.list_length(self) def upper(self) -> Expr: """Converts a string to uppercase.""" + from . import functions as F + return F.upper(self) def chr(self) -> Expr: """Converts the Unicode code point to a UTF8 character.""" + from . import functions as F + return F.chr(self) def ln(self) -> Expr: """Returns the natural logarithm (base e) of the argument.""" + from . import functions as F + return F.ln(self) def tan(self) -> Expr: """Returns the tangent of the argument.""" + from . import functions as F + return F.tan(self) def array_pop_front(self) -> Expr: """Returns the array without the first element.""" + from . import functions as F + return F.array_pop_front(self) def cbrt(self) -> Expr: """Returns the cube root of a number.""" + from . import functions as F + return F.cbrt(self) def sha512(self) -> Expr: """Computes the SHA-512 hash of a binary string.""" + from . 
import functions as F + return F.sha512(self) def char_length(self) -> Expr: """The number of characters in the ``string``.""" + from . import functions as F + return F.char_length(self) def list_ndims(self) -> Expr: @@ -866,38 +977,56 @@ def list_ndims(self) -> Expr: This is an alias for :py:func:`array_ndims`. """ + from . import functions as F + return F.list_ndims(self) def trim(self) -> Expr: """Removes all characters, spaces by default, from both sides of a string.""" + from . import functions as F + return F.trim(self) def cos(self) -> Expr: """Returns the cosine of the argument.""" + from . import functions as F + return F.cos(self) def sinh(self) -> Expr: """Returns the hyperbolic sine of the argument.""" + from . import functions as F + return F.sinh(self) def empty(self) -> Expr: """This is an alias for :py:func:`array_empty`.""" + from . import functions as F + return F.empty(self) def ltrim(self) -> Expr: """Removes all characters, spaces by default, from the beginning of a string.""" + from . import functions as F + return F.ltrim(self) def signum(self) -> Expr: """Returns the sign of the argument (-1, 0, +1).""" + from . import functions as F + return F.signum(self) def log2(self) -> Expr: """Base 2 logarithm of the argument.""" + from . import functions as F + return F.log2(self) def cot(self) -> Expr: """Returns the cotangent of the argument.""" + from . 
import functions as F + return F.cot(self) From 6fbeceff6091aee610273d9b27106483f9ce24ea Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 1 May 2025 12:10:40 -0400 Subject: [PATCH 039/206] Copy over protected branch rule from datafusion repo (#1122) --- .asf.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.asf.yaml b/.asf.yaml index e96b43cf0..75b2262de 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -29,6 +29,10 @@ github: rebase: false features: issues: true + protected_branches: + main: + required_pull_request_reviews: + required_approving_review_count: 1 staging: whoami: asf-staging From 15b96c48eb76ad8ea19022df427aa25b06c3012b Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Mon, 5 May 2025 21:43:03 +0800 Subject: [PATCH 040/206] feat: add missing PyLogicalPlan to_variant (#1085) * add expr * format * clippy * add license * update * ruff * Update expr.py * add test * ruff * Minor ruff whitespace change * Minor format change --------- Co-authored-by: Tim Saucer --- python/datafusion/common.py | 6 + python/datafusion/expr.py | 54 +++- python/tests/test_expr.py | 86 ++++++ src/common.rs | 3 + src/common/schema.rs | 89 ++++++ src/expr.rs | 41 +++ src/expr/copy_to.rs | 138 +++++++++ src/expr/create_catalog.rs | 100 +++++++ src/expr/create_catalog_schema.rs | 100 +++++++ src/expr/create_external_table.rs | 183 ++++++++++++ src/expr/create_function.rs | 182 ++++++++++++ src/expr/create_index.rs | 129 +++++++++ src/expr/describe_table.rs | 92 ++++++ src/expr/dml.rs | 136 +++++++++ src/expr/drop_catalog_schema.rs | 116 ++++++++ src/expr/drop_function.rs | 95 +++++++ src/expr/drop_view.rs | 102 +++++++ src/expr/recursive_query.rs | 111 ++++++++ src/expr/statement.rs | 454 ++++++++++++++++++++++++++++++ src/expr/values.rs | 86 ++++++ src/sql/logical.rs | 85 +++++- 21 files changed, 2372 insertions(+), 16 deletions(-) create mode 100644 src/expr/copy_to.rs create mode 100644 src/expr/create_catalog.rs create mode 100644 src/expr/create_catalog_schema.rs create 
mode 100644 src/expr/create_external_table.rs create mode 100644 src/expr/create_function.rs create mode 100644 src/expr/create_index.rs create mode 100644 src/expr/describe_table.rs create mode 100644 src/expr/dml.rs create mode 100644 src/expr/drop_catalog_schema.rs create mode 100644 src/expr/drop_function.rs create mode 100644 src/expr/drop_view.rs create mode 100644 src/expr/recursive_query.rs create mode 100644 src/expr/statement.rs create mode 100644 src/expr/values.rs diff --git a/python/datafusion/common.py b/python/datafusion/common.py index e762a993b..c689a816d 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -33,8 +33,12 @@ SqlTable = common_internal.SqlTable SqlType = common_internal.SqlType SqlView = common_internal.SqlView +TableType = common_internal.TableType +TableSource = common_internal.TableSource +Constraints = common_internal.Constraints __all__ = [ + "Constraints", "DFSchema", "DataType", "DataTypeMap", @@ -47,6 +51,8 @@ "SqlTable", "SqlType", "SqlView", + "TableSource", + "TableType", ] diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 3750eeb3f..9e58873d0 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -54,14 +54,29 @@ Case = expr_internal.Case Cast = expr_internal.Cast Column = expr_internal.Column +CopyTo = expr_internal.CopyTo +CreateCatalog = expr_internal.CreateCatalog +CreateCatalogSchema = expr_internal.CreateCatalogSchema +CreateExternalTable = expr_internal.CreateExternalTable +CreateFunction = expr_internal.CreateFunction +CreateFunctionBody = expr_internal.CreateFunctionBody +CreateIndex = expr_internal.CreateIndex CreateMemoryTable = expr_internal.CreateMemoryTable CreateView = expr_internal.CreateView +Deallocate = expr_internal.Deallocate +DescribeTable = expr_internal.DescribeTable Distinct = expr_internal.Distinct +DmlStatement = expr_internal.DmlStatement +DropCatalogSchema = expr_internal.DropCatalogSchema +DropFunction = 
expr_internal.DropFunction DropTable = expr_internal.DropTable +DropView = expr_internal.DropView EmptyRelation = expr_internal.EmptyRelation +Execute = expr_internal.Execute Exists = expr_internal.Exists Explain = expr_internal.Explain Extension = expr_internal.Extension +FileType = expr_internal.FileType Filter = expr_internal.Filter GroupingSet = expr_internal.GroupingSet Join = expr_internal.Join @@ -83,21 +98,31 @@ Literal = expr_internal.Literal Negative = expr_internal.Negative Not = expr_internal.Not +OperateFunctionArg = expr_internal.OperateFunctionArg Partitioning = expr_internal.Partitioning Placeholder = expr_internal.Placeholder +Prepare = expr_internal.Prepare Projection = expr_internal.Projection +RecursiveQuery = expr_internal.RecursiveQuery Repartition = expr_internal.Repartition ScalarSubquery = expr_internal.ScalarSubquery ScalarVariable = expr_internal.ScalarVariable +SetVariable = expr_internal.SetVariable SimilarTo = expr_internal.SimilarTo Sort = expr_internal.Sort Subquery = expr_internal.Subquery SubqueryAlias = expr_internal.SubqueryAlias TableScan = expr_internal.TableScan +TransactionAccessMode = expr_internal.TransactionAccessMode +TransactionConclusion = expr_internal.TransactionConclusion +TransactionEnd = expr_internal.TransactionEnd +TransactionIsolationLevel = expr_internal.TransactionIsolationLevel +TransactionStart = expr_internal.TransactionStart TryCast = expr_internal.TryCast Union = expr_internal.Union Unnest = expr_internal.Unnest UnnestExpr = expr_internal.UnnestExpr +Values = expr_internal.Values WindowExpr = expr_internal.WindowExpr __all__ = [ @@ -111,15 +136,30 @@ "CaseBuilder", "Cast", "Column", + "CopyTo", + "CreateCatalog", + "CreateCatalogSchema", + "CreateExternalTable", + "CreateFunction", + "CreateFunctionBody", + "CreateIndex", "CreateMemoryTable", "CreateView", + "Deallocate", + "DescribeTable", "Distinct", + "DmlStatement", + "DropCatalogSchema", + "DropFunction", "DropTable", + "DropView", "EmptyRelation", + 
"Execute", "Exists", "Explain", "Expr", "Extension", + "FileType", "Filter", "GroupingSet", "ILike", @@ -142,22 +182,32 @@ "Literal", "Negative", "Not", + "OperateFunctionArg", "Partitioning", "Placeholder", + "Prepare", "Projection", + "RecursiveQuery", "Repartition", "ScalarSubquery", "ScalarVariable", + "SetVariable", "SimilarTo", "Sort", "SortExpr", "Subquery", "SubqueryAlias", "TableScan", + "TransactionAccessMode", + "TransactionConclusion", + "TransactionEnd", + "TransactionIsolationLevel", + "TransactionStart", "TryCast", "Union", "Unnest", "UnnestExpr", + "Values", "Window", "WindowExpr", "WindowFrame", @@ -686,8 +736,8 @@ def log10(self) -> Expr: def initcap(self) -> Expr: """Set the initial letter of each word to capital. - Converts the first letter of each word in ``string`` - to uppercase and the remaining characters to lowercase. + Converts the first letter of each word in ``string`` to uppercase and the + remaining characters to lowercase. """ from . import functions as F diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 3651b60d6..58a202724 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -23,12 +23,21 @@ AggregateFunction, BinaryExpr, Column, + CopyTo, + CreateIndex, + DescribeTable, + DmlStatement, + DropCatalogSchema, Filter, Limit, Literal, Projection, + RecursiveQuery, Sort, TableScan, + TransactionEnd, + TransactionStart, + Values, ) @@ -249,6 +258,83 @@ def test_fill_null(df): assert result.column(2) == pa.array([1234, 1234, 8]) +def test_copy_to(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + df = ctx.sql("COPY foo TO bar STORED AS CSV") + plan = df.logical_plan() + plan = plan.to_variant() + assert isinstance(plan, CopyTo) + + +def test_create_index(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + plan = ctx.sql("create index idx on foo (a)").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, CreateIndex) 
+ + +def test_describe_table(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + plan = ctx.sql("describe foo").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, DescribeTable) + + +def test_dml_statement(): + ctx = SessionContext() + ctx.sql("CREATE TABLE foo (a int, b int)").collect() + plan = ctx.sql("insert into foo values (1, 2)").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, DmlStatement) + + +def drop_catalog_schema(): + ctx = SessionContext() + plan = ctx.sql("drop schema cat").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, DropCatalogSchema) + + +def test_recursive_query(): + ctx = SessionContext() + plan = ctx.sql( + """ + WITH RECURSIVE cte AS ( + SELECT 1 as n + UNION ALL + SELECT n + 1 FROM cte WHERE n < 5 + ) + SELECT * FROM cte; + """ + ).logical_plan() + plan = plan.inputs()[0].inputs()[0].to_variant() + assert isinstance(plan, RecursiveQuery) + + +def test_values(): + ctx = SessionContext() + plan = ctx.sql("values (1, 'foo'), (2, 'bar')").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, Values) + + +def test_transaction_start(): + ctx = SessionContext() + plan = ctx.sql("START TRANSACTION").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, TransactionStart) + + +def test_transaction_end(): + ctx = SessionContext() + plan = ctx.sql("COMMIT").logical_plan() + plan = plan.to_variant() + assert isinstance(plan, TransactionEnd) + + def test_col_getattr(): ctx = SessionContext() data = { diff --git a/src/common.rs b/src/common.rs index 453bf67a4..88d2fdd5f 100644 --- a/src/common.rs +++ b/src/common.rs @@ -36,5 +36,8 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/common/schema.rs b/src/common/schema.rs index 66ce925ae..5a54fe333 100644 --- a/src/common/schema.rs 
+++ b/src/common/schema.rs @@ -15,14 +15,22 @@ // specific language governing permissions and limitations // under the License. +use std::fmt::{self, Display, Formatter}; +use std::sync::Arc; use std::{any::Any, borrow::Cow}; +use arrow::datatypes::Schema; +use arrow::pyarrow::PyArrowType; use datafusion::arrow::datatypes::SchemaRef; +use datafusion::common::Constraints; +use datafusion::datasource::TableType; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableSource}; use pyo3::prelude::*; use datafusion::logical_expr::utils::split_conjunction; +use crate::sql::logical::PyLogicalPlan; + use super::{data_type::DataTypeMap, function::SqlFunction}; #[pyclass(name = "SqlSchema", module = "datafusion.common", subclass)] @@ -218,3 +226,84 @@ impl SqlStatistics { self.row_count } } + +#[pyclass(name = "Constraints", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyConstraints { + pub constraints: Constraints, +} + +impl From for Constraints { + fn from(constraints: PyConstraints) -> Self { + constraints.constraints + } +} + +impl From for PyConstraints { + fn from(constraints: Constraints) -> Self { + PyConstraints { constraints } + } +} + +impl Display for PyConstraints { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "Constraints: {:?}", self.constraints) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "TableType", module = "datafusion.common")] +pub enum PyTableType { + Base, + View, + Temporary, +} + +impl From for datafusion::logical_expr::TableType { + fn from(table_type: PyTableType) -> Self { + match table_type { + PyTableType::Base => datafusion::logical_expr::TableType::Base, + PyTableType::View => datafusion::logical_expr::TableType::View, + PyTableType::Temporary => datafusion::logical_expr::TableType::Temporary, + } + } +} + +impl From for PyTableType { + fn from(table_type: TableType) -> Self { + match table_type { + 
datafusion::logical_expr::TableType::Base => PyTableType::Base, + datafusion::logical_expr::TableType::View => PyTableType::View, + datafusion::logical_expr::TableType::Temporary => PyTableType::Temporary, + } + } +} + +#[pyclass(name = "TableSource", module = "datafusion.common", subclass)] +#[derive(Clone)] +pub struct PyTableSource { + pub table_source: Arc, +} + +#[pymethods] +impl PyTableSource { + pub fn schema(&self) -> PyArrowType { + (*self.table_source.schema()).clone().into() + } + + pub fn constraints(&self) -> Option { + self.table_source.constraints().map(|c| PyConstraints { + constraints: c.clone(), + }) + } + + pub fn table_type(&self) -> PyTableType { + self.table_source.table_type().into() + } + + pub fn get_logical_plan(&self) -> Option { + self.table_source + .get_logical_plan() + .map(|plan| PyLogicalPlan::new(plan.into_owned())) + } +} diff --git a/src/expr.rs b/src/expr.rs index 7d4aa8798..404e575f8 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -67,10 +67,21 @@ pub mod case; pub mod cast; pub mod column; pub mod conditional_expr; +pub mod copy_to; +pub mod create_catalog; +pub mod create_catalog_schema; +pub mod create_external_table; +pub mod create_function; +pub mod create_index; pub mod create_memory_table; pub mod create_view; +pub mod describe_table; pub mod distinct; +pub mod dml; +pub mod drop_catalog_schema; +pub mod drop_function; pub mod drop_table; +pub mod drop_view; pub mod empty_relation; pub mod exists; pub mod explain; @@ -86,18 +97,21 @@ pub mod literal; pub mod logical_node; pub mod placeholder; pub mod projection; +pub mod recursive_query; pub mod repartition; pub mod scalar_subquery; pub mod scalar_variable; pub mod signature; pub mod sort; pub mod sort_expr; +pub mod statement; pub mod subquery; pub mod subquery_alias; pub mod table_scan; pub mod union; pub mod unnest; pub mod unnest_expr; +pub mod values; pub mod window; use sort_expr::{to_sort_expressions, PySortExpr}; @@ -802,5 +816,32 @@ pub(crate) fn init_module(m: 
&Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) } diff --git a/src/expr/copy_to.rs b/src/expr/copy_to.rs new file mode 100644 index 000000000..ebfcb8ebc --- /dev/null +++ b/src/expr/copy_to.rs @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + collections::HashMap, + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::{common::file_options::file_type::FileType, logical_expr::dml::CopyTo}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CopyTo", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCopyTo { + copy: CopyTo, +} + +impl From for CopyTo { + fn from(copy: PyCopyTo) -> Self { + copy.copy + } +} + +impl From for PyCopyTo { + fn from(copy: CopyTo) -> PyCopyTo { + PyCopyTo { copy } + } +} + +impl Display for PyCopyTo { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CopyTo: {:?}", self.copy.output_url) + } +} + +impl LogicalNode for PyCopyTo { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.copy.input).clone())] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyCopyTo { + #[new] + pub fn new( + input: PyLogicalPlan, + output_url: String, + partition_by: Vec, + file_type: PyFileType, + options: HashMap, + ) -> Self { + PyCopyTo { + copy: CopyTo { + input: input.plan(), + output_url, + partition_by, + file_type: file_type.file_type, + options, + }, + } + } + + fn input(&self) -> PyLogicalPlan { + PyLogicalPlan::from((*self.copy.input).clone()) + } + + fn output_url(&self) -> String { + self.copy.output_url.clone() + } + + fn partition_by(&self) -> Vec { + self.copy.partition_by.clone() + } + + fn file_type(&self) -> PyFileType { + PyFileType { + file_type: self.copy.file_type.clone(), + } + } + + fn options(&self) -> HashMap { + self.copy.options.clone() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CopyTo({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CopyTo".to_string()) + } +} + +#[pyclass(name = "FileType", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyFileType { + file_type: Arc, +} + 
+impl Display for PyFileType { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "FileType: {}", self.file_type) + } +} + +#[pymethods] +impl PyFileType { + fn __repr__(&self) -> PyResult { + Ok(format!("FileType({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("FileType".to_string()) + } +} diff --git a/src/expr/create_catalog.rs b/src/expr/create_catalog.rs new file mode 100644 index 000000000..f4ea0f517 --- /dev/null +++ b/src/expr/create_catalog.rs @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateCatalog; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CreateCatalog", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateCatalog { + create: CreateCatalog, +} + +impl From for CreateCatalog { + fn from(create: PyCreateCatalog) -> Self { + create.create + } +} + +impl From for PyCreateCatalog { + fn from(create: CreateCatalog) -> PyCreateCatalog { + PyCreateCatalog { create } + } +} + +impl Display for PyCreateCatalog { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateCatalog: {:?}", self.create.catalog_name) + } +} + +#[pymethods] +impl PyCreateCatalog { + #[new] + pub fn new( + catalog_name: String, + if_not_exists: bool, + schema: PyDFSchema, + ) -> PyResult { + Ok(PyCreateCatalog { + create: CreateCatalog { + catalog_name, + if_not_exists, + schema: Arc::new(schema.into()), + }, + }) + } + + pub fn catalog_name(&self) -> String { + self.create.catalog_name.clone() + } + + pub fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateCatalog({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateCatalog".to_string()) + } +} + +impl LogicalNode for PyCreateCatalog { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_catalog_schema.rs b/src/expr/create_catalog_schema.rs new file mode 100644 index 000000000..85f447e1e --- /dev/null +++ b/src/expr/create_catalog_schema.rs @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateCatalogSchema; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "CreateCatalogSchema", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateCatalogSchema { + create: CreateCatalogSchema, +} + +impl From for CreateCatalogSchema { + fn from(create: PyCreateCatalogSchema) -> Self { + create.create + } +} + +impl From for PyCreateCatalogSchema { + fn from(create: CreateCatalogSchema) -> PyCreateCatalogSchema { + PyCreateCatalogSchema { create } + } +} + +impl Display for PyCreateCatalogSchema { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateCatalogSchema: {:?}", self.create.schema_name) + } +} + +#[pymethods] +impl PyCreateCatalogSchema { + #[new] + pub fn new( + schema_name: String, + if_not_exists: bool, + schema: PyDFSchema, + ) -> PyResult { + Ok(PyCreateCatalogSchema { + create: CreateCatalogSchema { + schema_name, + if_not_exists, + schema: Arc::new(schema.into()), + }, + }) + } + + pub fn schema_name(&self) -> String { + self.create.schema_name.clone() + } + + pub fn if_not_exists(&self) -> 
bool { + self.create.if_not_exists + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateCatalogSchema({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateCatalogSchema".to_string()) + } +} + +impl LogicalNode for PyCreateCatalogSchema { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_external_table.rs b/src/expr/create_external_table.rs new file mode 100644 index 000000000..01ce7d0ca --- /dev/null +++ b/src/expr/create_external_table.rs @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::{common::schema::PyConstraints, expr::PyExpr, sql::logical::PyLogicalPlan}; +use std::{ + collections::HashMap, + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateExternalTable; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::common::df_schema::PyDFSchema; + +use super::{logical_node::LogicalNode, sort_expr::PySortExpr}; + +#[pyclass(name = "CreateExternalTable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateExternalTable { + create: CreateExternalTable, +} + +impl From for CreateExternalTable { + fn from(create: PyCreateExternalTable) -> Self { + create.create + } +} + +impl From for PyCreateExternalTable { + fn from(create: CreateExternalTable) -> PyCreateExternalTable { + PyCreateExternalTable { create } + } +} + +impl Display for PyCreateExternalTable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "CreateExternalTable: {:?}{}", + self.create.name, self.create.constraints + ) + } +} + +#[pymethods] +impl PyCreateExternalTable { + #[allow(clippy::too_many_arguments)] + #[new] + #[pyo3(signature = (schema, name, location, file_type, table_partition_cols, if_not_exists, temporary, order_exprs, unbounded, options, constraints, column_defaults, definition=None))] + pub fn new( + schema: PyDFSchema, + name: String, + location: String, + file_type: String, + table_partition_cols: Vec, + if_not_exists: bool, + temporary: bool, + order_exprs: Vec>, + unbounded: bool, + options: HashMap, + constraints: PyConstraints, + column_defaults: HashMap, + definition: Option, + ) -> Self { + let create = CreateExternalTable { + schema: Arc::new(schema.into()), + name: name.into(), + location, + file_type, + table_partition_cols, + if_not_exists, + temporary, + definition, + order_exprs: order_exprs + .into_iter() + .map(|vec| vec.into_iter().map(|s| s.into()).collect::>()) + .collect::>(), + unbounded, + options, + constraints: constraints.constraints, + 
column_defaults: column_defaults + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + }; + PyCreateExternalTable { create } + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + pub fn name(&self) -> PyResult { + Ok(self.create.name.to_string()) + } + + pub fn location(&self) -> String { + self.create.location.clone() + } + + pub fn file_type(&self) -> String { + self.create.file_type.clone() + } + + pub fn table_partition_cols(&self) -> Vec { + self.create.table_partition_cols.clone() + } + + pub fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + pub fn temporary(&self) -> bool { + self.create.temporary + } + + pub fn definition(&self) -> Option { + self.create.definition.clone() + } + + pub fn order_exprs(&self) -> Vec> { + self.create + .order_exprs + .iter() + .map(|vec| vec.iter().map(|s| s.clone().into()).collect()) + .collect() + } + + pub fn unbounded(&self) -> bool { + self.create.unbounded + } + + pub fn options(&self) -> HashMap { + self.create.options.clone() + } + + pub fn constraints(&self) -> PyConstraints { + PyConstraints { + constraints: self.create.constraints.clone(), + } + } + + pub fn column_defaults(&self) -> HashMap { + self.create + .column_defaults + .iter() + .map(|(k, v)| (k.clone(), v.clone().into())) + .collect() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateExternalTable({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateExternalTable".to_string()) + } +} + +impl LogicalNode for PyCreateExternalTable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_function.rs b/src/expr/create_function.rs new file mode 100644 index 000000000..6f3c3f0ff --- /dev/null +++ b/src/expr/create_function.rs @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::{ + CreateFunction, CreateFunctionBody, OperateFunctionArg, Volatility, +}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use super::logical_node::LogicalNode; +use super::PyExpr; +use crate::common::{data_type::PyDataType, df_schema::PyDFSchema}; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "CreateFunction", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateFunction { + create: CreateFunction, +} + +impl From for CreateFunction { + fn from(create: PyCreateFunction) -> Self { + create.create + } +} + +impl From for PyCreateFunction { + fn from(create: CreateFunction) -> PyCreateFunction { + PyCreateFunction { create } + } +} + +impl Display for PyCreateFunction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateFunction: name {:?}", self.create.name) + } +} + +#[pyclass(name = "OperateFunctionArg", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyOperateFunctionArg { + arg: OperateFunctionArg, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "Volatility", module = "datafusion.expr")] +pub enum PyVolatility { + Immutable, + Stable, + Volatile, +} 
+ +#[pyclass(name = "CreateFunctionBody", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateFunctionBody { + body: CreateFunctionBody, +} + +#[pymethods] +impl PyCreateFunctionBody { + pub fn language(&self) -> Option { + self.body + .language + .as_ref() + .map(|language| language.to_string()) + } + + pub fn behavior(&self) -> Option { + self.body.behavior.as_ref().map(|behavior| match behavior { + Volatility::Immutable => PyVolatility::Immutable, + Volatility::Stable => PyVolatility::Stable, + Volatility::Volatile => PyVolatility::Volatile, + }) + } + + pub fn function_body(&self) -> Option { + self.body + .function_body + .as_ref() + .map(|function_body| function_body.clone().into()) + } +} + +#[pymethods] +impl PyCreateFunction { + #[new] + #[pyo3(signature = (or_replace, temporary, name, params, schema, return_type=None, args=None))] + pub fn new( + or_replace: bool, + temporary: bool, + name: String, + params: PyCreateFunctionBody, + schema: PyDFSchema, + return_type: Option, + args: Option>, + ) -> Self { + PyCreateFunction { + create: CreateFunction { + or_replace, + temporary, + name, + args: args.map(|args| args.into_iter().map(|arg| arg.arg).collect()), + return_type: return_type.map(|return_type| return_type.data_type), + params: params.body, + schema: Arc::new(schema.into()), + }, + } + } + + pub fn or_replace(&self) -> bool { + self.create.or_replace + } + + pub fn temporary(&self) -> bool { + self.create.temporary + } + + pub fn name(&self) -> String { + self.create.name.clone() + } + + pub fn params(&self) -> PyCreateFunctionBody { + PyCreateFunctionBody { + body: self.create.params.clone(), + } + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + pub fn return_type(&self) -> Option { + self.create + .return_type + .as_ref() + .map(|return_type| return_type.clone().into()) + } + + pub fn args(&self) -> Option> { + self.create.args.as_ref().map(|args| { + args.iter() + .map(|arg| 
PyOperateFunctionArg { arg: arg.clone() }) + .collect() + }) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateFunction({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("CreateFunction".to_string()) + } +} + +impl LogicalNode for PyCreateFunction { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/create_index.rs b/src/expr/create_index.rs new file mode 100644 index 000000000..13dadbc3f --- /dev/null +++ b/src/expr/create_index.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::CreateIndex; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::{logical_node::LogicalNode, sort_expr::PySortExpr}; + +#[pyclass(name = "CreateIndex", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyCreateIndex { + create: CreateIndex, +} + +impl From for CreateIndex { + fn from(create: PyCreateIndex) -> Self { + create.create + } +} + +impl From for PyCreateIndex { + fn from(create: CreateIndex) -> PyCreateIndex { + PyCreateIndex { create } + } +} + +impl Display for PyCreateIndex { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "CreateIndex: {:?}", self.create.name) + } +} + +#[pymethods] +impl PyCreateIndex { + #[new] + #[pyo3(signature = (table, columns, unique, if_not_exists, schema, name=None, using=None))] + pub fn new( + table: String, + columns: Vec, + unique: bool, + if_not_exists: bool, + schema: PyDFSchema, + name: Option, + using: Option, + ) -> PyResult { + Ok(PyCreateIndex { + create: CreateIndex { + name, + table: table.into(), + using, + columns: columns.iter().map(|c| c.clone().into()).collect(), + unique, + if_not_exists, + schema: Arc::new(schema.into()), + }, + }) + } + + pub fn name(&self) -> Option { + self.create.name.clone() + } + + pub fn table(&self) -> PyResult { + Ok(self.create.table.to_string()) + } + + pub fn using(&self) -> Option { + self.create.using.clone() + } + + pub fn columns(&self) -> Vec { + self.create + .columns + .iter() + .map(|c| c.clone().into()) + .collect() + } + + pub fn unique(&self) -> bool { + self.create.unique + } + + pub fn if_not_exists(&self) -> bool { + self.create.if_not_exists + } + + pub fn schema(&self) -> PyDFSchema { + (*self.create.schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("CreateIndex({})", self)) + } + + fn __name__(&self) -> PyResult { + 
Ok("CreateIndex".to_string()) + } +} + +impl LogicalNode for PyCreateIndex { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/describe_table.rs b/src/expr/describe_table.rs new file mode 100644 index 000000000..5658a13f2 --- /dev/null +++ b/src/expr/describe_table.rs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use arrow::{datatypes::Schema, pyarrow::PyArrowType}; +use datafusion::logical_expr::DescribeTable; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "DescribeTable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDescribeTable { + describe: DescribeTable, +} + +impl Display for PyDescribeTable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "DescribeTable") + } +} + +#[pymethods] +impl PyDescribeTable { + #[new] + fn new(schema: PyArrowType, output_schema: PyDFSchema) -> Self { + Self { + describe: DescribeTable { + schema: Arc::new(schema.0), + output_schema: Arc::new(output_schema.into()), + }, + } + } + + pub fn schema(&self) -> PyArrowType { + (*self.describe.schema).clone().into() + } + + pub fn output_schema(&self) -> PyDFSchema { + (*self.describe.output_schema).clone().into() + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DescribeTable({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("DescribeTable".to_string()) + } +} + +impl From for DescribeTable { + fn from(describe: PyDescribeTable) -> Self { + describe.describe + } +} + +impl From for PyDescribeTable { + fn from(describe: DescribeTable) -> PyDescribeTable { + PyDescribeTable { describe } + } +} + +impl LogicalNode for PyDescribeTable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/dml.rs b/src/expr/dml.rs new file mode 100644 index 000000000..251e336cc --- /dev/null +++ b/src/expr/dml.rs @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::logical_expr::dml::InsertOp; +use datafusion::logical_expr::{DmlStatement, WriteOp}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::common::schema::PyTableSource; +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "DmlStatement", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDmlStatement { + dml: DmlStatement, +} + +impl From for DmlStatement { + fn from(dml: PyDmlStatement) -> Self { + dml.dml + } +} + +impl From for PyDmlStatement { + fn from(dml: DmlStatement) -> PyDmlStatement { + PyDmlStatement { dml } + } +} + +impl LogicalNode for PyDmlStatement { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.dml.input).clone())] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyDmlStatement { + pub fn table_name(&self) -> PyResult { + Ok(self.dml.table_name.to_string()) + } + + pub fn target(&self) -> PyResult { + Ok(PyTableSource { + table_source: self.dml.target.clone(), + }) + } + + pub fn op(&self) -> PyWriteOp { + self.dml.op.clone().into() + } + + pub fn input(&self) -> PyLogicalPlan { + PyLogicalPlan { + plan: self.dml.input.clone(), + } + } + + pub fn output_schema(&self) -> PyDFSchema { + (*self.dml.output_schema).clone().into() + } + + fn 
__repr__(&self) -> PyResult { + Ok("DmlStatement".to_string()) + } + + fn __name__(&self) -> PyResult { + Ok("DmlStatement".to_string()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "WriteOp", module = "datafusion.expr")] +pub enum PyWriteOp { + Append, + Overwrite, + Replace, + + Update, + Delete, + Ctas, +} + +impl From for PyWriteOp { + fn from(write_op: WriteOp) -> Self { + match write_op { + WriteOp::Insert(InsertOp::Append) => PyWriteOp::Append, + WriteOp::Insert(InsertOp::Overwrite) => PyWriteOp::Overwrite, + WriteOp::Insert(InsertOp::Replace) => PyWriteOp::Replace, + + WriteOp::Update => PyWriteOp::Update, + WriteOp::Delete => PyWriteOp::Delete, + WriteOp::Ctas => PyWriteOp::Ctas, + } + } +} + +impl From for WriteOp { + fn from(py: PyWriteOp) -> Self { + match py { + PyWriteOp::Append => WriteOp::Insert(InsertOp::Append), + PyWriteOp::Overwrite => WriteOp::Insert(InsertOp::Overwrite), + PyWriteOp::Replace => WriteOp::Insert(InsertOp::Replace), + + PyWriteOp::Update => WriteOp::Update, + PyWriteOp::Delete => WriteOp::Delete, + PyWriteOp::Ctas => WriteOp::Ctas, + } + } +} + +#[pymethods] +impl PyWriteOp { + fn name(&self) -> String { + let write_op: WriteOp = self.clone().into(); + write_op.name().to_string() + } +} diff --git a/src/expr/drop_catalog_schema.rs b/src/expr/drop_catalog_schema.rs new file mode 100644 index 000000000..b7420a99c --- /dev/null +++ b/src/expr/drop_catalog_schema.rs @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::{common::SchemaReference, logical_expr::DropCatalogSchema, sql::TableReference}; +use pyo3::{exceptions::PyValueError, prelude::*, IntoPyObjectExt}; + +use crate::common::df_schema::PyDFSchema; + +use super::logical_node::LogicalNode; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "DropCatalogSchema", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDropCatalogSchema { + drop: DropCatalogSchema, +} + +impl From for DropCatalogSchema { + fn from(drop: PyDropCatalogSchema) -> Self { + drop.drop + } +} + +impl From for PyDropCatalogSchema { + fn from(drop: DropCatalogSchema) -> PyDropCatalogSchema { + PyDropCatalogSchema { drop } + } +} + +impl Display for PyDropCatalogSchema { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "DropCatalogSchema") + } +} + +fn parse_schema_reference(name: String) -> PyResult { + match name.into() { + TableReference::Bare { table } => Ok(SchemaReference::Bare { schema: table }), + TableReference::Partial { schema, table } => Ok(SchemaReference::Full { + schema: table, + catalog: schema, + }), + TableReference::Full { + catalog: _, + schema: _, + table: _, + } => Err(PyErr::new::( + "Invalid schema specifier (has 3 parts)".to_string(), + )), + } +} + +#[pymethods] +impl PyDropCatalogSchema { + #[new] + fn new(name: String, schema: PyDFSchema, if_exists: bool, cascade: bool) -> PyResult { + let name = parse_schema_reference(name)?; + Ok(PyDropCatalogSchema { + drop: DropCatalogSchema { + 
name, + schema: Arc::new(schema.into()), + if_exists, + cascade, + }, + }) + } + + fn name(&self) -> PyResult { + Ok(self.drop.name.to_string()) + } + + fn schema(&self) -> PyDFSchema { + (*self.drop.schema).clone().into() + } + + fn if_exists(&self) -> PyResult { + Ok(self.drop.if_exists) + } + + fn cascade(&self) -> PyResult { + Ok(self.drop.cascade) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DropCatalogSchema({})", self)) + } +} + +impl LogicalNode for PyDropCatalogSchema { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/drop_function.rs b/src/expr/drop_function.rs new file mode 100644 index 000000000..9fbd78fdc --- /dev/null +++ b/src/expr/drop_function.rs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::DropFunction; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use super::logical_node::LogicalNode; +use crate::common::df_schema::PyDFSchema; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "DropFunction", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDropFunction { + drop: DropFunction, +} + +impl From for DropFunction { + fn from(drop: PyDropFunction) -> Self { + drop.drop + } +} + +impl From for PyDropFunction { + fn from(drop: DropFunction) -> PyDropFunction { + PyDropFunction { drop } + } +} + +impl Display for PyDropFunction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "DropFunction") + } +} + +#[pymethods] +impl PyDropFunction { + #[new] + fn new(name: String, schema: PyDFSchema, if_exists: bool) -> PyResult { + Ok(PyDropFunction { + drop: DropFunction { + name, + schema: Arc::new(schema.into()), + if_exists, + }, + }) + } + fn name(&self) -> PyResult { + Ok(self.drop.name.clone()) + } + + fn schema(&self) -> PyDFSchema { + (*self.drop.schema).clone().into() + } + + fn if_exists(&self) -> PyResult { + Ok(self.drop.if_exists) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DropFunction({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("DropFunction".to_string()) + } +} + +impl LogicalNode for PyDropFunction { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/drop_view.rs b/src/expr/drop_view.rs new file mode 100644 index 000000000..1d1ab1e59 --- /dev/null +++ b/src/expr/drop_view.rs @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + fmt::{self, Display, Formatter}, + sync::Arc, +}; + +use datafusion::logical_expr::DropView; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::common::df_schema::PyDFSchema; + +use super::logical_node::LogicalNode; +use crate::sql::logical::PyLogicalPlan; + +#[pyclass(name = "DropView", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDropView { + drop: DropView, +} + +impl From for DropView { + fn from(drop: PyDropView) -> Self { + drop.drop + } +} + +impl From for PyDropView { + fn from(drop: DropView) -> PyDropView { + PyDropView { drop } + } +} + +impl Display for PyDropView { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "DropView: {name:?} if not exist:={if_exists}", + name = self.drop.name, + if_exists = self.drop.if_exists + ) + } +} + +#[pymethods] +impl PyDropView { + #[new] + fn new(name: String, schema: PyDFSchema, if_exists: bool) -> PyResult { + Ok(PyDropView { + drop: DropView { + name: name.into(), + schema: Arc::new(schema.into()), + if_exists, + }, + }) + } + + fn name(&self) -> PyResult { + Ok(self.drop.name.to_string()) + } + + fn schema(&self) -> PyDFSchema { + (*self.drop.schema).clone().into() + } + + fn if_exists(&self) -> PyResult { + Ok(self.drop.if_exists) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("DropView({})", self)) + } + + fn __name__(&self) -> PyResult { + 
Ok("DropView".to_string()) + } +} + +impl LogicalNode for PyDropView { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff --git a/src/expr/recursive_query.rs b/src/expr/recursive_query.rs new file mode 100644 index 000000000..65181f7d3 --- /dev/null +++ b/src/expr/recursive_query.rs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::fmt::{self, Display, Formatter}; + +use datafusion::logical_expr::RecursiveQuery; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::sql::logical::PyLogicalPlan; + +use super::logical_node::LogicalNode; + +#[pyclass(name = "RecursiveQuery", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyRecursiveQuery { + query: RecursiveQuery, +} + +impl From for RecursiveQuery { + fn from(query: PyRecursiveQuery) -> Self { + query.query + } +} + +impl From for PyRecursiveQuery { + fn from(query: RecursiveQuery) -> PyRecursiveQuery { + PyRecursiveQuery { query } + } +} + +impl Display for PyRecursiveQuery { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "RecursiveQuery {name:?} is_distinct:={is_distinct}", + name = self.query.name, + is_distinct = self.query.is_distinct + ) + } +} + +#[pymethods] +impl PyRecursiveQuery { + #[new] + fn new( + name: String, + static_term: PyLogicalPlan, + recursive_term: PyLogicalPlan, + is_distinct: bool, + ) -> Self { + Self { + query: RecursiveQuery { + name, + static_term: static_term.plan(), + recursive_term: recursive_term.plan(), + is_distinct, + }, + } + } + + fn name(&self) -> PyResult { + Ok(self.query.name.clone()) + } + + fn static_term(&self) -> PyLogicalPlan { + PyLogicalPlan::from((*self.query.static_term).clone()) + } + + fn recursive_term(&self) -> PyLogicalPlan { + PyLogicalPlan::from((*self.query.recursive_term).clone()) + } + + fn is_distinct(&self) -> PyResult { + Ok(self.query.is_distinct) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("RecursiveQuery({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("RecursiveQuery".to_string()) + } +} + +impl LogicalNode for PyRecursiveQuery { + fn inputs(&self) -> Vec { + vec![ + PyLogicalPlan::from((*self.query.static_term).clone()), + PyLogicalPlan::from((*self.query.recursive_term).clone()), + ] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} diff 
--git a/src/expr/statement.rs b/src/expr/statement.rs new file mode 100644 index 000000000..83774cda1 --- /dev/null +++ b/src/expr/statement.rs @@ -0,0 +1,454 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::logical_expr::{ + Deallocate, Execute, Prepare, SetVariable, TransactionAccessMode, TransactionConclusion, + TransactionEnd, TransactionIsolationLevel, TransactionStart, +}; +use pyo3::{prelude::*, IntoPyObjectExt}; + +use crate::{common::data_type::PyDataType, sql::logical::PyLogicalPlan}; + +use super::{logical_node::LogicalNode, PyExpr}; + +#[pyclass(name = "TransactionStart", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyTransactionStart { + transaction_start: TransactionStart, +} + +impl From for PyTransactionStart { + fn from(transaction_start: TransactionStart) -> PyTransactionStart { + PyTransactionStart { transaction_start } + } +} + +impl TryFrom for TransactionStart { + type Error = PyErr; + + fn try_from(py: PyTransactionStart) -> Result { + Ok(py.transaction_start) + } +} + +impl LogicalNode for PyTransactionStart { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + 
} +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "TransactionAccessMode", module = "datafusion.expr")] +pub enum PyTransactionAccessMode { + ReadOnly, + ReadWrite, +} + +impl From for PyTransactionAccessMode { + fn from(access_mode: TransactionAccessMode) -> PyTransactionAccessMode { + match access_mode { + TransactionAccessMode::ReadOnly => PyTransactionAccessMode::ReadOnly, + TransactionAccessMode::ReadWrite => PyTransactionAccessMode::ReadWrite, + } + } +} + +impl TryFrom for TransactionAccessMode { + type Error = PyErr; + + fn try_from(py: PyTransactionAccessMode) -> Result { + match py { + PyTransactionAccessMode::ReadOnly => Ok(TransactionAccessMode::ReadOnly), + PyTransactionAccessMode::ReadWrite => Ok(TransactionAccessMode::ReadWrite), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass( + eq, + eq_int, + name = "TransactionIsolationLevel", + module = "datafusion.expr" +)] +pub enum PyTransactionIsolationLevel { + ReadUncommitted, + ReadCommitted, + RepeatableRead, + Serializable, + Snapshot, +} + +impl From for PyTransactionIsolationLevel { + fn from(isolation_level: TransactionIsolationLevel) -> PyTransactionIsolationLevel { + match isolation_level { + TransactionIsolationLevel::ReadUncommitted => { + PyTransactionIsolationLevel::ReadUncommitted + } + TransactionIsolationLevel::ReadCommitted => PyTransactionIsolationLevel::ReadCommitted, + TransactionIsolationLevel::RepeatableRead => { + PyTransactionIsolationLevel::RepeatableRead + } + TransactionIsolationLevel::Serializable => PyTransactionIsolationLevel::Serializable, + TransactionIsolationLevel::Snapshot => PyTransactionIsolationLevel::Snapshot, + } + } +} + +impl TryFrom for TransactionIsolationLevel { + type Error = PyErr; + + fn try_from(value: PyTransactionIsolationLevel) -> Result { + match value { + PyTransactionIsolationLevel::ReadUncommitted => { + Ok(TransactionIsolationLevel::ReadUncommitted) + } + 
PyTransactionIsolationLevel::ReadCommitted => { + Ok(TransactionIsolationLevel::ReadCommitted) + } + PyTransactionIsolationLevel::RepeatableRead => { + Ok(TransactionIsolationLevel::RepeatableRead) + } + PyTransactionIsolationLevel::Serializable => { + Ok(TransactionIsolationLevel::Serializable) + } + PyTransactionIsolationLevel::Snapshot => Ok(TransactionIsolationLevel::Snapshot), + } + } +} + +#[pymethods] +impl PyTransactionStart { + #[new] + pub fn new( + access_mode: PyTransactionAccessMode, + isolation_level: PyTransactionIsolationLevel, + ) -> PyResult { + let access_mode = access_mode.try_into()?; + let isolation_level = isolation_level.try_into()?; + Ok(PyTransactionStart { + transaction_start: TransactionStart { + access_mode, + isolation_level, + }, + }) + } + + pub fn access_mode(&self) -> PyResult { + Ok(self.transaction_start.access_mode.clone().into()) + } + + pub fn isolation_level(&self) -> PyResult { + Ok(self.transaction_start.isolation_level.clone().into()) + } +} + +#[pyclass(name = "TransactionEnd", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyTransactionEnd { + transaction_end: TransactionEnd, +} + +impl From for PyTransactionEnd { + fn from(transaction_end: TransactionEnd) -> PyTransactionEnd { + PyTransactionEnd { transaction_end } + } +} + +impl TryFrom for TransactionEnd { + type Error = PyErr; + + fn try_from(py: PyTransactionEnd) -> Result { + Ok(py.transaction_end) + } +} + +impl LogicalNode for PyTransactionEnd { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[pyclass(eq, eq_int, name = "TransactionConclusion", module = "datafusion.expr")] +pub enum PyTransactionConclusion { + Commit, + Rollback, +} + +impl From for PyTransactionConclusion { + fn from(value: TransactionConclusion) -> Self { + match value { + TransactionConclusion::Commit => 
PyTransactionConclusion::Commit, + TransactionConclusion::Rollback => PyTransactionConclusion::Rollback, + } + } +} + +impl TryFrom for TransactionConclusion { + type Error = PyErr; + + fn try_from(value: PyTransactionConclusion) -> Result { + match value { + PyTransactionConclusion::Commit => Ok(TransactionConclusion::Commit), + PyTransactionConclusion::Rollback => Ok(TransactionConclusion::Rollback), + } + } +} +#[pymethods] +impl PyTransactionEnd { + #[new] + pub fn new(conclusion: PyTransactionConclusion, chain: bool) -> PyResult { + let conclusion = conclusion.try_into()?; + Ok(PyTransactionEnd { + transaction_end: TransactionEnd { conclusion, chain }, + }) + } + + pub fn conclusion(&self) -> PyResult { + Ok(self.transaction_end.conclusion.clone().into()) + } + + pub fn chain(&self) -> bool { + self.transaction_end.chain + } +} + +#[pyclass(name = "SetVariable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PySetVariable { + set_variable: SetVariable, +} + +impl From for PySetVariable { + fn from(set_variable: SetVariable) -> PySetVariable { + PySetVariable { set_variable } + } +} + +impl TryFrom for SetVariable { + type Error = PyErr; + + fn try_from(py: PySetVariable) -> Result { + Ok(py.set_variable) + } +} + +impl LogicalNode for PySetVariable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PySetVariable { + #[new] + pub fn new(variable: String, value: String) -> Self { + PySetVariable { + set_variable: SetVariable { variable, value }, + } + } + + pub fn variable(&self) -> String { + self.set_variable.variable.clone() + } + + pub fn value(&self) -> String { + self.set_variable.value.clone() + } +} + +#[pyclass(name = "Prepare", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyPrepare { + prepare: Prepare, +} + +impl From for PyPrepare { + fn from(prepare: Prepare) -> PyPrepare { + PyPrepare 
{ prepare } + } +} + +impl TryFrom for Prepare { + type Error = PyErr; + + fn try_from(py: PyPrepare) -> Result { + Ok(py.prepare) + } +} + +impl LogicalNode for PyPrepare { + fn inputs(&self) -> Vec { + vec![PyLogicalPlan::from((*self.prepare.input).clone())] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyPrepare { + #[new] + pub fn new(name: String, data_types: Vec, input: PyLogicalPlan) -> Self { + let input = input.plan().clone(); + let data_types = data_types + .into_iter() + .map(|data_type| data_type.into()) + .collect(); + PyPrepare { + prepare: Prepare { + name, + data_types, + input, + }, + } + } + + pub fn name(&self) -> String { + self.prepare.name.clone() + } + + pub fn data_types(&self) -> Vec { + self.prepare + .data_types + .clone() + .into_iter() + .map(|t| t.into()) + .collect() + } + + pub fn input(&self) -> PyLogicalPlan { + PyLogicalPlan { + plan: self.prepare.input.clone(), + } + } +} + +#[pyclass(name = "Execute", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyExecute { + execute: Execute, +} + +impl From for PyExecute { + fn from(execute: Execute) -> PyExecute { + PyExecute { execute } + } +} + +impl TryFrom for Execute { + type Error = PyErr; + + fn try_from(py: PyExecute) -> Result { + Ok(py.execute) + } +} + +impl LogicalNode for PyExecute { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyExecute { + #[new] + pub fn new(name: String, parameters: Vec) -> Self { + let parameters = parameters + .into_iter() + .map(|parameter| parameter.into()) + .collect(); + PyExecute { + execute: Execute { name, parameters }, + } + } + + pub fn name(&self) -> String { + self.execute.name.clone() + } + + pub fn parameters(&self) -> Vec { + self.execute + .parameters + .clone() + .into_iter() + .map(|t| t.into()) + .collect() + 
} +} + +#[pyclass(name = "Deallocate", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyDeallocate { + deallocate: Deallocate, +} + +impl From for PyDeallocate { + fn from(deallocate: Deallocate) -> PyDeallocate { + PyDeallocate { deallocate } + } +} + +impl TryFrom for Deallocate { + type Error = PyErr; + + fn try_from(py: PyDeallocate) -> Result { + Ok(py.deallocate) + } +} + +impl LogicalNode for PyDeallocate { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyDeallocate { + #[new] + pub fn new(name: String) -> Self { + PyDeallocate { + deallocate: Deallocate { name }, + } + } + + pub fn name(&self) -> String { + self.deallocate.name.clone() + } +} diff --git a/src/expr/values.rs b/src/expr/values.rs new file mode 100644 index 000000000..fb2692230 --- /dev/null +++ b/src/expr/values.rs @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use datafusion::logical_expr::Values; +use pyo3::{prelude::*, IntoPyObjectExt}; +use pyo3::{pyclass, PyErr, PyResult, Python}; + +use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; + +use super::{logical_node::LogicalNode, PyExpr}; + +#[pyclass(name = "Values", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyValues { + values: Values, +} + +impl From for PyValues { + fn from(values: Values) -> PyValues { + PyValues { values } + } +} + +impl TryFrom for Values { + type Error = PyErr; + + fn try_from(py: PyValues) -> Result { + Ok(py.values) + } +} + +impl LogicalNode for PyValues { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyValues { + #[new] + pub fn new(schema: PyDFSchema, values: Vec>) -> PyResult { + let values = values + .into_iter() + .map(|row| row.into_iter().map(|expr| expr.into()).collect()) + .collect(); + Ok(PyValues { + values: Values { + schema: Arc::new(schema.into()), + values, + }, + }) + } + + pub fn schema(&self) -> PyResult { + Ok((*self.values.schema).clone().into()) + } + + pub fn values(&self) -> Vec> { + self.values + .values + .clone() + .into_iter() + .map(|row| row.into_iter().map(|expr| expr.into()).collect()) + .collect() + } +} diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 96561c434..198d68bdc 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,10 +17,25 @@ use std::sync::Arc; +use crate::context::PySessionContext; use crate::errors::PyDataFusionResult; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; +use crate::expr::copy_to::PyCopyTo; +use crate::expr::create_catalog::PyCreateCatalog; +use crate::expr::create_catalog_schema::PyCreateCatalogSchema; +use crate::expr::create_external_table::PyCreateExternalTable; +use crate::expr::create_function::PyCreateFunction; +use 
crate::expr::create_index::PyCreateIndex; +use crate::expr::create_memory_table::PyCreateMemoryTable; +use crate::expr::create_view::PyCreateView; +use crate::expr::describe_table::PyDescribeTable; use crate::expr::distinct::PyDistinct; +use crate::expr::dml::PyDmlStatement; +use crate::expr::drop_catalog_schema::PyDropCatalogSchema; +use crate::expr::drop_function::PyDropFunction; +use crate::expr::drop_table::PyDropTable; +use crate::expr::drop_view::PyDropView; use crate::expr::empty_relation::PyEmptyRelation; use crate::expr::explain::PyExplain; use crate::expr::extension::PyExtension; @@ -28,14 +43,20 @@ use crate::expr::filter::PyFilter; use crate::expr::join::PyJoin; use crate::expr::limit::PyLimit; use crate::expr::projection::PyProjection; +use crate::expr::recursive_query::PyRecursiveQuery; +use crate::expr::repartition::PyRepartition; use crate::expr::sort::PySort; +use crate::expr::statement::{ + PyDeallocate, PyExecute, PyPrepare, PySetVariable, PyTransactionEnd, PyTransactionStart, +}; use crate::expr::subquery::PySubquery; use crate::expr::subquery_alias::PySubqueryAlias; use crate::expr::table_scan::PyTableScan; +use crate::expr::union::PyUnion; use crate::expr::unnest::PyUnnest; +use crate::expr::values::PyValues; use crate::expr::window::PyWindowExpr; -use crate::{context::PySessionContext, errors::py_unsupported_variant_err}; -use datafusion::logical_expr::LogicalPlan; +use datafusion::logical_expr::{DdlStatement, LogicalPlan, Statement}; use datafusion_proto::logical_plan::{AsLogicalPlan, DefaultLogicalExtensionCodec}; use prost::Message; use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyBytes}; @@ -82,18 +103,54 @@ impl PyLogicalPlan { LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), LogicalPlan::Unnest(plan) => PyUnnest::from(plan.clone()).to_variant(py), LogicalPlan::Window(plan) => PyWindowExpr::from(plan.clone()).to_variant(py), - LogicalPlan::Repartition(_) - | LogicalPlan::Union(_) - | 
LogicalPlan::Statement(_) - | LogicalPlan::Values(_) - | LogicalPlan::Dml(_) - | LogicalPlan::Ddl(_) - | LogicalPlan::Copy(_) - | LogicalPlan::DescribeTable(_) - | LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( - "Conversion of variant not implemented: {:?}", - self.plan - ))), + LogicalPlan::Repartition(plan) => PyRepartition::from(plan.clone()).to_variant(py), + LogicalPlan::Union(plan) => PyUnion::from(plan.clone()).to_variant(py), + LogicalPlan::Statement(plan) => match plan { + Statement::TransactionStart(plan) => { + PyTransactionStart::from(plan.clone()).to_variant(py) + } + Statement::TransactionEnd(plan) => { + PyTransactionEnd::from(plan.clone()).to_variant(py) + } + Statement::SetVariable(plan) => PySetVariable::from(plan.clone()).to_variant(py), + Statement::Prepare(plan) => PyPrepare::from(plan.clone()).to_variant(py), + Statement::Execute(plan) => PyExecute::from(plan.clone()).to_variant(py), + Statement::Deallocate(plan) => PyDeallocate::from(plan.clone()).to_variant(py), + }, + LogicalPlan::Values(plan) => PyValues::from(plan.clone()).to_variant(py), + LogicalPlan::Dml(plan) => PyDmlStatement::from(plan.clone()).to_variant(py), + LogicalPlan::Ddl(plan) => match plan { + DdlStatement::CreateExternalTable(plan) => { + PyCreateExternalTable::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateMemoryTable(plan) => { + PyCreateMemoryTable::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateView(plan) => PyCreateView::from(plan.clone()).to_variant(py), + DdlStatement::CreateCatalogSchema(plan) => { + PyCreateCatalogSchema::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateCatalog(plan) => { + PyCreateCatalog::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateIndex(plan) => PyCreateIndex::from(plan.clone()).to_variant(py), + DdlStatement::DropTable(plan) => PyDropTable::from(plan.clone()).to_variant(py), + DdlStatement::DropView(plan) => PyDropView::from(plan.clone()).to_variant(py), + 
DdlStatement::DropCatalogSchema(plan) => { + PyDropCatalogSchema::from(plan.clone()).to_variant(py) + } + DdlStatement::CreateFunction(plan) => { + PyCreateFunction::from(plan.clone()).to_variant(py) + } + DdlStatement::DropFunction(plan) => { + PyDropFunction::from(plan.clone()).to_variant(py) + } + }, + LogicalPlan::Copy(plan) => PyCopyTo::from(plan.clone()).to_variant(py), + LogicalPlan::DescribeTable(plan) => PyDescribeTable::from(plan.clone()).to_variant(py), + LogicalPlan::RecursiveQuery(plan) => { + PyRecursiveQuery::from(plan.clone()).to_variant(py) + } } } From 7d8bcd8d20623beb76a397eb4fddfb18781589eb Mon Sep 17 00:00:00 2001 From: kosiew Date: Mon, 5 May 2025 21:50:52 +0800 Subject: [PATCH 041/206] Partial fix for 1078: Enhance DataFrame Formatter Configuration with Memory and Display Controls (#1119) * feat: add configurable max table bytes and min table rows for DataFrame display * Revert "feat: add configurable max table bytes and min table rows for DataFrame display" This reverts commit f9b78fa3180c5d6c20eaa3b6d0af7426d7084093. * feat: add FormatterConfig for configurable DataFrame display options * refactor: simplify attribute extraction in get_formatter_config function * refactor: remove hardcoded constants and use FormatterConfig for display options * refactor: simplify record batch collection by using FormatterConfig for display options * feat: add max_memory_bytes, min_rows_display, and repr_rows parameters to DataFrameHtmlFormatter * feat: add tests for HTML formatter row display settings and memory limit * refactor: extract Python formatter retrieval into a separate function * Revert "feat: add tests for HTML formatter row display settings and memory limit" This reverts commit e089d7b282e53e587116b11d92760e6d292ec871. * feat: add tests for HTML formatter row and memory limit configurations * Revert "feat: add tests for HTML formatter row and memory limit configurations" This reverts commit 4090fd2f7378855b045d6bfd1368d088cc9ada75. 
* feat: add tests for new parameters and validation in DataFrameHtmlFormatter * Reorganize tests * refactor: rename and restructure formatter functions for clarity and maintainability * feat: implement PythonFormatter struct and refactor formatter retrieval for improved clarity * refactor: improve comments and restructure FormatterConfig usage in PyDataFrame * Add DataFrame usage guide with HTML rendering customization options (#1108) * docs: enhance user guide with detailed DataFrame operations and examples * move /docs/source/api/dataframe.rst into user-guide * docs: remove DataFrame API documentation * docs: fix formatting inconsistencies in DataFrame user guide * Two minor corrections to documentation rendering --------- Co-authored-by: Tim Saucer * Update documentation * refactor: streamline HTML rendering documentation * refactor: extract validation logic into separate functions for clarity * Implement feature X to enhance user experience and optimize performance * feat: add validation method for FormatterConfig to ensure positive integer values * add comment - ensure minimum rows are collected even if memory or row limits are hit * Update html_formatter documentation * update tests * remove unused type hints from imports in html_formatter.py * remove redundant tests for DataFrameHtmlFormatter and clean up assertions * refactor get_attr function to support generic default values * build_formatter_config_from_python return PyResult * fix ruff errors * trigger ci * fix: remove redundant newline in test_custom_style_provider_html_formatter * add more tests * trigger ci * Fix ruff errors * fix clippy error * feat: add validation for parameters in configure_formatter * test: add tests for invalid parameters in configure_formatter * Fix ruff errors --------- Co-authored-by: Tim Saucer --- docs/source/user-guide/dataframe.rst | 52 +++++++- python/datafusion/html_formatter.py | 104 ++++++++++++--- python/tests/test_dataframe.py | 183 ++++++++++++++++++++++++--- 
src/dataframe.rs | 142 +++++++++++++++++---- 4 files changed, 413 insertions(+), 68 deletions(-) diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst index a78fd8073..11e3d7e72 100644 --- a/docs/source/user-guide/dataframe.rst +++ b/docs/source/user-guide/dataframe.rst @@ -75,13 +75,17 @@ You can customize how DataFrames are rendered in HTML by configuring the formatt # Change the default styling configure_formatter( - max_rows=50, # Maximum number of rows to display - max_width=None, # Maximum width in pixels (None for auto) - theme="light", # Theme: "light" or "dark" - precision=2, # Floating point precision - thousands_separator=",", # Separator for thousands - date_format="%Y-%m-%d", # Date format - truncate_width=20 # Max width for string columns before truncating + max_cell_length=25, # Maximum characters in a cell before truncation + max_width=1000, # Maximum width in pixels + max_height=300, # Maximum height in pixels + max_memory_bytes=2097152, # Maximum memory for rendering (2MB) + min_rows_display=20, # Minimum number of rows to display + repr_rows=10, # Number of rows to display in __repr__ + enable_cell_expansion=True,# Allow expanding truncated cells + custom_css=None, # Additional custom CSS + show_truncation_message=True, # Show message when data is truncated + style_provider=None, # Custom styling provider + use_shared_styles=True # Share styles across tables ) The formatter settings affect all DataFrames displayed after configuration. @@ -113,6 +117,25 @@ For advanced styling needs, you can create a custom style provider: # Apply the custom style provider configure_formatter(style_provider=MyStyleProvider()) +Performance Optimization with Shared Styles +------------------------------------------- +The ``use_shared_styles`` parameter (enabled by default) optimizes performance when displaying +multiple DataFrames in notebook environments: + + .. 
code-block:: python + from datafusion.html_formatter import StyleProvider, configure_formatter + # Default: Use shared styles (recommended for notebooks) + configure_formatter(use_shared_styles=True) + + # Disable shared styles (each DataFrame includes its own styles) + configure_formatter(use_shared_styles=False) + +When ``use_shared_styles=True``: +- CSS styles and JavaScript are included only once per notebook session +- This reduces HTML output size and prevents style duplication +- Improves rendering performance with many DataFrames +- Applies consistent styling across all DataFrames + Creating a Custom Formatter --------------------------- @@ -177,3 +200,18 @@ You can also use a context manager to temporarily change formatting settings: # Back to default formatting df.show() + +Memory and Display Controls +--------------------------- + +You can control how much data is displayed and how much memory is used for rendering: + + .. code-block:: python + + configure_formatter( + max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display + min_rows_display=50, # Always show at least 50 rows + repr_rows=20 # Show 20 rows in __repr__ output + ) + +These parameters help balance comprehensive data display against performance considerations. \ No newline at end of file diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py index a50e14fd5..12a7e4553 100644 --- a/python/datafusion/html_formatter.py +++ b/python/datafusion/html_formatter.py @@ -27,6 +27,36 @@ ) +def _validate_positive_int(value: Any, param_name: str) -> None: + """Validate that a parameter is a positive integer. 
+ + Args: + value: The value to validate + param_name: Name of the parameter (used in error message) + + Raises: + ValueError: If the value is not a positive integer + """ + if not isinstance(value, int) or value <= 0: + msg = f"{param_name} must be a positive integer" + raise ValueError(msg) + + +def _validate_bool(value: Any, param_name: str) -> None: + """Validate that a parameter is a boolean. + + Args: + value: The value to validate + param_name: Name of the parameter (used in error message) + + Raises: + TypeError: If the value is not a boolean + """ + if not isinstance(value, bool): + msg = f"{param_name} must be a boolean" + raise TypeError(msg) + + @runtime_checkable class CellFormatter(Protocol): """Protocol for cell value formatters.""" @@ -91,6 +121,9 @@ class DataFrameHtmlFormatter: max_cell_length: Maximum characters to display in a cell before truncation max_width: Maximum width of the HTML table in pixels max_height: Maximum height of the HTML table in pixels + max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) + min_rows_display: Minimum number of rows to display + repr_rows: Default number of rows to display in repr output enable_cell_expansion: Whether to add expand/collapse buttons for long cell values custom_css: Additional CSS to include in the HTML output @@ -108,6 +141,9 @@ def __init__( max_cell_length: int = 25, max_width: int = 1000, max_height: int = 300, + max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB + min_rows_display: int = 20, + repr_rows: int = 10, enable_cell_expansion: bool = True, custom_css: Optional[str] = None, show_truncation_message: bool = True, @@ -124,6 +160,12 @@ def __init__( Maximum width of the displayed table in pixels. max_height : int, default 300 Maximum height of the displayed table in pixels. + max_memory_bytes : int, default 2097152 (2MB) + Maximum memory in bytes for rendered data. + min_rows_display : int, default 20 + Minimum number of rows to display. 
+ repr_rows : int, default 10 + Default number of rows to display in repr output. enable_cell_expansion : bool, default True Whether to allow cells to expand when clicked. custom_css : str, optional @@ -139,7 +181,8 @@ def __init__( Raises: ------ ValueError - If max_cell_length, max_width, or max_height is not a positive integer. + If max_cell_length, max_width, max_height, max_memory_bytes, + min_rows_display, or repr_rows is not a positive integer. TypeError If enable_cell_expansion, show_truncation_message, or use_shared_styles is not a boolean, @@ -148,27 +191,17 @@ def __init__( protocol. """ # Validate numeric parameters - - if not isinstance(max_cell_length, int) or max_cell_length <= 0: - msg = "max_cell_length must be a positive integer" - raise ValueError(msg) - if not isinstance(max_width, int) or max_width <= 0: - msg = "max_width must be a positive integer" - raise ValueError(msg) - if not isinstance(max_height, int) or max_height <= 0: - msg = "max_height must be a positive integer" - raise ValueError(msg) + _validate_positive_int(max_cell_length, "max_cell_length") + _validate_positive_int(max_width, "max_width") + _validate_positive_int(max_height, "max_height") + _validate_positive_int(max_memory_bytes, "max_memory_bytes") + _validate_positive_int(min_rows_display, "min_rows_display") + _validate_positive_int(repr_rows, "repr_rows") # Validate boolean parameters - if not isinstance(enable_cell_expansion, bool): - msg = "enable_cell_expansion must be a boolean" - raise TypeError(msg) - if not isinstance(show_truncation_message, bool): - msg = "show_truncation_message must be a boolean" - raise TypeError(msg) - if not isinstance(use_shared_styles, bool): - msg = "use_shared_styles must be a boolean" - raise TypeError(msg) + _validate_bool(enable_cell_expansion, "enable_cell_expansion") + _validate_bool(show_truncation_message, "show_truncation_message") + _validate_bool(use_shared_styles, "use_shared_styles") # Validate custom_css if custom_css is 
not None and not isinstance(custom_css, str): @@ -183,6 +216,9 @@ def __init__( self.max_cell_length = max_cell_length self.max_width = max_width self.max_height = max_height + self.max_memory_bytes = max_memory_bytes + self.min_rows_display = min_rows_display + self.repr_rows = repr_rows self.enable_cell_expansion = enable_cell_expansion self.custom_css = custom_css self.show_truncation_message = show_truncation_message @@ -597,6 +633,9 @@ def configure_formatter(**kwargs: Any) -> None: **kwargs: Formatter configuration parameters like max_cell_length, max_width, max_height, enable_cell_expansion, etc. + Raises: + ValueError: If any invalid parameters are provided + Example: >>> from datafusion.html_formatter import configure_formatter >>> configure_formatter( @@ -606,6 +645,31 @@ def configure_formatter(**kwargs: Any) -> None: ... use_shared_styles=True ... ) """ + # Valid parameters accepted by DataFrameHtmlFormatter + valid_params = { + "max_cell_length", + "max_width", + "max_height", + "max_memory_bytes", + "min_rows_display", + "repr_rows", + "enable_cell_expansion", + "custom_css", + "show_truncation_message", + "style_provider", + "use_shared_styles", + } + + # Check for invalid parameters + invalid_params = set(kwargs) - valid_params + if invalid_params: + msg = ( + f"Invalid formatter parameters: {', '.join(invalid_params)}. 
" + f"Valid parameters are: {', '.join(valid_params)}" + ) + raise ValueError(msg) + + # Create and set formatter with validated parameters set_formatter(DataFrameHtmlFormatter(**kwargs)) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 464b884db..e01308c86 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -41,6 +41,8 @@ ) from pyarrow.csv import write_csv +MB = 1024 * 1024 + @pytest.fixture def ctx(): @@ -117,6 +119,31 @@ def clean_formatter_state(): reset_formatter() +# custom style for testing with html formatter +class CustomStyleProvider: + def get_cell_style(self) -> str: + return ( + "background-color: #f5f5f5; color: #333; padding: 8px; border: " + "1px solid #ddd;" + ) + + def get_header_style(self) -> str: + return ( + "background-color: #4285f4; color: white; font-weight: bold; " + "padding: 10px; border: 1px solid #3367d6;" + ) + + +def count_table_rows(html_content: str) -> int: + """Count the number of table rows in HTML content. 
+ Args: + html_content: HTML string to analyze + Returns: + Number of table rows found (number of <tr> tags) + """ + return len(re.findall(r" str: - return ( - "background-color: #f5f5f5; color: #333; padding: 8px; border: " - "1px solid #ddd;" - ) - - def get_header_style(self) -> str: - return ( - "background-color: #4285f4; color: white; font-weight: bold; " - "padding: 10px; border: 1px solid #3367d6;" - ) - # Configure with custom style provider configure_formatter(style_provider=CustomStyleProvider()) @@ -917,6 +930,141 @@ def get_header_style(self) -> str: assert "color: #5af" in html_output # Even numbers +def test_html_formatter_memory(df, clean_formatter_state): + """Test the memory and row control parameters in DataFrameHtmlFormatter.""" + configure_formatter(max_memory_bytes=10, min_rows_display=1) + html_output = df._repr_html_() + + # Count the number of table rows in the output + tr_count = count_table_rows(html_output) + # With a tiny memory limit of 10 bytes, the formatter should display + # the minimum number of rows (1) plus a message about truncation + assert tr_count == 2 # 1 for header row, 1 for data row + assert "data truncated" in html_output.lower() + + configure_formatter(max_memory_bytes=10 * MB, min_rows_display=1) + html_output = df._repr_html_() + # With larger memory limit and min_rows_display=1, should display all rows + tr_count = count_table_rows(html_output) + # Table should have header row (1) + 3 data rows = 4 rows + assert tr_count == 4 + # No truncation message should appear + assert "data truncated" not in html_output.lower() + + +def test_html_formatter_repr_rows(df, clean_formatter_state): + configure_formatter(min_rows_display=2, repr_rows=2) + html_output = df._repr_html_() + + tr_count = count_table_rows(html_output) + # Table should have header row (1) + 2 data rows = 3 rows + assert tr_count == 3 + + configure_formatter(min_rows_display=2, repr_rows=3) + html_output = df._repr_html_() + + tr_count = count_table_rows(html_output) +
# Table should have header row (1) + 3 data rows = 4 rows + assert tr_count == 4 + + +def test_html_formatter_validation(): + # Test validation for invalid parameters + + with pytest.raises(ValueError, match="max_cell_length must be a positive integer"): + DataFrameHtmlFormatter(max_cell_length=0) + + with pytest.raises(ValueError, match="max_width must be a positive integer"): + DataFrameHtmlFormatter(max_width=0) + + with pytest.raises(ValueError, match="max_height must be a positive integer"): + DataFrameHtmlFormatter(max_height=0) + + with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): + DataFrameHtmlFormatter(max_memory_bytes=0) + + with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): + DataFrameHtmlFormatter(max_memory_bytes=-100) + + with pytest.raises(ValueError, match="min_rows_display must be a positive integer"): + DataFrameHtmlFormatter(min_rows_display=0) + + with pytest.raises(ValueError, match="min_rows_display must be a positive integer"): + DataFrameHtmlFormatter(min_rows_display=-5) + + with pytest.raises(ValueError, match="repr_rows must be a positive integer"): + DataFrameHtmlFormatter(repr_rows=0) + + with pytest.raises(ValueError, match="repr_rows must be a positive integer"): + DataFrameHtmlFormatter(repr_rows=-10) + + +def test_configure_formatter(df, clean_formatter_state): + """Test that configure_formatter applies the configured, non-default + parameters.""" + + # these are non-default values + max_cell_length = 10 + max_width = 500 + max_height = 30 + max_memory_bytes = 3 * MB + min_rows_display = 2 + repr_rows = 2 + enable_cell_expansion = False + show_truncation_message = False + use_shared_styles = False + + reset_formatter() + formatter_default = get_formatter() + + assert formatter_default.max_cell_length != max_cell_length + assert formatter_default.max_width != max_width + assert formatter_default.max_height != max_height + assert
formatter_default.max_memory_bytes != max_memory_bytes + assert formatter_default.min_rows_display != min_rows_display + assert formatter_default.repr_rows != repr_rows + assert formatter_default.enable_cell_expansion != enable_cell_expansion + assert formatter_default.show_truncation_message != show_truncation_message + assert formatter_default.use_shared_styles != use_shared_styles + + # Configure with custom style provider and additional parameters + configure_formatter( + max_cell_length=max_cell_length, + max_width=max_width, + max_height=max_height, + max_memory_bytes=max_memory_bytes, + min_rows_display=min_rows_display, + repr_rows=repr_rows, + enable_cell_expansion=enable_cell_expansion, + show_truncation_message=show_truncation_message, + use_shared_styles=use_shared_styles, + ) + formatter_custom = get_formatter() + assert formatter_custom.max_cell_length == max_cell_length + assert formatter_custom.max_width == max_width + assert formatter_custom.max_height == max_height + assert formatter_custom.max_memory_bytes == max_memory_bytes + assert formatter_custom.min_rows_display == min_rows_display + assert formatter_custom.repr_rows == repr_rows + assert formatter_custom.enable_cell_expansion == enable_cell_expansion + assert formatter_custom.show_truncation_message == show_truncation_message + assert formatter_custom.use_shared_styles == use_shared_styles + + +def test_configure_formatter_invalid_params(clean_formatter_state): + """Test that configure_formatter rejects invalid parameters.""" + with pytest.raises(ValueError, match="Invalid formatter parameters"): + configure_formatter(invalid_param=123) + + # Test with multiple parameters, one valid and one invalid + with pytest.raises(ValueError, match="Invalid formatter parameters"): + configure_formatter(max_width=500, not_a_real_param="test") + + # Test with multiple invalid parameters + with pytest.raises(ValueError, match="Invalid formatter parameters"): + configure_formatter(fake_param1="test", 
fake_param2=456) + + def test_get_dataframe(tmp_path): ctx = SessionContext() @@ -1505,9 +1653,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: assert result["new_col"] == [3 for _i in range(3)] -def test_dataframe_repr_html_structure(df) -> None: +def test_dataframe_repr_html_structure(df, clean_formatter_state) -> None: """Test that DataFrame._repr_html_ produces expected HTML output structure.""" - import re output = df._repr_html_() @@ -1537,7 +1684,7 @@ def test_dataframe_repr_html_structure(df) -> None: assert len(body_matches) == 1, "Expected pattern of values not found in HTML output" -def test_dataframe_repr_html_values(df): +def test_dataframe_repr_html_values(df, clean_formatter_state): """Test that DataFrame._repr_html_ contains the expected data values.""" html = df._repr_html_() assert html is not None diff --git a/src/dataframe.rs b/src/dataframe.rs index 787f63520..211e31bd1 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -71,8 +71,103 @@ impl PyTableProvider { PyTable::new(table_provider) } } -const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB -const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20; + +/// Configuration for DataFrame display formatting +#[derive(Debug, Clone)] +pub struct FormatterConfig { + /// Maximum memory in bytes to use for display (default: 2MB) + pub max_bytes: usize, + /// Minimum number of rows to display (default: 20) + pub min_rows: usize, + /// Number of rows to include in __repr__ output (default: 10) + pub repr_rows: usize, +} + +impl Default for FormatterConfig { + fn default() -> Self { + Self { + max_bytes: 2 * 1024 * 1024, // 2MB + min_rows: 20, + repr_rows: 10, + } + } +} + +impl FormatterConfig { + /// Validates that all configuration values are positive integers. + /// + /// # Returns + /// + /// `Ok(())` if all values are valid, or an `Err` with a descriptive error message. 
+ pub fn validate(&self) -> Result<(), String> { + if self.max_bytes == 0 { + return Err("max_bytes must be a positive integer".to_string()); + } + + if self.min_rows == 0 { + return Err("min_rows must be a positive integer".to_string()); + } + + if self.repr_rows == 0 { + return Err("repr_rows must be a positive integer".to_string()); + } + + Ok(()) + } +} + +/// Holds the Python formatter and its configuration +struct PythonFormatter<'py> { + /// The Python formatter object + formatter: Bound<'py, PyAny>, + /// The formatter configuration + config: FormatterConfig, +} + +/// Get the Python formatter and its configuration +fn get_python_formatter_with_config(py: Python) -> PyResult { + let formatter = import_python_formatter(py)?; + let config = build_formatter_config_from_python(&formatter)?; + Ok(PythonFormatter { formatter, config }) +} + +/// Get the Python formatter from the datafusion.html_formatter module +fn import_python_formatter(py: Python) -> PyResult> { + let formatter_module = py.import("datafusion.html_formatter")?; + let get_formatter = formatter_module.getattr("get_formatter")?; + get_formatter.call0() +} + +// Helper function to extract attributes with fallback to default +fn get_attr<'a, T>(py_object: &'a Bound<'a, PyAny>, attr_name: &str, default_value: T) -> T +where + T: for<'py> pyo3::FromPyObject<'py> + Clone, +{ + py_object + .getattr(attr_name) + .and_then(|v| v.extract::()) + .unwrap_or_else(|_| default_value.clone()) +} + +/// Helper function to create a FormatterConfig from a Python formatter object +fn build_formatter_config_from_python(formatter: &Bound<'_, PyAny>) -> PyResult { + let default_config = FormatterConfig::default(); + let max_bytes = get_attr(formatter, "max_memory_bytes", default_config.max_bytes); + let min_rows = get_attr(formatter, "min_rows_display", default_config.min_rows); + let repr_rows = get_attr(formatter, "repr_rows", default_config.repr_rows); + + let config = FormatterConfig { + max_bytes, + min_rows, + 
repr_rows, + }; + + // Return the validated config, converting String error to PyErr + config + .validate() + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e))?; + Ok(config) +} /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. @@ -114,9 +209,14 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { + // Get the Python formatter config + let PythonFormatter { + formatter: _, + config, + } = get_python_formatter_with_config(py)?; let (batches, has_more) = wait_for_future( py, - collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10), + collect_record_batches_to_display(self.df.as_ref().clone(), config), )?; if batches.is_empty() { // This should not be reached, but do it for safety since we index into the vector below @@ -135,13 +235,11 @@ impl PyDataFrame { } fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + // Get the Python formatter and config + let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; let (batches, has_more) = wait_for_future( py, - collect_record_batches_to_display( - self.df.as_ref().clone(), - MIN_TABLE_ROWS_TO_DISPLAY, - usize::MAX, - ), + collect_record_batches_to_display(self.df.as_ref().clone(), config), )?; if batches.is_empty() { // This should not be reached, but do it for safety since we index into the vector below @@ -158,12 +256,6 @@ impl PyDataFrame { let py_schema = self.schema().into_pyobject(py)?; - // Get the Python formatter module and call format_html - let formatter_module = py.import("datafusion.html_formatter")?; - let get_formatter = formatter_module.getattr("get_formatter")?; - let formatter = get_formatter.call0()?; - - // Call format_html method on the formatter let kwargs = pyo3::types::PyDict::new(py); let py_batches_list = PyList::new(py, py_batches.as_slice())?; kwargs.set_item("batches", py_batches_list)?; @@ 
-796,9 +888,14 @@ fn record_batch_into_schema( /// rows, set min_rows == max_rows. async fn collect_record_batches_to_display( df: DataFrame, - min_rows: usize, - max_rows: usize, + config: FormatterConfig, ) -> Result<(Vec, bool), DataFusionError> { + let FormatterConfig { + max_bytes, + min_rows, + repr_rows, + } = config; + let partitioned_stream = df.execute_stream_partitioned().await?; let mut stream = futures::stream::iter(partitioned_stream).flatten(); let mut size_estimate_so_far = 0; @@ -806,9 +903,8 @@ async fn collect_record_batches_to_display( let mut record_batches = Vec::default(); let mut has_more = false; - while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows) - || rows_so_far < min_rows - { + // ensure minimum rows even if memory/row limits are hit + while (size_estimate_so_far < max_bytes && rows_so_far < repr_rows) || rows_so_far < min_rows { let mut rb = match stream.next().await { None => { break; @@ -821,8 +917,8 @@ async fn collect_record_batches_to_display( if rows_in_rb > 0 { size_estimate_so_far += rb.get_array_memory_size(); - if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY { - let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32; + if size_estimate_so_far > max_bytes { + let ratio = max_bytes as f32 / size_estimate_so_far as f32; let total_rows = rows_in_rb + rows_so_far; let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize; @@ -838,8 +934,8 @@ async fn collect_record_batches_to_display( } } - if rows_in_rb + rows_so_far > max_rows { - rb = rb.slice(0, max_rows - rows_so_far); + if rows_in_rb + rows_so_far > repr_rows { + rb = rb.slice(0, repr_rows - rows_so_far); has_more = true; } From f3c98ec7a2eb325041530b1ae8d6de41aa558037 Mon Sep 17 00:00:00 2001 From: kosiew Date: Fri, 16 May 2025 14:34:19 +0800 Subject: [PATCH 042/206] Add fill_null method to DataFrame API for handling missing values (#1019) * feat: add fill_null method to DataFrame for handling null 
values * test: add coalesce function tests for handling default values * Resolve test cases for fill_null * feat: add fill_nan method to DataFrame for handling NaN values * move imports out of functions * docs: add documentation for fill_null and fill_nan methods in DataFrame * Add more tests * fix ruff errors * amend def fill_null to invoke PyDataFrame's fill_null - Implemented `fill_null` method in `dataframe.rs` to allow filling null values with a specified value for specific columns or all columns. - Added a helper function `python_value_to_scalar_value` to convert Python values to DataFusion ScalarValues, supporting various types including integers, floats, booleans, strings, and timestamps. - Updated the `count` method in `PyDataFrame` to maintain functionality. * refactor: remove fill_nan method documentation from functions.rst * refactor: remove unused import of Enum from dataframe.py * refactor: improve error handling and type extraction in python_value_to_scalar_value function * refactor: enhance datetime and date conversion logic in python_value_to_scalar_value function * refactor: streamline type extraction in python_value_to_scalar_value function * fix try_convert_to_string * refactor: improve type handling in python_value_to_scalar_value function * refactor: move py_obj_to_scalar_value function to utils module * refactor: update fill_null to use py_obj_to_scalar_value from utils * Remove python_object_to_scalar_value code * refactor: enhance py_obj_to_scalar_value to utilize PyArrow for complex type conversion * refactor: update py_obj_to_scalar_value to handle errors and use extract_bound for PyArrow scalar conversion * refactor: modify py_obj_to_scalar_value to return ScalarValue directly and streamline error handling * refactor: update py_obj_to_scalar_value to return a Result for better error handling * test: add tests for fill_null functionality in DataFrame with null values * test: enhance null DataFrame tests to include date32 and date64 
columns * refactor: simplify py_obj_to_scalar_value by removing direct extraction of basic types * refactor: remove unnecessary documentation from py_obj_to_scalar_value function * Fix ruff errors * test: update datetime handling in coalesce tests to include timezone information * Fix ruff errors * trigger ci --- .../common-operations/functions.rst | 21 ++ python/datafusion/dataframe.py | 26 +- python/tests/test_dataframe.py | 266 ++++++++++++++++++ python/tests/test_functions.py | 61 ++++ src/config.rs | 21 +- src/dataframe.rs | 23 +- src/utils.rs | 18 ++ 7 files changed, 414 insertions(+), 22 deletions(-) diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst index 12097be8f..d458d3eb0 100644 --- a/docs/source/user-guide/common-operations/functions.rst +++ b/docs/source/user-guide/common-operations/functions.rst @@ -129,3 +129,24 @@ The function :py:func:`~datafusion.functions.in_list` allows to check a column f .limit(20) .to_pandas() ) + + +Handling Missing Values +======================= + +DataFusion provides methods to handle missing values in DataFrames: + +fill_null +--------- + +The ``fill_null()`` method replaces NULL values in specified columns with a provided value: + +.. code-block:: python + + # Fill all NULL values with 0 where possible + df = df.fill_null(0) + + # Fill NULL values only in specific string columns + df = df.fill_null("missing", subset=["name", "category"]) + +The fill value will be cast to match each column's type. If casting fails for a column, that column remains unchanged. 
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 26fe8f453..a1df7e080 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -37,6 +37,8 @@ except ImportError: from typing_extensions import deprecated # Python 3.12 +from datafusion._internal import DataFrame as DataFrameInternal +from datafusion.expr import Expr, SortExpr, sort_or_default from datafusion.plan import ExecutionPlan, LogicalPlan from datafusion.record_batch import RecordBatchStream @@ -53,8 +55,6 @@ from enum import Enum -from datafusion.expr import Expr, SortExpr, sort_or_default - # excerpt from deltalake # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 @@ -869,3 +869,25 @@ def within_limit(df: DataFrame, limit: int) -> DataFrame: DataFrame: After applying func to the original dataframe. """ return func(self, *args) + + def fill_null(self, value: Any, subset: list[str] | None = None) -> DataFrame: + """Fill null values in specified columns with a value. + + Args: + value: Value to replace nulls with. Will be cast to match column type. + subset: Optional list of column names to fill. If None, fills all columns. + + Returns: + DataFrame with null values replaced where type casting is possible + + Examples: + >>> df = df.fill_null(0) # Fill all nulls with 0 where possible + >>> # Fill nulls in specific string columns + >>> df = df.fill_null("missing", subset=["name", "category"]) + + Notes: + - Only fills nulls in columns where the value can be cast to the column type + - For columns where casting fails, the original column is kept unchanged + - For columns not in subset, the original column is kept unchanged + """ + return DataFrame(self.df.fill_null(value, subset)) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index e01308c86..dd5f962b2 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -14,6 +14,7 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. +import datetime import os import re from typing import Any @@ -119,6 +120,38 @@ def clean_formatter_state(): reset_formatter() +@pytest.fixture +def null_df(): + """Create a DataFrame with null values of different types.""" + ctx = SessionContext() + + # Create a RecordBatch with nulls across different types + batch = pa.RecordBatch.from_arrays( + [ + pa.array([1, None, 3, None], type=pa.int64()), + pa.array([4.5, 6.7, None, None], type=pa.float64()), + pa.array(["a", None, "c", None], type=pa.string()), + pa.array([True, None, False, None], type=pa.bool_()), + pa.array( + [10957, None, 18993, None], type=pa.date32() + ), # 2000-01-01, null, 2022-01-01, null + pa.array( + [946684800000, None, 1640995200000, None], type=pa.date64() + ), # 2000-01-01, null, 2022-01-01, null + ], + names=[ + "int_col", + "float_col", + "str_col", + "bool_col", + "date32_col", + "date64_col", + ], + ) + + return ctx.create_dataframe([[batch]]) + + # custom style for testing with html formatter class CustomStyleProvider: def get_cell_style(self) -> str: @@ -1794,3 +1827,236 @@ def test_html_formatter_manual_format_html(clean_formatter_state): assert "") + return html + + def _build_table_container_start(self) -> list[str]: + """Build the opening tags for the table container.""" + html = [] + html.append( + f'
' + ) + html.append('') + return html + + def _build_table_header(self, schema: Any) -> list[str]: + """Build the HTML table header with column names.""" + html = [] + html.append("") + html.append("") + for field in schema: + if self._custom_header_builder: + html.append(self._custom_header_builder(field)) + else: + html.append( + f"" + ) + html.append("") + html.append("") + return html + + def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: + """Build the HTML table body with data rows.""" + html = [] + html.append("") + + row_count = 0 + for batch in batches: + for row_idx in range(batch.num_rows): + row_count += 1 + html.append("") + + for col_idx, column in enumerate(batch.columns): + # Get the raw value from the column + raw_value = self._get_cell_value(column, row_idx) + + # Always check for type formatters first to format the value + formatted_value = self._format_cell_value(raw_value) + + # Then apply either custom cell builder or standard cell formatting + if self._custom_cell_builder: + # Pass both the raw value and formatted value to let the + # builder decide + cell_html = self._custom_cell_builder( + raw_value, row_count, col_idx, table_uuid + ) + html.append(cell_html) + else: + # Standard cell formatting with formatted value + if ( + len(str(raw_value)) > self.max_cell_length + and self.enable_cell_expansion + ): + cell_html = self._build_expandable_cell( + formatted_value, row_count, col_idx, table_uuid + ) + else: + cell_html = self._build_regular_cell(formatted_value) + html.append(cell_html) + + html.append("") + + html.append("") + return html + + def _get_cell_value(self, column: Any, row_idx: int) -> Any: + """Extract a cell value from a column. 
+ + Args: + column: Arrow array + row_idx: Row index + + Returns: + The raw cell value + """ + try: + value = column[row_idx] + + if hasattr(value, "as_py"): + return value.as_py() + except (AttributeError, TypeError): + pass + else: + return value + + def _format_cell_value(self, value: Any) -> str: + """Format a cell value for display. + + Uses registered type formatters if available. + + Args: + value: The cell value to format + + Returns: + Formatted cell value as string + """ + # Check for custom type formatters + for type_cls, formatter in self._type_formatters.items(): + if isinstance(value, type_cls): + return formatter(value) + + # If no formatter matched, return string representation + return str(value) + + def _build_expandable_cell( + self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str + ) -> str: + """Build an expandable cell for long content.""" + short_value = str(formatted_value)[: self.max_cell_length] + return ( + f"" + ) + + def _build_regular_cell(self, formatted_value: str) -> str: + """Build a regular table cell.""" + return ( + f"" + ) + + def _build_html_footer(self, has_more: bool) -> list[str]: + """Build the HTML footer with JavaScript and messages.""" + html = [] + + # Add JavaScript for interactivity only if cell expansion is enabled + # and we're not using the shared styles approach + if self.enable_cell_expansion and not self.use_shared_styles: + html.append(self._get_javascript()) + + # Add truncation message if needed + if has_more and self.show_truncation_message: + html.append("
Data truncated due to size.
") + + return html + + def _get_default_css(self) -> str: + """Get default CSS styles for the HTML table.""" + return """ + .expandable-container { + display: inline-block; + max-width: 200px; + } + .expandable { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + display: block; + } + .full-text { + display: none; + white-space: normal; + } + .expand-btn { + cursor: pointer; + color: blue; + text-decoration: underline; + border: none; + background: none; + font-size: inherit; + display: block; + margin-top: 5px; + } + """ + + def _get_javascript(self) -> str: + """Get JavaScript code for interactive elements.""" + return """ + + """ + + +class FormatterManager: + """Manager class for the global DataFrame HTML formatter instance.""" + + _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() + + @classmethod + def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. + + Args: + formatter: The formatter instance to use globally + """ + cls._default_formatter = formatter + _refresh_formatter_reference() + + @classmethod + def get_formatter(cls) -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + Returns: + The global HTML formatter instance + """ + return cls._default_formatter + + +def get_formatter() -> DataFrameHtmlFormatter: + """Get the current global DataFrame HTML formatter. + + This function is used by the DataFrame._repr_html_ implementation to access + the shared formatter instance. It can also be used directly when custom + HTML rendering is needed. + + Returns: + The global HTML formatter instance + + Example: + >>> from datafusion.html_formatter import get_formatter + >>> formatter = get_formatter() + >>> formatter.max_cell_length = 50 # Increase cell length + """ + return FormatterManager.get_formatter() + + +def set_formatter(formatter: DataFrameHtmlFormatter) -> None: + """Set the global DataFrame HTML formatter. 
+ + Args: + formatter: The formatter instance to use globally + + Example: + >>> from datafusion.html_formatter import get_formatter, set_formatter + >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) + >>> set_formatter(custom_formatter) + """ + FormatterManager.set_formatter(formatter) + + +def configure_formatter(**kwargs: Any) -> None: + """Configure the global DataFrame HTML formatter. + + This function creates a new formatter with the provided configuration + and sets it as the global formatter for all DataFrames. + + Args: + **kwargs: Formatter configuration parameters like max_cell_length, + max_width, max_height, enable_cell_expansion, etc. + + Raises: + ValueError: If any invalid parameters are provided + + Example: + >>> from datafusion.html_formatter import configure_formatter + >>> configure_formatter( + ... max_cell_length=50, + ... max_height=500, + ... enable_cell_expansion=True, + ... use_shared_styles=True + ... ) + """ + # Valid parameters accepted by DataFrameHtmlFormatter + valid_params = { + "max_cell_length", + "max_width", + "max_height", + "max_memory_bytes", + "min_rows_display", + "repr_rows", + "enable_cell_expansion", + "custom_css", + "show_truncation_message", + "style_provider", + "use_shared_styles", + } + + # Check for invalid parameters + invalid_params = set(kwargs) - valid_params + if invalid_params: + msg = ( + f"Invalid formatter parameters: {', '.join(invalid_params)}. " + f"Valid parameters are: {', '.join(valid_params)}" + ) + raise ValueError(msg) + + # Create and set formatter with validated parameters + set_formatter(DataFrameHtmlFormatter(**kwargs)) + + +def reset_formatter() -> None: + """Reset the global DataFrame HTML formatter to default settings. + + This function creates a new formatter with default configuration + and sets it as the global formatter for all DataFrames. 
+ + Example: + >>> from datafusion.html_formatter import reset_formatter + >>> reset_formatter() # Reset formatter to default settings + """ + formatter = DataFrameHtmlFormatter() + # Reset the styles_loaded flag to ensure styles will be reloaded + DataFrameHtmlFormatter._styles_loaded = False + set_formatter(formatter) + + +def reset_styles_loaded_state() -> None: + """Reset the styles loaded state to force reloading of styles. + + This can be useful when switching between notebook sessions or + when styles need to be refreshed. + + Example: + >>> from datafusion.html_formatter import reset_styles_loaded_state + >>> reset_styles_loaded_state() # Force styles to reload in next render + """ + DataFrameHtmlFormatter._styles_loaded = False + + +def _refresh_formatter_reference() -> None: + """Refresh formatter reference in any modules using it. + + This helps ensure that changes to the formatter are reflected in existing + DataFrames that might be caching the formatter reference. + """ + # This is a no-op but signals modules to refresh their reference diff --git a/python/datafusion/html_formatter.py b/python/datafusion/html_formatter.py index 12a7e4553..65eb1f042 100644 --- a/python/datafusion/html_formatter.py +++ b/python/datafusion/html_formatter.py @@ -14,698 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""HTML formatting utilities for DataFusion DataFrames.""" -from __future__ import annotations +"""Deprecated module for dataframe formatting.""" -from typing import ( - Any, - Callable, - Optional, - Protocol, - runtime_checkable, -) - - -def _validate_positive_int(value: Any, param_name: str) -> None: - """Validate that a parameter is a positive integer. 
- - Args: - value: The value to validate - param_name: Name of the parameter (used in error message) - - Raises: - ValueError: If the value is not a positive integer - """ - if not isinstance(value, int) or value <= 0: - msg = f"{param_name} must be a positive integer" - raise ValueError(msg) - - -def _validate_bool(value: Any, param_name: str) -> None: - """Validate that a parameter is a boolean. - - Args: - value: The value to validate - param_name: Name of the parameter (used in error message) - - Raises: - TypeError: If the value is not a boolean - """ - if not isinstance(value, bool): - msg = f"{param_name} must be a boolean" - raise TypeError(msg) - - -@runtime_checkable -class CellFormatter(Protocol): - """Protocol for cell value formatters.""" - - def __call__(self, value: Any) -> str: - """Format a cell value to string representation.""" - ... - - -@runtime_checkable -class StyleProvider(Protocol): - """Protocol for HTML style providers.""" - - def get_cell_style(self) -> str: - """Get the CSS style for table cells.""" - ... - - def get_header_style(self) -> str: - """Get the CSS style for header cells.""" - ... - - -class DefaultStyleProvider: - """Default implementation of StyleProvider.""" - - def get_cell_style(self) -> str: - """Get the CSS style for table cells. - - Returns: - CSS style string - """ - return ( - "border: 1px solid black; padding: 8px; text-align: left; " - "white-space: nowrap;" - ) - - def get_header_style(self) -> str: - """Get the CSS style for header cells. - - Returns: - CSS style string - """ - return ( - "border: 1px solid black; padding: 8px; text-align: left; " - "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; " - "max-width: fit-content;" - ) - - -class DataFrameHtmlFormatter: - """Configurable HTML formatter for DataFusion DataFrames. - - This class handles the HTML rendering of DataFrames for display in - Jupyter notebooks and other rich display contexts. 
- - This class supports extension through composition. Key extension points: - - Provide a custom StyleProvider for styling cells and headers - - Register custom formatters for specific types - - Provide custom cell builders for specialized cell rendering - - Args: - max_cell_length: Maximum characters to display in a cell before truncation - max_width: Maximum width of the HTML table in pixels - max_height: Maximum height of the HTML table in pixels - max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) - min_rows_display: Minimum number of rows to display - repr_rows: Default number of rows to display in repr output - enable_cell_expansion: Whether to add expand/collapse buttons for long cell - values - custom_css: Additional CSS to include in the HTML output - show_truncation_message: Whether to display a message when data is truncated - style_provider: Custom provider for cell and header styles - use_shared_styles: Whether to load styles and scripts only once per notebook - session - """ - - # Class variable to track if styles have been loaded in the notebook - _styles_loaded = False - - def __init__( - self, - max_cell_length: int = 25, - max_width: int = 1000, - max_height: int = 300, - max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB - min_rows_display: int = 20, - repr_rows: int = 10, - enable_cell_expansion: bool = True, - custom_css: Optional[str] = None, - show_truncation_message: bool = True, - style_provider: Optional[StyleProvider] = None, - use_shared_styles: bool = True, - ) -> None: - """Initialize the HTML formatter. - - Parameters - ---------- - max_cell_length : int, default 25 - Maximum length of cell content before truncation. - max_width : int, default 1000 - Maximum width of the displayed table in pixels. - max_height : int, default 300 - Maximum height of the displayed table in pixels. - max_memory_bytes : int, default 2097152 (2MB) - Maximum memory in bytes for rendered data. 
- min_rows_display : int, default 20 - Minimum number of rows to display. - repr_rows : int, default 10 - Default number of rows to display in repr output. - enable_cell_expansion : bool, default True - Whether to allow cells to expand when clicked. - custom_css : str, optional - Custom CSS to apply to the HTML table. - show_truncation_message : bool, default True - Whether to show a message indicating that content has been truncated. - style_provider : StyleProvider, optional - Provider of CSS styles for the HTML table. If None, DefaultStyleProvider - is used. - use_shared_styles : bool, default True - Whether to use shared styles across multiple tables. - - Raises: - ------ - ValueError - If max_cell_length, max_width, max_height, max_memory_bytes, - min_rows_display, or repr_rows is not a positive integer. - TypeError - If enable_cell_expansion, show_truncation_message, or use_shared_styles is - not a boolean, - or if custom_css is provided but is not a string, - or if style_provider is provided but does not implement the StyleProvider - protocol. 
- """ - # Validate numeric parameters - _validate_positive_int(max_cell_length, "max_cell_length") - _validate_positive_int(max_width, "max_width") - _validate_positive_int(max_height, "max_height") - _validate_positive_int(max_memory_bytes, "max_memory_bytes") - _validate_positive_int(min_rows_display, "min_rows_display") - _validate_positive_int(repr_rows, "repr_rows") - - # Validate boolean parameters - _validate_bool(enable_cell_expansion, "enable_cell_expansion") - _validate_bool(show_truncation_message, "show_truncation_message") - _validate_bool(use_shared_styles, "use_shared_styles") - - # Validate custom_css - if custom_css is not None and not isinstance(custom_css, str): - msg = "custom_css must be None or a string" - raise TypeError(msg) - - # Validate style_provider - if style_provider is not None and not isinstance(style_provider, StyleProvider): - msg = "style_provider must implement the StyleProvider protocol" - raise TypeError(msg) - - self.max_cell_length = max_cell_length - self.max_width = max_width - self.max_height = max_height - self.max_memory_bytes = max_memory_bytes - self.min_rows_display = min_rows_display - self.repr_rows = repr_rows - self.enable_cell_expansion = enable_cell_expansion - self.custom_css = custom_css - self.show_truncation_message = show_truncation_message - self.style_provider = style_provider or DefaultStyleProvider() - self.use_shared_styles = use_shared_styles - # Registry for custom type formatters - self._type_formatters: dict[type, CellFormatter] = {} - # Custom cell builders - self._custom_cell_builder: Optional[Callable[[Any, int, int, str], str]] = None - self._custom_header_builder: Optional[Callable[[Any], str]] = None - - def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: - """Register a custom formatter for a specific data type. 
- - Args: - type_class: The type to register a formatter for - formatter: Function that takes a value of the given type and returns - a formatted string - """ - self._type_formatters[type_class] = formatter - - def set_custom_cell_builder( - self, builder: Callable[[Any, int, int, str], str] - ) -> None: - """Set a custom cell builder function. - - Args: - builder: Function that takes (value, row, col, table_id) and returns HTML - """ - self._custom_cell_builder = builder - - def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: - """Set a custom header builder function. - - Args: - builder: Function that takes a field and returns HTML - """ - self._custom_header_builder = builder - - @classmethod - def is_styles_loaded(cls) -> bool: - """Check if HTML styles have been loaded in the current session. - - This method is primarily intended for debugging UI rendering issues - related to style loading. - - Returns: - True if styles have been loaded, False otherwise - - Example: - >>> from datafusion.html_formatter import DataFrameHtmlFormatter - >>> DataFrameHtmlFormatter.is_styles_loaded() - False - """ - return cls._styles_loaded - - def format_html( - self, - batches: list, - schema: Any, - has_more: bool = False, - table_uuid: str | None = None, - ) -> str: - """Format record batches as HTML. - - This method is used by DataFrame's _repr_html_ implementation and can be - called directly when custom HTML rendering is needed. 
- - Args: - batches: List of Arrow RecordBatch objects - schema: Arrow Schema object - has_more: Whether there are more batches not shown - table_uuid: Unique ID for the table, used for JavaScript interactions - - Returns: - HTML string representation of the data - - Raises: - TypeError: If schema is invalid and no batches are provided - """ - if not batches: - return "No data to display" - - # Validate schema - if schema is None or not hasattr(schema, "__iter__"): - msg = "Schema must be provided" - raise TypeError(msg) - - # Generate a unique ID if none provided - table_uuid = table_uuid or f"df-{id(batches)}" - - # Build HTML components - html = [] - - # Only include styles and scripts if: - # 1. Not using shared styles, OR - # 2. Using shared styles but they haven't been loaded yet - include_styles = ( - not self.use_shared_styles or not DataFrameHtmlFormatter._styles_loaded - ) - - if include_styles: - html.extend(self._build_html_header()) - # If we're using shared styles, mark them as loaded - if self.use_shared_styles: - DataFrameHtmlFormatter._styles_loaded = True - - html.extend(self._build_table_container_start()) - - # Add table header and body - html.extend(self._build_table_header(schema)) - html.extend(self._build_table_body(batches, table_uuid)) - - html.append("
" + f"{field.name}
" + f"
" + "" + "" + f"{formatted_value}" + f"" + f"
" + f"
{formatted_value}
") - html.append("
") - - # Add footer (JavaScript and messages) - if include_styles and self.enable_cell_expansion: - html.append(self._get_javascript()) - - # Always add truncation message if needed (independent of styles) - if has_more and self.show_truncation_message: - html.append("
Data truncated due to size.
") - - return "\n".join(html) - - def _build_html_header(self) -> list[str]: - """Build the HTML header with CSS styles.""" - html = [] - html.append("") - return html +import warnings - def _build_table_container_start(self) -> list[str]: - """Build the opening tags for the table container.""" - html = [] - html.append( - f'
' - ) - html.append('') - return html +from datafusion.dataframe_formatter import * # noqa: F403 - def _build_table_header(self, schema: Any) -> list[str]: - """Build the HTML table header with column names.""" - html = [] - html.append("") - html.append("") - for field in schema: - if self._custom_header_builder: - html.append(self._custom_header_builder(field)) - else: - html.append( - f"" - ) - html.append("") - html.append("") - return html - - def _build_table_body(self, batches: list, table_uuid: str) -> list[str]: - """Build the HTML table body with data rows.""" - html = [] - html.append("") - - row_count = 0 - for batch in batches: - for row_idx in range(batch.num_rows): - row_count += 1 - html.append("") - - for col_idx, column in enumerate(batch.columns): - # Get the raw value from the column - raw_value = self._get_cell_value(column, row_idx) - - # Always check for type formatters first to format the value - formatted_value = self._format_cell_value(raw_value) - - # Then apply either custom cell builder or standard cell formatting - if self._custom_cell_builder: - # Pass both the raw value and formatted value to let the - # builder decide - cell_html = self._custom_cell_builder( - raw_value, row_count, col_idx, table_uuid - ) - html.append(cell_html) - else: - # Standard cell formatting with formatted value - if ( - len(str(raw_value)) > self.max_cell_length - and self.enable_cell_expansion - ): - cell_html = self._build_expandable_cell( - formatted_value, row_count, col_idx, table_uuid - ) - else: - cell_html = self._build_regular_cell(formatted_value) - html.append(cell_html) - - html.append("") - - html.append("") - return html - - def _get_cell_value(self, column: Any, row_idx: int) -> Any: - """Extract a cell value from a column. 
- - Args: - column: Arrow array - row_idx: Row index - - Returns: - The raw cell value - """ - try: - value = column[row_idx] - - if hasattr(value, "as_py"): - return value.as_py() - except (AttributeError, TypeError): - pass - else: - return value - - def _format_cell_value(self, value: Any) -> str: - """Format a cell value for display. - - Uses registered type formatters if available. - - Args: - value: The cell value to format - - Returns: - Formatted cell value as string - """ - # Check for custom type formatters - for type_cls, formatter in self._type_formatters.items(): - if isinstance(value, type_cls): - return formatter(value) - - # If no formatter matched, return string representation - return str(value) - - def _build_expandable_cell( - self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str - ) -> str: - """Build an expandable cell for long content.""" - short_value = str(formatted_value)[: self.max_cell_length] - return ( - f"" - ) - - def _build_regular_cell(self, formatted_value: str) -> str: - """Build a regular table cell.""" - return ( - f"" - ) - - def _build_html_footer(self, has_more: bool) -> list[str]: - """Build the HTML footer with JavaScript and messages.""" - html = [] - - # Add JavaScript for interactivity only if cell expansion is enabled - # and we're not using the shared styles approach - if self.enable_cell_expansion and not self.use_shared_styles: - html.append(self._get_javascript()) - - # Add truncation message if needed - if has_more and self.show_truncation_message: - html.append("
Data truncated due to size.
") - - return html - - def _get_default_css(self) -> str: - """Get default CSS styles for the HTML table.""" - return """ - .expandable-container { - display: inline-block; - max-width: 200px; - } - .expandable { - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; - display: block; - } - .full-text { - display: none; - white-space: normal; - } - .expand-btn { - cursor: pointer; - color: blue; - text-decoration: underline; - border: none; - background: none; - font-size: inherit; - display: block; - margin-top: 5px; - } - """ - - def _get_javascript(self) -> str: - """Get JavaScript code for interactive elements.""" - return """ - - """ - - -class FormatterManager: - """Manager class for the global DataFrame HTML formatter instance.""" - - _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter() - - @classmethod - def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None: - """Set the global DataFrame HTML formatter. - - Args: - formatter: The formatter instance to use globally - """ - cls._default_formatter = formatter - _refresh_formatter_reference() - - @classmethod - def get_formatter(cls) -> DataFrameHtmlFormatter: - """Get the current global DataFrame HTML formatter. - - Returns: - The global HTML formatter instance - """ - return cls._default_formatter - - -def get_formatter() -> DataFrameHtmlFormatter: - """Get the current global DataFrame HTML formatter. - - This function is used by the DataFrame._repr_html_ implementation to access - the shared formatter instance. It can also be used directly when custom - HTML rendering is needed. - - Returns: - The global HTML formatter instance - - Example: - >>> from datafusion.html_formatter import get_formatter - >>> formatter = get_formatter() - >>> formatter.max_cell_length = 50 # Increase cell length - """ - return FormatterManager.get_formatter() - - -def set_formatter(formatter: DataFrameHtmlFormatter) -> None: - """Set the global DataFrame HTML formatter. 
- - Args: - formatter: The formatter instance to use globally - - Example: - >>> from datafusion.html_formatter import get_formatter, set_formatter - >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100) - >>> set_formatter(custom_formatter) - """ - FormatterManager.set_formatter(formatter) - - -def configure_formatter(**kwargs: Any) -> None: - """Configure the global DataFrame HTML formatter. - - This function creates a new formatter with the provided configuration - and sets it as the global formatter for all DataFrames. - - Args: - **kwargs: Formatter configuration parameters like max_cell_length, - max_width, max_height, enable_cell_expansion, etc. - - Raises: - ValueError: If any invalid parameters are provided - - Example: - >>> from datafusion.html_formatter import configure_formatter - >>> configure_formatter( - ... max_cell_length=50, - ... max_height=500, - ... enable_cell_expansion=True, - ... use_shared_styles=True - ... ) - """ - # Valid parameters accepted by DataFrameHtmlFormatter - valid_params = { - "max_cell_length", - "max_width", - "max_height", - "max_memory_bytes", - "min_rows_display", - "repr_rows", - "enable_cell_expansion", - "custom_css", - "show_truncation_message", - "style_provider", - "use_shared_styles", - } - - # Check for invalid parameters - invalid_params = set(kwargs) - valid_params - if invalid_params: - msg = ( - f"Invalid formatter parameters: {', '.join(invalid_params)}. " - f"Valid parameters are: {', '.join(valid_params)}" - ) - raise ValueError(msg) - - # Create and set formatter with validated parameters - set_formatter(DataFrameHtmlFormatter(**kwargs)) - - -def reset_formatter() -> None: - """Reset the global DataFrame HTML formatter to default settings. - - This function creates a new formatter with default configuration - and sets it as the global formatter for all DataFrames. 
- - Example: - >>> from datafusion.html_formatter import reset_formatter - >>> reset_formatter() # Reset formatter to default settings - """ - formatter = DataFrameHtmlFormatter() - # Reset the styles_loaded flag to ensure styles will be reloaded - DataFrameHtmlFormatter._styles_loaded = False - set_formatter(formatter) - - -def reset_styles_loaded_state() -> None: - """Reset the styles loaded state to force reloading of styles. - - This can be useful when switching between notebook sessions or - when styles need to be refreshed. - - Example: - >>> from datafusion.html_formatter import reset_styles_loaded_state - >>> reset_styles_loaded_state() # Force styles to reload in next render - """ - DataFrameHtmlFormatter._styles_loaded = False - - -def _refresh_formatter_reference() -> None: - """Refresh formatter reference in any modules using it. - - This helps ensure that changes to the formatter are reflected in existing - DataFrames that might be caching the formatter reference. - """ - # This is a no-op but signals modules to refresh their reference +warnings.warn( + "The module 'html_formatter' is deprecated and will be removed in the next release. "
+ "Please use 'dataframe_formatter' instead.", + DeprecationWarning, + stacklevel=3, +) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index deaa30b3d..c9ae38d8e 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -37,14 +37,14 @@ from datafusion import ( functions as f, ) -from datafusion.expr import Window -from datafusion.html_formatter import ( +from datafusion.dataframe_formatter import ( DataFrameHtmlFormatter, configure_formatter, get_formatter, reset_formatter, reset_styles_loaded_state, ) +from datafusion.expr import Window from pyarrow.csv import write_csv MB = 1024 * 1024 diff --git a/src/dataframe.rs b/src/dataframe.rs index 3d68db279..c2ad4771e 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -24,6 +24,7 @@ use arrow::compute::can_cast_types; use arrow::error::ArrowError; use arrow::ffi::FFI_ArrowSchema; use arrow::ffi_stream::FFI_ArrowArrayStream; +use arrow::pyarrow::FromPyArrow; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::arrow::util::pretty; @@ -150,9 +151,9 @@ fn get_python_formatter_with_config(py: Python) -> PyResult { Ok(PythonFormatter { formatter, config }) } -/// Get the Python formatter from the datafusion.html_formatter module +/// Get the Python formatter from the datafusion.dataframe_formatter module fn import_python_formatter(py: Python) -> PyResult> { - let formatter_module = py.import("datafusion.html_formatter")?; + let formatter_module = py.import("datafusion.dataframe_formatter")?; let get_formatter = formatter_module.getattr("get_formatter")?; get_formatter.call0() } @@ -295,6 +296,46 @@ impl PyDataFrame { pub fn new(df: DataFrame) -> Self { Self { df: Arc::new(df) } } + + fn prepare_repr_string(&self, py: Python, as_html: bool) -> PyDataFusionResult { + // Get the Python formatter and config + let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; + let (batches, 
has_more) = wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), config), + )??; + if batches.is_empty() { + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); + } + + let table_uuid = uuid::Uuid::new_v4().to_string(); + + // Convert record batches to PyObject list + let py_batches = batches + .into_iter() + .map(|rb| rb.to_pyarrow(py)) + .collect::>>()?; + + let py_schema = self.schema().into_pyobject(py)?; + + let kwargs = pyo3::types::PyDict::new(py); + let py_batches_list = PyList::new(py, py_batches.as_slice())?; + kwargs.set_item("batches", py_batches_list)?; + kwargs.set_item("schema", py_schema)?; + kwargs.set_item("has_more", has_more)?; + kwargs.set_item("table_uuid", table_uuid)?; + + let method_name = match as_html { + true => "format_html", + false => "format_str", + }; + + let html_result = formatter.call_method(method_name, (), Some(&kwargs))?; + let html_str: String = html_result.extract()?; + + Ok(html_str) + } } #[pymethods] @@ -321,18 +362,27 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { - // Get the Python formatter config - let PythonFormatter { - formatter: _, - config, - } = get_python_formatter_with_config(py)?; - let (batches, has_more) = wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??; + self.prepare_repr_string(py, false) + } + + #[staticmethod] + #[expect(unused_variables)] + fn default_str_repr<'py>( + batches: Vec>, + schema: &Bound<'py, PyAny>, + has_more: bool, + table_uuid: &str, + ) -> PyResult { + let batches = batches + .into_iter() + .map(|batch| RecordBatch::from_pyarrow_bound(&batch)) + .collect::>>()? 
+ .into_iter() + .filter(|batch| batch.num_rows() > 0) + .collect::>(); + if batches.is_empty() { - // This should not be reached, but do it for safety since we index into the vector below - return Ok("No data to display".to_string()); + return Ok("No data to display".to_owned()); } let batches_as_displ = @@ -347,38 +397,7 @@ impl PyDataFrame { } fn _repr_html_(&self, py: Python) -> PyDataFusionResult { - // Get the Python formatter and config - let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; - let (batches, has_more) = wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??; - if batches.is_empty() { - // This should not be reached, but do it for safety since we index into the vector below - return Ok("No data to display".to_string()); - } - - let table_uuid = uuid::Uuid::new_v4().to_string(); - - // Convert record batches to PyObject list - let py_batches = batches - .into_iter() - .map(|rb| rb.to_pyarrow(py)) - .collect::>>()?; - - let py_schema = self.schema().into_pyobject(py)?; - - let kwargs = pyo3::types::PyDict::new(py); - let py_batches_list = PyList::new(py, py_batches.as_slice())?; - kwargs.set_item("batches", py_batches_list)?; - kwargs.set_item("schema", py_schema)?; - kwargs.set_item("has_more", has_more)?; - kwargs.set_item("table_uuid", table_uuid)?; - - let html_result = formatter.call_method("format_html", (), Some(&kwargs))?; - let html_str: String = html_result.extract()?; - - Ok(html_str) + self.prepare_repr_string(py, true) } /// Calculate summary statistics for a DataFrame From 954563429384078a9e85c56ad553c7e3be7ac52a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 25 Jun 2025 11:29:35 -0400 Subject: [PATCH 058/206] feat: collect once during display() in jupyter notebooks (#1167) * Only collect one time during display() in jupyter notebooks * Check for juypter notebook environment specifically * Remove approach of checking environment which could not 
differentiate between jupyter console and notebook * Instead of trying to detect notebook vs console, collect one time when we have any kind if ipython environment. --- src/dataframe.rs | 36 ++++++++++++++++++++++++++---------- src/utils.rs | 11 +++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/dataframe.rs b/src/dataframe.rs index c2ad4771e..ab4749e35 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -51,7 +51,7 @@ use crate::physical_plan::PyExecutionPlan; use crate::record_batch::PyRecordBatchStream; use crate::sql::logical::PyLogicalPlan; use crate::utils::{ - get_tokio_runtime, py_obj_to_scalar_value, validate_pycapsule, wait_for_future, + get_tokio_runtime, is_ipython_env, py_obj_to_scalar_value, validate_pycapsule, wait_for_future, }; use crate::{ errors::PyDataFusionResult, @@ -289,21 +289,33 @@ impl PyParquetColumnOptions { #[derive(Clone)] pub struct PyDataFrame { df: Arc, + + // In IPython environment cache batches between __repr__ and _repr_html_ calls. 
+ batches: Option<(Vec, bool)>, } impl PyDataFrame { /// creates a new PyDataFrame pub fn new(df: DataFrame) -> Self { - Self { df: Arc::new(df) } + Self { + df: Arc::new(df), + batches: None, + } } - fn prepare_repr_string(&self, py: Python, as_html: bool) -> PyDataFusionResult { + fn prepare_repr_string(&mut self, py: Python, as_html: bool) -> PyDataFusionResult { // Get the Python formatter and config let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?; - let (batches, has_more) = wait_for_future( - py, - collect_record_batches_to_display(self.df.as_ref().clone(), config), - )??; + + let should_cache = *is_ipython_env(py) && self.batches.is_none(); + let (batches, has_more) = match self.batches.take() { + Some(b) => b, + None => wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), config), + )??, + }; + if batches.is_empty() { // This should not be reached, but do it for safety since we index into the vector below return Ok("No data to display".to_string()); @@ -313,7 +325,7 @@ impl PyDataFrame { // Convert record batches to PyObject list let py_batches = batches - .into_iter() + .iter() .map(|rb| rb.to_pyarrow(py)) .collect::>>()?; @@ -334,6 +346,10 @@ impl PyDataFrame { let html_result = formatter.call_method(method_name, (), Some(&kwargs))?; let html_str: String = html_result.extract()?; + if should_cache { + self.batches = Some((batches, has_more)); + } + Ok(html_str) } } @@ -361,7 +377,7 @@ impl PyDataFrame { } } - fn __repr__(&self, py: Python) -> PyDataFusionResult { + fn __repr__(&mut self, py: Python) -> PyDataFusionResult { self.prepare_repr_string(py, false) } @@ -396,7 +412,7 @@ impl PyDataFrame { Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}")) } - fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + fn _repr_html_(&mut self, py: Python) -> PyDataFusionResult { self.prepare_repr_string(py, true) } diff --git a/src/utils.rs b/src/utils.rs index 90d654385..f4e121fd5 
100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -39,6 +39,17 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap())) } +#[inline] +pub(crate) fn is_ipython_env(py: Python) -> &'static bool { + static IS_IPYTHON_ENV: OnceLock = OnceLock::new(); + IS_IPYTHON_ENV.get_or_init(|| { + py.import("IPython") + .and_then(|ipython| ipython.call_method0("get_ipython")) + .map(|ipython| !ipython.is_none()) + .unwrap_or(false) + }) +} + /// Utility to get the Global Datafussion CTX #[inline] pub(crate) fn get_global_ctx() -> &'static SessionContext { From 9362f53150e5423581757ed56883b3ca2c95b8a2 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 2 Jul 2025 08:08:53 -0400 Subject: [PATCH 059/206] feat: python based catalog and schema provider (#1156) * Exposing FFI to python * Exposing FFI to python * Workin progress on python catalog * Flushing out schema and catalog providers * Adding implementation of python based catalog and schema providers * Small updates after rebase * Add default in memory options for adding schema and catalogs * Add support for creating in memory catalog and schema * Update from database to schema in unit tests * xfailed label no longer applies to these unit tests * Defining abstract methods for catalog and schema providers * Working through issues between custom catalog and build in schema * Check types on schema provider to return * Add docstring * Add documentation about how to use catalog and schema providers * Re-add module to all after rebase * Minor bugfix * Clippy updates from the new rust version --------- Co-authored-by: renato2099 --- Cargo.lock | 19 + Cargo.toml | 2 + docs/source/user-guide/data-sources.rst | 56 ++ examples/datafusion-ffi-example/Cargo.lock | 1 + examples/datafusion-ffi-example/Cargo.toml | 1 + .../python/tests/_test_catalog_provider.py | 60 +++ .../src/catalog_provider.rs | 179 +++++++ examples/datafusion-ffi-example/src/lib.rs | 3 + 
python/datafusion/__init__.py | 1 + python/datafusion/catalog.py | 195 ++++++- python/datafusion/context.py | 24 +- python/datafusion/dataframe.py | 9 +- python/tests/test_catalog.py | 173 ++++++- python/tests/test_context.py | 40 +- python/tests/test_sql.py | 30 +- python/tests/test_substrait.py | 4 +- src/catalog.rs | 490 ++++++++++++++++-- src/common/data_type.rs | 120 ++--- src/context.rs | 61 ++- src/expr.rs | 15 +- src/expr/aggregate.rs | 2 +- src/expr/aggregate_expr.rs | 2 +- src/expr/alias.rs | 2 +- src/expr/analyze.rs | 2 +- src/expr/between.rs | 2 +- src/expr/column.rs | 2 +- src/expr/copy_to.rs | 4 +- src/expr/create_catalog.rs | 2 +- src/expr/create_catalog_schema.rs | 2 +- src/expr/create_external_table.rs | 2 +- src/expr/create_function.rs | 2 +- src/expr/create_index.rs | 2 +- src/expr/create_memory_table.rs | 2 +- src/expr/create_view.rs | 2 +- src/expr/describe_table.rs | 2 +- src/expr/distinct.rs | 5 +- src/expr/drop_catalog_schema.rs | 2 +- src/expr/drop_function.rs | 2 +- src/expr/drop_table.rs | 2 +- src/expr/drop_view.rs | 2 +- src/expr/empty_relation.rs | 2 +- src/expr/filter.rs | 2 +- src/expr/join.rs | 2 +- src/expr/like.rs | 6 +- src/expr/limit.rs | 2 +- src/expr/projection.rs | 2 +- src/expr/recursive_query.rs | 2 +- src/expr/repartition.rs | 2 +- src/expr/sort.rs | 2 +- src/expr/sort_expr.rs | 2 +- src/expr/subquery.rs | 2 +- src/expr/subquery_alias.rs | 2 +- src/expr/table_scan.rs | 2 +- src/expr/union.rs | 2 +- src/expr/unnest.rs | 2 +- src/expr/unnest_expr.rs | 2 +- src/expr/window.rs | 11 +- src/functions.rs | 2 +- src/lib.rs | 10 +- src/physical_plan.rs | 3 +- src/sql/logical.rs | 3 +- src/utils.rs | 5 +- 62 files changed, 1340 insertions(+), 258 deletions(-) create mode 100644 examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py create mode 100644 examples/datafusion-ffi-example/src/catalog_provider.rs diff --git a/Cargo.lock b/Cargo.lock index 112167cb4..a3e9336cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-165,6 +165,12 @@ dependencies = [ "zstd", ] +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + [[package]] name = "arrayref" version = "0.3.9" @@ -1503,6 +1509,7 @@ dependencies = [ "datafusion-proto", "datafusion-substrait", "futures", + "log", "mimalloc", "object_store", "prost", @@ -1510,6 +1517,7 @@ dependencies = [ "pyo3", "pyo3-async-runtimes", "pyo3-build-config", + "pyo3-log", "tokio", "url", "uuid", @@ -2953,6 +2961,17 @@ dependencies = [ "pyo3-build-config", ] +[[package]] +name = "pyo3-log" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45192e5e4a4d2505587e27806c7b710c231c40c56f3bfc19535d0bb25df52264" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + [[package]] name = "pyo3-macros" version = "0.24.2" diff --git a/Cargo.toml b/Cargo.toml index 4135e64e2..1f7895a50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ substrait = ["dep:datafusion-substrait"] tokio = { version = "1.45", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]} +pyo3-log = "0.12.4" arrow = { version = "55.1.0", features = ["pyarrow"] } datafusion = { version = "48.0.0", features = ["avro", "unicode_expressions"] } datafusion-substrait = { version = "48.0.0", optional = true } @@ -49,6 +50,7 @@ async-trait = "0.1.88" futures = "0.3" object_store = { version = "0.12.1", features = ["aws", "gcp", "azure", "http"] } url = "2" +log = "0.4.27" [build-dependencies] prost-types = "0.13.1" # keep in line with `datafusion-substrait` diff --git a/docs/source/user-guide/data-sources.rst b/docs/source/user-guide/data-sources.rst index ba5967c97..9c95d58e0 100644 --- a/docs/source/user-guide/data-sources.rst +++ 
b/docs/source/user-guide/data-sources.rst @@ -185,3 +185,59 @@ the interface as describe in the :ref:`Custom Table Provider `_ is provided in the DataFusion repository. + +Catalog +======= + +A common technique for organizing tables is using a three level hierarchical approach. DataFusion +supports this form of organizing using the :py:class:`~datafusion.catalog.Catalog`, +:py:class:`~datafusion.catalog.Schema`, and :py:class:`~datafusion.catalog.Table`. By default, +a :py:class:`~datafusion.context.SessionContext` comes with a single Catalog and a single Schema +with the names ``datafusion`` and ``default``, respectively. + +The default implementation uses an in-memory approach to the catalog and schema. We have support +for adding additional in-memory catalogs and schemas. This can be done like in the following +example: + +.. code-block:: python + + from datafusion.catalog import Catalog, Schema + + my_catalog = Catalog.memory_catalog() + my_schema = Schema.memory_schema() + + my_catalog.register_schema("my_schema_name", my_schema) + + ctx.register_catalog("my_catalog_name", my_catalog) + +You could then register tables in ``my_schema`` and access them either through the DataFrame +API or via sql commands such as ``"SELECT * from my_catalog_name.my_schema_name.my_table"``. + +User Defined Catalog and Schema +------------------------------- + +If the in-memory catalogs are insufficient for your uses, there are two approaches you can take +to implementing a custom catalog and/or schema. In the below discussion, we describe how to +implement these for a Catalog, but the approach to implementing for a Schema is nearly +identical. + +DataFusion supports Catalogs written in either Rust or Python. If you write a Catalog in Rust, +you will need to export it as a Python library via PyO3. There is a complete example of a +catalog implemented this way in the +`examples folder `_ +of our repository. 
Writing catalog providers in Rust can typically lead to significant +performance improvements over the Python-based approach. + +To implement a Catalog in Python, you will need to inherit from the abstract base class +:py:class:`~datafusion.catalog.CatalogProvider`. There are examples in the +`unit tests `_ of +implementing a basic Catalog in Python where we simply keep a dictionary of the +registered Schemas. + +One important note for developers is that when we have a Catalog defined in Python, we have +two different ways of accessing this Catalog. First, we register the catalog with a Rust +wrapper. This allows for any Rust-based code to call the Python functions as necessary. +Second, if the user accesses the Catalog via the Python API, we identify this and return +the original Python object that implements the Catalog. This is an important distinction +for developers because we do *not* return a Python wrapper around the Rust wrapper of the +original Python object. diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock index 075ebd5a1..e5a1ca8d1 100644 --- a/examples/datafusion-ffi-example/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -1448,6 +1448,7 @@ dependencies = [ "arrow", "arrow-array", "arrow-schema", + "async-trait", "datafusion", "datafusion-ffi", "pyo3", diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml index 0e17567b9..319163554 100644 --- a/examples/datafusion-ffi-example/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -27,6 +27,7 @@ pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] arrow = { version = "55.0.0" } arrow-array = { version = "55.0.0" } arrow-schema = { version = "55.0.0" } +async-trait = "0.1.88" [build-dependencies] pyo3-build-config = "0.23" diff --git a/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py
b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py new file mode 100644 index 000000000..72aadf64c --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext +from datafusion_ffi_example import MyCatalogProvider + + +def test_catalog_provider(): + ctx = SessionContext() + + my_catalog_name = "my_catalog" + expected_schema_name = "my_schema" + expected_table_name = "my_table" + expected_table_columns = ["units", "price"] + + catalog_provider = MyCatalogProvider() + ctx.register_catalog_provider(my_catalog_name, catalog_provider) + my_catalog = ctx.catalog(my_catalog_name) + + my_catalog_schemas = my_catalog.names() + assert expected_schema_name in my_catalog_schemas + my_database = my_catalog.database(expected_schema_name) + assert expected_table_name in my_database.names() + my_table = my_database.table(expected_table_name) + assert expected_table_columns == my_table.schema.names + + result = ctx.table( + f"{my_catalog_name}.{expected_schema_name}.{expected_table_name}" + ).collect() + assert len(result) == 2 + + col0_result = [r.column(0) for r in result] + col1_result = [r.column(1) for r in result] + expected_col0 = [ + pa.array([10, 20, 30], type=pa.int32()), + pa.array([5, 7], type=pa.int32()), + ] + expected_col1 = [ + pa.array([1, 2, 5], type=pa.float64()), + pa.array([1.5, 2.5], type=pa.float64()), + ] + assert col0_result == expected_col0 + assert col1_result == expected_col1 diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs new file mode 100644 index 000000000..54e61cf3e --- /dev/null +++ b/examples/datafusion-ffi-example/src/catalog_provider.rs @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::{any::Any, fmt::Debug, sync::Arc}; + +use arrow::datatypes::Schema; +use async_trait::async_trait; +use datafusion::{ + catalog::{ + CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, TableProvider, + }, + common::exec_err, + datasource::MemTable, + error::{DataFusionError, Result}, +}; +use datafusion_ffi::catalog_provider::FFI_CatalogProvider; +use pyo3::types::PyCapsule; + +pub fn my_table() -> Arc { + use arrow::datatypes::{DataType, Field}; + use datafusion::common::record_batch; + + let schema = Arc::new(Schema::new(vec![ + Field::new("units", DataType::Int32, true), + Field::new("price", DataType::Float64, true), + ])); + + let partitions = vec![ + record_batch!( + ("units", Int32, vec![10, 20, 30]), + ("price", Float64, vec![1.0, 2.0, 5.0]) + ) + .unwrap(), + record_batch!( + ("units", Int32, vec![5, 7]), + ("price", Float64, vec![1.5, 2.5]) + ) + .unwrap(), + ]; + + Arc::new(MemTable::try_new(schema, vec![partitions]).unwrap()) +} + +#[derive(Debug)] +pub struct FixedSchemaProvider { + inner: MemorySchemaProvider, +} + +impl Default for FixedSchemaProvider { + fn default() -> Self { + let inner = MemorySchemaProvider::new(); + + let table = my_table(); + + let _ = inner.register_table("my_table".to_string(), table).unwrap(); + + Self { inner } + } +} + +#[async_trait] +impl 
SchemaProvider for FixedSchemaProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn table_names(&self) -> Vec { + self.inner.table_names() + } + + async fn table(&self, name: &str) -> Result>, DataFusionError> { + self.inner.table(name).await + } + + fn register_table( + &self, + name: String, + table: Arc, + ) -> Result>> { + self.inner.register_table(name, table) + } + + fn deregister_table(&self, name: &str) -> Result>> { + self.inner.deregister_table(name) + } + + fn table_exist(&self, name: &str) -> bool { + self.inner.table_exist(name) + } +} + +/// This catalog provider is intended only for unit tests. It prepopulates with one +/// schema and only allows for schemas named after four types of fruit. +#[pyclass( + name = "MyCatalogProvider", + module = "datafusion_ffi_example", + subclass +)] +#[derive(Debug)] +pub(crate) struct MyCatalogProvider { + inner: MemoryCatalogProvider, +} + +impl Default for MyCatalogProvider { + fn default() -> Self { + let inner = MemoryCatalogProvider::new(); + + let schema_name: &str = "my_schema"; + let _ = inner.register_schema(schema_name, Arc::new(FixedSchemaProvider::default())); + + Self { inner } + } +} + +impl CatalogProvider for MyCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + self.inner.schema_names() + } + + fn schema(&self, name: &str) -> Option> { + self.inner.schema(name) + } + + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> Result>> { + self.inner.register_schema(name, schema) + } + + fn deregister_schema( + &self, + name: &str, + cascade: bool, + ) -> Result>> { + self.inner.deregister_schema(name, cascade) + } +} + +#[pymethods] +impl MyCatalogProvider { + #[new] + pub fn new() -> Self { + Self { + inner: Default::default(), + } + } + + pub fn __datafusion_catalog_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_catalog_provider".into(); + let catalog_provider = + 
FFI_CatalogProvider::new(Arc::new(MyCatalogProvider::default()), None); + + PyCapsule::new(py, catalog_provider, Some(name)) + } +} diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs index ae08c3b65..3a4cf2247 100644 --- a/examples/datafusion-ffi-example/src/lib.rs +++ b/examples/datafusion-ffi-example/src/lib.rs @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. +use crate::catalog_provider::MyCatalogProvider; use crate::table_function::MyTableFunction; use crate::table_provider::MyTableProvider; use pyo3::prelude::*; +pub(crate) mod catalog_provider; pub(crate) mod table_function; pub(crate) mod table_provider; @@ -26,5 +28,6 @@ pub(crate) mod table_provider; fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index fd7f4fc06..e9d2dba75 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -92,6 +92,7 @@ "TableFunction", "WindowFrame", "WindowUDF", + "catalog", "col", "column", "common", diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 67ab3ead2..536b3a790 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -19,18 +19,33 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Protocol import datafusion._internal as df_internal if TYPE_CHECKING: import pyarrow as pa +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 + + +__all__ = [ + "Catalog", + "CatalogProvider", + "Schema", + "SchemaProvider", + "Table", +] + class Catalog: """DataFusion data catalog.""" - def __init__(self, catalog: df_internal.Catalog) -> None: + def __init__(self, catalog: 
df_internal.catalog.RawCatalog) -> None: """This constructor is not typically called by the end user.""" self.catalog = catalog @@ -38,39 +53,95 @@ def __repr__(self) -> str: """Print a string representation of the catalog.""" return self.catalog.__repr__() - def names(self) -> list[str]: - """Returns the list of databases in this catalog.""" - return self.catalog.names() + def names(self) -> set[str]: + """This is an alias for `schema_names`.""" + return self.schema_names() + + def schema_names(self) -> set[str]: + """Returns the list of schemas in this catalog.""" + return self.catalog.schema_names() + + @staticmethod + def memory_catalog() -> Catalog: + """Create an in-memory catalog provider.""" + catalog = df_internal.catalog.RawCatalog.memory_catalog() + return Catalog(catalog) - def database(self, name: str = "public") -> Database: + def schema(self, name: str = "public") -> Schema: """Returns the database with the given ``name`` from this catalog.""" - return Database(self.catalog.database(name)) + schema = self.catalog.schema(name) + + return ( + Schema(schema) + if isinstance(schema, df_internal.catalog.RawSchema) + else schema + ) + + @deprecated("Use `schema` instead.") + def database(self, name: str = "public") -> Schema: + """Returns the database with the given ``name`` from this catalog.""" + return self.schema(name) + + def register_schema(self, name, schema) -> Schema | None: + """Register a schema with this catalog.""" + if isinstance(schema, Schema): + return self.catalog.register_schema(name, schema._raw_schema) + return self.catalog.register_schema(name, schema) + + def deregister_schema(self, name: str, cascade: bool = True) -> Schema | None: + """Deregister a schema from this catalog.""" + return self.catalog.deregister_schema(name, cascade) -class Database: - """DataFusion Database.""" +class Schema: + """DataFusion Schema.""" - def __init__(self, db: df_internal.Database) -> None: + def __init__(self, schema: df_internal.catalog.RawSchema) 
-> None: """This constructor is not typically called by the end user.""" - self.db = db + self._raw_schema = schema def __repr__(self) -> str: - """Print a string representation of the database.""" - return self.db.__repr__() + """Print a string representation of the schema.""" + return self._raw_schema.__repr__() + + @staticmethod + def memory_schema() -> Schema: + """Create an in-memory schema provider.""" + schema = df_internal.catalog.RawSchema.memory_schema() + return Schema(schema) def names(self) -> set[str]: - """Returns the list of all tables in this database.""" - return self.db.names() + """This is an alias for `table_names`.""" + return self.table_names() + + def table_names(self) -> set[str]: + """Returns the list of all tables in this schema.""" + return self._raw_schema.table_names def table(self, name: str) -> Table: - """Return the table with the given ``name`` from this database.""" - return Table(self.db.table(name)) + """Return the table with the given ``name`` from this schema.""" + return Table(self._raw_schema.table(name)) + + def register_table(self, name, table) -> None: + """Register a table provider in this schema.""" + if isinstance(table, Table): + return self._raw_schema.register_table(name, table.table) + return self._raw_schema.register_table(name, table) + + def deregister_table(self, name: str) -> None: + """Deregister a table provider from this schema.""" + return self._raw_schema.deregister_table(name) + + +@deprecated("Use `Schema` instead.") +class Database(Schema): + """See `Schema`.""" class Table: """DataFusion table.""" - def __init__(self, table: df_internal.Table) -> None: + def __init__(self, table: df_internal.catalog.RawTable) -> None: """This constructor is not typically called by the end user.""" self.table = table @@ -78,6 +149,11 @@ def __repr__(self) -> str: """Print a string representation of the table.""" return self.table.__repr__() + @staticmethod + def from_dataset(dataset: pa.dataset.Dataset) -> Table: + 
"""Turn a pyarrow Dataset into a Table.""" + return Table(df_internal.catalog.RawTable.from_dataset(dataset)) + @property def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" @@ -87,3 +163,86 @@ def schema(self) -> pa.Schema: def kind(self) -> str: """Returns the kind of table.""" return self.table.kind + + +class CatalogProvider(ABC): + """Abstract class for defining a Python based Catalog Provider.""" + + @abstractmethod + def schema_names(self) -> set[str]: + """Set of the names of all schemas in this catalog.""" + ... + + @abstractmethod + def schema(self, name: str) -> Schema | None: + """Retrieve a specific schema from this catalog.""" + ... + + def register_schema( # noqa: B027 + self, name: str, schema: SchemaProviderExportable | SchemaProvider | Schema + ) -> None: + """Add a schema to this catalog. + + This method is optional. If your catalog provides a fixed list of schemas, you + do not need to implement this method. + """ + + def deregister_schema(self, name: str, cascade: bool) -> None: # noqa: B027 + """Remove a schema from this catalog. + + This method is optional. If your catalog provides a fixed list of schemas, you + do not need to implement this method. + + Args: + name: The name of the schema to remove. + cascade: If true, deregister the tables within the schema. + """ + + +class SchemaProvider(ABC): + """Abstract class for defining a Python based Schema Provider.""" + + def owner_name(self) -> str | None: + """Returns the owner of the schema. + + This is an optional method. The default return is None. + """ + return None + + @abstractmethod + def table_names(self) -> set[str]: + """Set of the names of all tables in this schema.""" + ... + + @abstractmethod + def table(self, name: str) -> Table | None: + """Retrieve a specific table from this schema.""" + ... + + def register_table(self, name: str, table: Table) -> None: # noqa: B027 + """Add a table from this schema. + + This method is optional. 
If your schema provides a fixed list of tables, you do + not need to implement this method. + """ + + def deregister_table(self, name, cascade: bool) -> None: # noqa: B027 + """Remove a table from this schema. + + This method is optional. If your schema provides a fixed list of tables, you do + not need to implement this method. + """ + + @abstractmethod + def table_exist(self, name: str) -> bool: + """Returns true if the table exists in this schema.""" + ... + + +class SchemaProviderExportable(Protocol): + """Type hint for object that has __datafusion_schema_provider__ PyCapsule. + + https://docs.rs/datafusion/latest/datafusion/catalog/trait.SchemaProvider.html + """ + + def __datafusion_schema_provider__(self) -> object: ... diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 5b99b0d26..bce51d644 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -29,7 +29,7 @@ except ImportError: from typing_extensions import deprecated # Python 3.12 -from datafusion.catalog import Catalog, Table +from datafusion.catalog import Catalog, CatalogProvider, Table from datafusion.dataframe import DataFrame from datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list from datafusion.record_batch import RecordBatchStream @@ -80,6 +80,15 @@ class TableProviderExportable(Protocol): def __datafusion_table_provider__(self) -> object: ... # noqa: D105 +class CatalogProviderExportable(Protocol): + """Type hint for object that has __datafusion_catalog_provider__ PyCapsule. + + https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProvider.html + """ + + def __datafusion_catalog_provider__(self) -> object: ... 
# noqa: D105 + + class SessionConfig: """Session configuration options.""" @@ -749,6 +758,19 @@ def deregister_table(self, name: str) -> None: """Remove a table from the session.""" self.ctx.deregister_table(name) + def catalog_names(self) -> set[str]: + """Returns the list of catalogs in this context.""" + return self.ctx.catalog_names() + + def register_catalog_provider( + self, name: str, provider: CatalogProviderExportable | CatalogProvider | Catalog + ) -> None: + """Register a catalog provider.""" + if isinstance(provider, Catalog): + self.ctx.register_catalog_provider(name, provider.catalog) + else: + self.ctx.register_catalog_provider(name, provider) + def register_table_provider( self, name: str, provider: TableProviderExportable ) -> None: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 991e6875a..61cb09438 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -760,19 +760,16 @@ def join_on( exprs = [expr.expr for expr in on_exprs] return DataFrame(self.df.join_on(right.df, exprs, how)) - def explain(self, verbose: bool = False, analyze: bool = False) -> DataFrame: - """Return a DataFrame with the explanation of its plan so far. + def explain(self, verbose: bool = False, analyze: bool = False) -> None: + """Print an explanation of the DataFrame's plan so far. If ``analyze`` is specified, runs the plan and reports metrics. Args: verbose: If ``True``, more details will be included. analyze: If ``Tru`e``, the plan will run and metrics reported. - - Returns: - DataFrame with the explanation of its plan. """ - return DataFrame(self.df.explain(verbose, analyze)) + self.df.explain(verbose, analyze) def logical_plan(self) -> LogicalPlan: """Return the unoptimized ``LogicalPlan``. 
diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py index 23b328458..1f9ecbfc3 100644 --- a/python/tests/test_catalog.py +++ b/python/tests/test_catalog.py @@ -14,9 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations +import datafusion as dfn import pyarrow as pa +import pyarrow.dataset as ds import pytest +from datafusion import SessionContext, Table # Note we take in `database` as a variable even though we don't use @@ -27,9 +31,9 @@ def test_basic(ctx, database): ctx.catalog("non-existent") default = ctx.catalog() - assert default.names() == ["public"] + assert default.names() == {"public"} - for db in [default.database("public"), default.database()]: + for db in [default.schema("public"), default.schema()]: assert db.names() == {"csv1", "csv", "csv2"} table = db.table("csv") @@ -41,3 +45,168 @@ def test_basic(ctx, database): pa.field("float", pa.float64(), nullable=True), ] ) + + +def create_dataset() -> Table: + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + dataset = ds.dataset([batch]) + return Table.from_dataset(dataset) + + +class CustomSchemaProvider(dfn.catalog.SchemaProvider): + def __init__(self): + self.tables = {"table1": create_dataset()} + + def table_names(self) -> set[str]: + return set(self.tables.keys()) + + def register_table(self, name: str, table: Table): + self.tables[name] = table + + def deregister_table(self, name, cascade: bool = True): + del self.tables[name] + + def table(self, name: str) -> Table | None: + return self.tables[name] + + def table_exist(self, name: str) -> bool: + return name in self.tables + + +class CustomCatalogProvider(dfn.catalog.CatalogProvider): + def __init__(self): + self.schemas = {"my_schema": CustomSchemaProvider()} + + def schema_names(self) -> set[str]: + return set(self.schemas.keys()) + + def 
schema(self, name: str): + return self.schemas[name] + + def register_schema(self, name: str, schema: dfn.catalog.Schema): + self.schemas[name] = schema + + def deregister_schema(self, name, cascade: bool): + del self.schemas[name] + + +def test_python_catalog_provider(ctx: SessionContext): + ctx.register_catalog_provider("my_catalog", CustomCatalogProvider()) + + # Check the default catalog provider + assert ctx.catalog("datafusion").names() == {"public"} + + my_catalog = ctx.catalog("my_catalog") + assert my_catalog.names() == {"my_schema"} + + my_catalog.register_schema("second_schema", CustomSchemaProvider()) + assert my_catalog.schema_names() == {"my_schema", "second_schema"} + + my_catalog.deregister_schema("my_schema") + assert my_catalog.schema_names() == {"second_schema"} + + +def test_in_memory_providers(ctx: SessionContext): + catalog = dfn.catalog.Catalog.memory_catalog() + ctx.register_catalog_provider("in_mem_catalog", catalog) + + assert ctx.catalog_names() == {"datafusion", "in_mem_catalog"} + + schema = dfn.catalog.Schema.memory_schema() + catalog.register_schema("in_mem_schema", schema) + + schema.register_table("my_table", create_dataset()) + + batches = ctx.sql("select * from in_mem_catalog.in_mem_schema.my_table").collect() + + assert len(batches) == 1 + assert batches[0].column(0) == pa.array([1, 2, 3]) + assert batches[0].column(1) == pa.array([4, 5, 6]) + + +def test_python_schema_provider(ctx: SessionContext): + catalog = ctx.catalog() + + catalog.deregister_schema("public") + + catalog.register_schema("test_schema1", CustomSchemaProvider()) + assert catalog.names() == {"test_schema1"} + + catalog.register_schema("test_schema2", CustomSchemaProvider()) + catalog.deregister_schema("test_schema1") + assert catalog.names() == {"test_schema2"} + + +def test_python_table_provider(ctx: SessionContext): + catalog = ctx.catalog() + + catalog.register_schema("custom_schema", CustomSchemaProvider()) + schema = catalog.schema("custom_schema") + + 
assert schema.table_names() == {"table1"} + + schema.deregister_table("table1") + schema.register_table("table2", create_dataset()) + assert schema.table_names() == {"table2"} + + # Use the default schema instead of our custom schema + + schema = catalog.schema() + + schema.register_table("table3", create_dataset()) + assert schema.table_names() == {"table3"} + + schema.deregister_table("table3") + schema.register_table("table4", create_dataset()) + assert schema.table_names() == {"table4"} + + +def test_in_end_to_end_python_providers(ctx: SessionContext): + """Test registering all python providers and running a query against them.""" + + all_catalog_names = [ + "datafusion", + "custom_catalog", + "in_mem_catalog", + ] + + all_schema_names = [ + "custom_schema", + "in_mem_schema", + ] + + ctx.register_catalog_provider(all_catalog_names[1], CustomCatalogProvider()) + ctx.register_catalog_provider( + all_catalog_names[2], dfn.catalog.Catalog.memory_catalog() + ) + + for catalog_name in all_catalog_names: + catalog = ctx.catalog(catalog_name) + + # Clean out previous schemas if they exist so we can start clean + for schema_name in catalog.schema_names(): + catalog.deregister_schema(schema_name, cascade=False) + + catalog.register_schema(all_schema_names[0], CustomSchemaProvider()) + catalog.register_schema(all_schema_names[1], dfn.catalog.Schema.memory_schema()) + + for schema_name in all_schema_names: + schema = catalog.schema(schema_name) + + for table_name in schema.table_names(): + schema.deregister_table(table_name) + + schema.register_table("test_table", create_dataset()) + + for catalog_name in all_catalog_names: + for schema_name in all_schema_names: + table_full_name = f"{catalog_name}.{schema_name}.test_table" + + batches = ctx.sql(f"select * from {table_full_name}").collect() + + assert len(batches) == 1 + assert batches[0].column(0) == pa.array([1, 2, 3]) + assert batches[0].column(1) == pa.array([4, 5, 6]) diff --git a/python/tests/test_context.py 
b/python/tests/test_context.py index 4a15ac9cf..6dbcc0d5e 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -57,7 +57,7 @@ def test_runtime_configs(tmp_path, path_to_str): ctx = SessionContext(config, runtime) assert ctx is not None - db = ctx.catalog("foo").database("bar") + db = ctx.catalog("foo").schema("bar") assert db is not None @@ -70,7 +70,7 @@ def test_temporary_files(tmp_path, path_to_str): ctx = SessionContext(config, runtime) assert ctx is not None - db = ctx.catalog("foo").database("bar") + db = ctx.catalog("foo").schema("bar") assert db is not None @@ -91,7 +91,7 @@ def test_create_context_with_all_valid_args(): ctx = SessionContext(config, runtime) # verify that at least some of the arguments worked - ctx.catalog("foo").database("bar") + ctx.catalog("foo").schema("bar") with pytest.raises(KeyError): ctx.catalog("datafusion") @@ -105,7 +105,7 @@ def test_register_record_batches(ctx): ctx.register_record_batches("t", [[batch]]) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} result = ctx.sql("SELECT a+b, a-b FROM t").collect() @@ -121,7 +121,7 @@ def test_create_dataframe_registers_unique_table_name(ctx): ) df = ctx.create_dataframe([[batch]]) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -141,7 +141,7 @@ def test_create_dataframe_registers_with_defined_table_name(ctx): ) df = ctx.create_dataframe([[batch]], name="tbl") - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -155,7 +155,7 @@ def test_from_arrow_table(ctx): # convert to DataFrame df = ctx.from_arrow(table) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -200,7 +200,7 @@ def test_from_arrow_table_with_name(ctx): # convert to DataFrame with optional name df 
= ctx.from_arrow(table, name="tbl") - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert tables[0] == "tbl" @@ -213,7 +213,7 @@ def test_from_arrow_table_empty(ctx): # convert to DataFrame df = ctx.from_arrow(table) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -228,7 +228,7 @@ def test_from_arrow_table_empty_no_schema(ctx): # convert to DataFrame df = ctx.from_arrow(table) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -246,7 +246,7 @@ def test_from_pylist(ctx): ] df = ctx.from_pylist(data) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -260,7 +260,7 @@ def test_from_pydict(ctx): data = {"a": [1, 2, 3], "b": [4, 5, 6]} df = ctx.from_pydict(data) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -276,7 +276,7 @@ def test_from_pandas(ctx): pandas_df = pd.DataFrame(data) df = ctx.from_pandas(pandas_df) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -292,7 +292,7 @@ def test_from_polars(ctx): polars_df = pd.DataFrame(data) df = ctx.from_polars(polars_df) - tables = list(ctx.catalog().database().names()) + tables = list(ctx.catalog().schema().names()) assert df assert len(tables) == 1 @@ -303,7 +303,7 @@ def test_from_polars(ctx): def test_register_table(ctx, database): default = ctx.catalog() - public = default.database("public") + public = default.schema("public") assert public.names() == {"csv", "csv1", "csv2"} table = public.table("csv") @@ -313,7 +313,7 @@ def test_register_table(ctx, database): def test_read_table(ctx, database): default = ctx.catalog() - public = 
default.database("public") + public = default.schema("public") assert public.names() == {"csv", "csv1", "csv2"} table = public.table("csv") @@ -323,7 +323,7 @@ def test_read_table(ctx, database): def test_deregister_table(ctx, database): default = ctx.catalog() - public = default.database("public") + public = default.schema("public") assert public.names() == {"csv", "csv1", "csv2"} ctx.deregister_table("csv") @@ -339,7 +339,7 @@ def test_register_dataset(ctx): dataset = ds.dataset([batch]) ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} result = ctx.sql("SELECT a+b, a-b FROM t").collect() @@ -356,7 +356,7 @@ def test_dataset_filter(ctx, capfd): dataset = ds.dataset([batch]) ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} df = ctx.sql("SELECT a+b, a-b FROM t WHERE a BETWEEN 2 and 3 AND b > 5") # Make sure the filter was pushed down in Physical Plan @@ -455,7 +455,7 @@ def test_dataset_filter_nested_data(ctx): dataset = ds.dataset([batch]) ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} df = ctx.table("t") diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 41cee4ef3..c383edc60 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -75,7 +75,7 @@ def test_register_csv(ctx, tmp_path): ) ctx.register_csv("csv3", path, schema=alternative_schema) - assert ctx.catalog().database().names() == { + assert ctx.catalog().schema().names() == { "csv", "csv1", "csv2", @@ -150,7 +150,7 @@ def test_register_parquet(ctx, tmp_path): path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) ctx.register_parquet("t", path) ctx.register_parquet("t1", str(path)) - assert ctx.catalog().database().names() == {"t", "t1"} + assert ctx.catalog().schema().names() == {"t", "t1"} result = 
ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() result = pa.Table.from_batches(result) @@ -188,7 +188,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty parquet_pruning=True, file_extension=".parquet", ) - assert ctx.catalog().database().names() == {"datapp"} + assert ctx.catalog().schema().names() == {"datapp"} result = ctx.sql("SELECT grp, COUNT(*) AS cnt FROM datapp GROUP BY grp").collect() result = pa.Table.from_batches(result) @@ -204,7 +204,7 @@ def test_register_dataset(ctx, tmp_path, path_to_str): dataset = ds.dataset(path, format="parquet") ctx.register_dataset("t", dataset) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() result = pa.Table.from_batches(result) @@ -251,7 +251,7 @@ def test_register_json(ctx, tmp_path): ) ctx.register_json("json3", path, schema=alternative_schema) - assert ctx.catalog().database().names() == { + assert ctx.catalog().schema().names() == { "json", "json1", "json2", @@ -308,7 +308,7 @@ def test_execute(ctx, tmp_path): path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) ctx.register_parquet("t", path) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} # count result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a IS NOT NULL").collect() @@ -451,18 +451,10 @@ def test_udf( id="datetime_ns", ), # Not writtable to parquet - pytest.param( - helpers.data_timedelta("s"), id="timedelta_s", marks=pytest.mark.xfail - ), - pytest.param( - helpers.data_timedelta("ms"), id="timedelta_ms", marks=pytest.mark.xfail - ), - pytest.param( - helpers.data_timedelta("us"), id="timedelta_us", marks=pytest.mark.xfail - ), - pytest.param( - helpers.data_timedelta("ns"), id="timedelta_ns", marks=pytest.mark.xfail - ), + pytest.param(helpers.data_timedelta("s"), id="timedelta_s"), + pytest.param(helpers.data_timedelta("ms"), 
id="timedelta_ms"), + pytest.param(helpers.data_timedelta("us"), id="timedelta_us"), + pytest.param(helpers.data_timedelta("ns"), id="timedelta_ns"), ], ) def test_simple_select(ctx, tmp_path, arr): @@ -524,7 +516,7 @@ def test_register_listing_table( schema=table.schema if pass_schema else None, file_sort_order=file_sort_order, ) - assert ctx.catalog().database().names() == {"my_table"} + assert ctx.catalog().schema().names() == {"my_table"} result = ctx.sql( "SELECT grp, COUNT(*) AS count FROM my_table GROUP BY grp" diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index f367a447d..43aa327d4 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -34,7 +34,7 @@ def test_substrait_serialization(ctx): ctx.register_record_batches("t", [[batch]]) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} # For now just make sure the method calls blow up substrait_plan = ss.Serde.serialize_to_plan("SELECT * FROM t", ctx) @@ -59,7 +59,7 @@ def test_substrait_file_serialization(ctx, tmp_path, path_to_str): ctx.register_record_batches("t", [[batch]]) - assert ctx.catalog().database().names() == {"t"} + assert ctx.catalog().schema().names() == {"t"} path = tmp_path / "substrait_plan" path = str(path) if path_to_str else path diff --git a/src/catalog.rs b/src/catalog.rs index 83f8d08cb..17d4ec3b8 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -15,44 +15,54 @@ // specific language governing permissions and limitations // under the License. 
-use std::collections::HashSet; -use std::sync::Arc; - -use pyo3::exceptions::PyKeyError; -use pyo3::prelude::*; - -use crate::errors::{PyDataFusionError, PyDataFusionResult}; -use crate::utils::wait_for_future; +use crate::dataset::Dataset; +use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError, PyDataFusionResult}; +use crate::utils::{validate_pycapsule, wait_for_future}; +use async_trait::async_trait; +use datafusion::catalog::{MemoryCatalogProvider, MemorySchemaProvider}; +use datafusion::common::DataFusionError; use datafusion::{ arrow::pyarrow::ToPyArrow, catalog::{CatalogProvider, SchemaProvider}, datasource::{TableProvider, TableType}, }; +use datafusion_ffi::schema_provider::{FFI_SchemaProvider, ForeignSchemaProvider}; +use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; +use pyo3::exceptions::PyKeyError; +use pyo3::prelude::*; +use pyo3::types::PyCapsule; +use pyo3::IntoPyObjectExt; +use std::any::Any; +use std::collections::HashSet; +use std::sync::Arc; -#[pyclass(name = "Catalog", module = "datafusion", subclass)] +#[pyclass(name = "RawCatalog", module = "datafusion.catalog", subclass)] +#[derive(Clone)] pub struct PyCatalog { pub catalog: Arc, } -#[pyclass(name = "Database", module = "datafusion", subclass)] -pub struct PyDatabase { - pub database: Arc, +#[pyclass(name = "RawSchema", module = "datafusion.catalog", subclass)] +#[derive(Clone)] +pub struct PySchema { + pub schema: Arc, } -#[pyclass(name = "Table", module = "datafusion", subclass)] +#[pyclass(name = "RawTable", module = "datafusion.catalog", subclass)] +#[derive(Clone)] pub struct PyTable { pub table: Arc, } -impl PyCatalog { - pub fn new(catalog: Arc) -> Self { +impl From> for PyCatalog { + fn from(catalog: Arc) -> Self { Self { catalog } } } -impl PyDatabase { - pub fn new(database: Arc) -> Self { - Self { database } +impl From> for PySchema { + fn from(schema: Arc) -> Self { + Self { schema } } } @@ -68,36 +78,109 @@ impl PyTable { 
#[pymethods] impl PyCatalog { - fn names(&self) -> Vec { - self.catalog.schema_names() + #[new] + fn new(catalog: PyObject) -> Self { + let catalog_provider = + Arc::new(RustWrappedPyCatalogProvider::new(catalog)) as Arc; + catalog_provider.into() + } + + #[staticmethod] + fn memory_catalog() -> Self { + let catalog_provider = + Arc::new(MemoryCatalogProvider::default()) as Arc; + catalog_provider.into() + } + + fn schema_names(&self) -> HashSet { + self.catalog.schema_names().into_iter().collect() } #[pyo3(signature = (name="public"))] - fn database(&self, name: &str) -> PyResult { - match self.catalog.schema(name) { - Some(database) => Ok(PyDatabase::new(database)), - None => Err(PyKeyError::new_err(format!( - "Database with name {name} doesn't exist." - ))), - } + fn schema(&self, name: &str) -> PyResult { + let schema = self + .catalog + .schema(name) + .ok_or(PyKeyError::new_err(format!( + "Schema with name {name} doesn't exist." + )))?; + + Python::with_gil(|py| { + match schema + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => Ok(wrapped_schema.schema_provider.clone_ref(py)), + None => PySchema::from(schema).into_py_any(py), + } + }) + } + + fn register_schema(&self, name: &str, schema_provider: Bound<'_, PyAny>) -> PyResult<()> { + let provider = if schema_provider.hasattr("__datafusion_schema_provider__")? { + let capsule = schema_provider + .getattr("__datafusion_schema_provider__")? 
+ .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_schema_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignSchemaProvider = provider.into(); + Arc::new(provider) as Arc + } else { + match schema_provider.extract::() { + Ok(py_schema) => py_schema.schema, + Err(_) => Arc::new(RustWrappedPySchemaProvider::new(schema_provider.into())) + as Arc, + } + }; + + let _ = self + .catalog + .register_schema(name, provider) + .map_err(py_datafusion_err)?; + + Ok(()) + } + + fn deregister_schema(&self, name: &str, cascade: bool) -> PyResult<()> { + let _ = self + .catalog + .deregister_schema(name, cascade) + .map_err(py_datafusion_err)?; + + Ok(()) } fn __repr__(&self) -> PyResult { - Ok(format!( - "Catalog(schema_names=[{}])", - self.names().join(";") - )) + let mut names: Vec = self.schema_names().into_iter().collect(); + names.sort(); + Ok(format!("Catalog(schema_names=[{}])", names.join(", "))) } } #[pymethods] -impl PyDatabase { - fn names(&self) -> HashSet { - self.database.table_names().into_iter().collect() +impl PySchema { + #[new] + fn new(schema_provider: PyObject) -> Self { + let schema_provider = + Arc::new(RustWrappedPySchemaProvider::new(schema_provider)) as Arc; + schema_provider.into() + } + + #[staticmethod] + fn memory_schema() -> Self { + let schema_provider = Arc::new(MemorySchemaProvider::default()) as Arc; + schema_provider.into() + } + + #[getter] + fn table_names(&self) -> HashSet { + self.schema.table_names().into_iter().collect() } fn table(&self, name: &str, py: Python) -> PyDataFusionResult { - if let Some(table) = wait_for_future(py, self.database.table(name))?? { + if let Some(table) = wait_for_future(py, self.schema.table(name))?? 
{ Ok(PyTable::new(table)) } else { Err(PyDataFusionError::Common(format!( @@ -107,14 +190,49 @@ impl PyDatabase { } fn __repr__(&self) -> PyResult { - Ok(format!( - "Database(table_names=[{}])", - Vec::from_iter(self.names()).join(";") - )) + let mut names: Vec = self.table_names().into_iter().collect(); + names.sort(); + Ok(format!("Schema(table_names=[{}])", names.join(";"))) } - // register_table - // deregister_table + fn register_table(&self, name: &str, table_provider: Bound<'_, PyAny>) -> PyResult<()> { + let provider = if table_provider.hasattr("__datafusion_table_provider__")? { + let capsule = table_provider + .getattr("__datafusion_table_provider__")? + .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_table_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignTableProvider = provider.into(); + Arc::new(provider) as Arc + } else { + match table_provider.extract::() { + Ok(py_table) => py_table.table, + Err(_) => { + let py = table_provider.py(); + let provider = Dataset::new(&table_provider, py)?; + Arc::new(provider) as Arc + } + } + }; + + let _ = self + .schema + .register_table(name.to_string(), provider) + .map_err(py_datafusion_err)?; + + Ok(()) + } + + fn deregister_table(&self, name: &str) -> PyResult<()> { + let _ = self + .schema + .deregister_table(name) + .map_err(py_datafusion_err)?; + + Ok(()) + } } #[pymethods] @@ -125,6 +243,14 @@ impl PyTable { self.table.schema().to_pyarrow(py) } + #[staticmethod] + fn from_dataset(py: Python<'_>, dataset: &Bound<'_, PyAny>) -> PyResult { + let ds = Arc::new(Dataset::new(dataset, py).map_err(py_datafusion_err)?) + as Arc; + + Ok(Self::new(ds)) + } + /// Get the type of this table for metadata/catalog purposes. 
#[getter] fn kind(&self) -> &str { @@ -145,3 +271,285 @@ impl PyTable { // fn has_exact_statistics // fn supports_filter_pushdown } + +#[derive(Debug)] +pub(crate) struct RustWrappedPySchemaProvider { + schema_provider: PyObject, + owner_name: Option, +} + +impl RustWrappedPySchemaProvider { + pub fn new(schema_provider: PyObject) -> Self { + let owner_name = Python::with_gil(|py| { + schema_provider + .bind(py) + .getattr("owner_name") + .ok() + .map(|name| name.to_string()) + }); + + Self { + schema_provider, + owner_name, + } + } + + fn table_inner(&self, name: &str) -> PyResult>> { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + let py_table_method = provider.getattr("table")?; + + let py_table = py_table_method.call((name,), None)?; + if py_table.is_none() { + return Ok(None); + } + + if py_table.hasattr("__datafusion_table_provider__")? { + let capsule = provider.getattr("__datafusion_table_provider__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_table_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignTableProvider = provider.into(); + + Ok(Some(Arc::new(provider) as Arc)) + } else { + if let Ok(inner_table) = py_table.getattr("table") { + if let Ok(inner_table) = inner_table.extract::() { + return Ok(Some(inner_table.table)); + } + } + + match py_table.extract::() { + Ok(py_table) => Ok(Some(py_table.table)), + Err(_) => { + let ds = Dataset::new(&py_table, py).map_err(py_datafusion_err)?; + Ok(Some(Arc::new(ds) as Arc)) + } + } + } + }) + } +} + +#[async_trait] +impl SchemaProvider for RustWrappedPySchemaProvider { + fn owner_name(&self) -> Option<&str> { + self.owner_name.as_deref() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn table_names(&self) -> Vec { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + + provider + .getattr("table_names") + .and_then(|names| names.extract::>()) 
+ .unwrap_or_else(|err| { + log::error!("Unable to get table_names: {err}"); + Vec::default() + }) + }) + } + + async fn table( + &self, + name: &str, + ) -> datafusion::common::Result>, DataFusionError> { + self.table_inner(name).map_err(to_datafusion_err) + } + + fn register_table( + &self, + name: String, + table: Arc, + ) -> datafusion::common::Result>> { + let py_table = PyTable::new(table); + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + let _ = provider + .call_method1("register_table", (name, py_table)) + .map_err(to_datafusion_err)?; + // Since the definition of `register_table` says that an error + // will be returned if the table already exists, there is no + // case where we want to return a table provider as output. + Ok(None) + }) + } + + fn deregister_table( + &self, + name: &str, + ) -> datafusion::common::Result>> { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + let table = provider + .call_method1("deregister_table", (name,)) + .map_err(to_datafusion_err)?; + if table.is_none() { + return Ok(None); + } + + // If we can turn this table provider into a `Dataset`, return it. + // Otherwise, return None. 
+ let dataset = match Dataset::new(&table, py) { + Ok(dataset) => Some(Arc::new(dataset) as Arc), + Err(_) => None, + }; + + Ok(dataset) + }) + } + + fn table_exist(&self, name: &str) -> bool { + Python::with_gil(|py| { + let provider = self.schema_provider.bind(py); + provider + .call_method1("table_exist", (name,)) + .and_then(|pyobj| pyobj.extract()) + .unwrap_or(false) + }) + } +} + +#[derive(Debug)] +pub(crate) struct RustWrappedPyCatalogProvider { + pub(crate) catalog_provider: PyObject, +} + +impl RustWrappedPyCatalogProvider { + pub fn new(catalog_provider: PyObject) -> Self { + Self { catalog_provider } + } + + fn schema_inner(&self, name: &str) -> PyResult>> { + Python::with_gil(|py| { + let provider = self.catalog_provider.bind(py); + + let py_schema = provider.call_method1("schema", (name,))?; + if py_schema.is_none() { + return Ok(None); + } + + if py_schema.hasattr("__datafusion_schema_provider__")? { + let capsule = provider + .getattr("__datafusion_schema_provider__")? + .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_schema_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignSchemaProvider = provider.into(); + + Ok(Some(Arc::new(provider) as Arc)) + } else { + if let Ok(inner_schema) = py_schema.getattr("schema") { + if let Ok(inner_schema) = inner_schema.extract::() { + return Ok(Some(inner_schema.schema)); + } + } + match py_schema.extract::() { + Ok(inner_schema) => Ok(Some(inner_schema.schema)), + Err(_) => { + let py_schema = RustWrappedPySchemaProvider::new(py_schema.into()); + + Ok(Some(Arc::new(py_schema) as Arc)) + } + } + } + }) + } +} + +#[async_trait] +impl CatalogProvider for RustWrappedPyCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + Python::with_gil(|py| { + let provider = self.catalog_provider.bind(py); + provider + .getattr("schema_names") + .and_then(|names| 
names.extract::>()) + .unwrap_or_else(|err| { + log::error!("Unable to get schema_names: {err}"); + Vec::default() + }) + }) + } + + fn schema(&self, name: &str) -> Option> { + self.schema_inner(name).unwrap_or_else(|err| { + log::error!("CatalogProvider schema returned error: {err}"); + None + }) + } + + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> datafusion::common::Result>> { + // JRIGHT HERE + // let py_schema: PySchema = schema.into(); + Python::with_gil(|py| { + let py_schema = match schema + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => wrapped_schema.schema_provider.as_any(), + None => &PySchema::from(schema) + .into_py_any(py) + .map_err(to_datafusion_err)?, + }; + + let provider = self.catalog_provider.bind(py); + let schema = provider + .call_method1("register_schema", (name, py_schema)) + .map_err(to_datafusion_err)?; + if schema.is_none() { + return Ok(None); + } + + let schema = Arc::new(RustWrappedPySchemaProvider::new(schema.into())) + as Arc; + + Ok(Some(schema)) + }) + } + + fn deregister_schema( + &self, + name: &str, + cascade: bool, + ) -> datafusion::common::Result>> { + Python::with_gil(|py| { + let provider = self.catalog_provider.bind(py); + let schema = provider + .call_method1("deregister_schema", (name, cascade)) + .map_err(to_datafusion_err)?; + if schema.is_none() { + return Ok(None); + } + + let schema = Arc::new(RustWrappedPySchemaProvider::new(schema.into())) + as Arc; + + Ok(Some(schema)) + }) + } +} + +pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + Ok(()) +} diff --git a/src/common/data_type.rs b/src/common/data_type.rs index f5f8a6b06..5cf9d6e9f 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -172,7 +172,7 @@ impl DataTypeMap { SqlType::DATE, )), DataType::Duration(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), 
DataType::Interval(interval_unit) => Ok(DataTypeMap::new( DataType::Interval(*interval_unit), @@ -189,7 +189,7 @@ impl DataTypeMap { SqlType::BINARY, )), DataType::FixedSizeBinary(_) => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", arrow_type)), + DataFusionError::NotImplemented(format!("{arrow_type:?}")), )), DataType::LargeBinary => Ok(DataTypeMap::new( DataType::LargeBinary, @@ -207,23 +207,22 @@ impl DataTypeMap { SqlType::VARCHAR, )), DataType::List(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - arrow_type + "{arrow_type:?}" )))), DataType::FixedSizeList(_, _) => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", arrow_type)), + DataFusionError::NotImplemented(format!("{arrow_type:?}")), )), DataType::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Struct(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Union(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Dictionary(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::Decimal128(precision, scale) => Ok(DataTypeMap::new( DataType::Decimal128(*precision, *scale), @@ -236,23 +235,22 @@ impl DataTypeMap { SqlType::DECIMAL, )), DataType::Map(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::RunEndEncoded(_, _) => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", arrow_type)), + DataFusionError::NotImplemented(format!("{arrow_type:?}")), )), DataType::BinaryView => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + 
format!("{arrow_type:?}"), ))), DataType::Utf8View => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - arrow_type + "{arrow_type:?}" )))), DataType::ListView(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), DataType::LargeListView(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", arrow_type), + format!("{arrow_type:?}"), ))), } } @@ -379,8 +377,7 @@ impl DataTypeMap { "double" => Ok(DataType::Float64), "byte_array" => Ok(DataType::Utf8), _ => Err(PyValueError::new_err(format!( - "Unable to determine Arrow Data Type from Parquet String type: {:?}", - parquet_str_type + "Unable to determine Arrow Data Type from Parquet String type: {parquet_str_type:?}" ))), }; DataTypeMap::map_from_arrow_type(&arrow_dtype?) @@ -404,12 +401,10 @@ impl DataTypeMap { pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult { match sql_type { SqlType::ANY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::ARRAY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::BIGINT => Ok(DataTypeMap::new( DataType::Int64, @@ -432,11 +427,10 @@ impl DataTypeMap { SqlType::CHAR, )), SqlType::COLUMN_LIST => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::CURSOR => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::DATE => Ok(DataTypeMap::new( DataType::Date64, @@ -449,8 +443,7 @@ impl DataTypeMap { SqlType::DECIMAL, )), SqlType::DISTINCT => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::DOUBLE => Ok(DataTypeMap::new( DataType::Decimal256(1, 1), @@ -458,7 +451,7 @@ impl DataTypeMap { SqlType::DOUBLE, )), SqlType::DYNAMIC_STAR => 
Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::FLOAT => Ok(DataTypeMap::new( DataType::Decimal128(1, 1), @@ -466,8 +459,7 @@ impl DataTypeMap { SqlType::FLOAT, )), SqlType::GEOMETRY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::INTEGER => Ok(DataTypeMap::new( DataType::Int8, @@ -475,55 +467,52 @@ impl DataTypeMap { SqlType::INTEGER, )), SqlType::INTERVAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::INTERVAL_DAY => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_DAY_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_DAY_MINUTE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_DAY_SECOND => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_HOUR_MINUTE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_HOUR_SECOND => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_MINUTE => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_MINUTE_SECOND => Err(py_datafusion_err( - 
DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::INTERVAL_MONTH => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_SECOND => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_YEAR => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::INTERVAL_YEAR_MONTH => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::MAP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::MULTISET => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::NULL => Ok(DataTypeMap::new( DataType::Null, @@ -531,20 +520,16 @@ impl DataTypeMap { SqlType::NULL, )), SqlType::OTHER => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::REAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::ROW => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::SARG => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::SMALLINT => Ok(DataTypeMap::new( DataType::Int16, @@ -552,25 +537,22 @@ impl DataTypeMap { SqlType::SMALLINT, )), SqlType::STRUCTURED => Err(py_datafusion_err(DataFusionError::NotImplemented( - format!("{:?}", sql_type), + format!("{sql_type:?}"), ))), SqlType::SYMBOL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::TIME => 
Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::TIME_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::TIMESTAMP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err( - DataFusionError::NotImplemented(format!("{:?}", sql_type)), + DataFusionError::NotImplemented(format!("{sql_type:?}")), )), SqlType::TINYINT => Ok(DataTypeMap::new( DataType::Int8, @@ -578,8 +560,7 @@ impl DataTypeMap { SqlType::TINYINT, )), SqlType::UNKNOWN => Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - sql_type + "{sql_type:?}" )))), SqlType::VARBINARY => Ok(DataTypeMap::new( DataType::LargeBinary, @@ -682,8 +663,7 @@ impl PyDataType { "datetime64" => Ok(DataType::Date64), "object" => Ok(DataType::Utf8), _ => Err(PyValueError::new_err(format!( - "Unable to determine Arrow Data Type from Arrow String type: {:?}", - arrow_str_type + "Unable to determine Arrow Data Type from Arrow String type: {arrow_str_type:?}" ))), }; Ok(PyDataType { diff --git a/src/context.rs b/src/context.rs index 6ce1f12bc..36133a33d 100644 --- a/src/context.rs +++ b/src/context.rs @@ -31,7 +31,7 @@ use uuid::Uuid; use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; -use crate::catalog::{PyCatalog, PyTable}; +use crate::catalog::{PyCatalog, PyTable, RustWrappedPyCatalogProvider}; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; @@ -49,6 +49,7 @@ use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_f use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::arrow::pyarrow::PyArrowType; use 
datafusion::arrow::record_batch::RecordBatch; +use datafusion::catalog::CatalogProvider; use datafusion::common::TableReference; use datafusion::common::{exec_err, ScalarValue}; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; @@ -69,8 +70,10 @@ use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; +use datafusion_ffi::catalog_provider::{FFI_CatalogProvider, ForeignCatalogProvider}; use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; +use pyo3::IntoPyObjectExt; use tokio::task::JoinHandle; /// Configuration options for a SessionContext @@ -365,7 +368,7 @@ impl PySessionContext { } else { &upstream_host }; - let url_string = format!("{}{}", scheme, derived_host); + let url_string = format!("{scheme}{derived_host}"); let url = Url::parse(&url_string).unwrap(); self.ctx.runtime_env().register_object_store(&url, store); Ok(()) @@ -614,6 +617,34 @@ impl PySessionContext { Ok(()) } + pub fn register_catalog_provider( + &mut self, + name: &str, + provider: Bound<'_, PyAny>, + ) -> PyDataFusionResult<()> { + let provider = if provider.hasattr("__datafusion_catalog_provider__")? { + let capsule = provider + .getattr("__datafusion_catalog_provider__")? 
+ .call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_catalog_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: ForeignCatalogProvider = provider.into(); + Arc::new(provider) as Arc + } else { + match provider.extract::() { + Ok(py_catalog) => py_catalog.catalog, + Err(_) => Arc::new(RustWrappedPyCatalogProvider::new(provider.into())) + as Arc, + } + }; + + let _ = self.ctx.register_catalog(name, provider); + + Ok(()) + } + /// Construct datafusion dataframe from Arrow Table pub fn register_table_provider( &mut self, @@ -845,14 +876,24 @@ impl PySessionContext { } #[pyo3(signature = (name="datafusion"))] - pub fn catalog(&self, name: &str) -> PyResult { - match self.ctx.catalog(name) { - Some(catalog) => Ok(PyCatalog::new(catalog)), - None => Err(PyKeyError::new_err(format!( - "Catalog with name {} doesn't exist.", - &name, - ))), - } + pub fn catalog(&self, name: &str) -> PyResult { + let catalog = self.ctx.catalog(name).ok_or(PyKeyError::new_err(format!( + "Catalog with name {name} doesn't exist." 
+ )))?; + + Python::with_gil(|py| { + match catalog + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => Ok(wrapped_schema.catalog_provider.clone_ref(py)), + None => PyCatalog::from(catalog).into_py_any(py), + } + }) + } + + pub fn catalog_names(&self) -> HashSet { + self.ctx.catalog_names().into_iter().collect() } pub fn tables(&self) -> HashSet { diff --git a/src/expr.rs b/src/expr.rs index 6b1d01d65..f1e002367 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -171,12 +171,10 @@ impl PyExpr { Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?), Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?), Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( - "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", - value + "Converting Expr::ScalarFunction to a Python object is not implemented: {value:?}" ))), Expr::WindowFunction(value) => Err(py_unsupported_variant_err(format!( - "Converting Expr::WindowFunction to a Python object is not implemented: {:?}", - value + "Converting Expr::WindowFunction to a Python object is not implemented: {value:?}" ))), Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?), Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?), @@ -188,8 +186,7 @@ impl PyExpr { } #[allow(deprecated)] Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( - "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", - qualifier, options + "Converting Expr::Wildcard to a Python object is not implemented : {qualifier:?} {options:?}" ))), Expr::GroupingSet(value) => { Ok(grouping_set::PyGroupingSet::from(value.clone()).into_bound_py_any(py)?) @@ -198,8 +195,7 @@ impl PyExpr { Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?) 
} Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!( - "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {:?} - {:?}", - data_type, column + "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {data_type:?} - {column:?}" ))), Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?), } @@ -755,8 +751,7 @@ impl PyExpr { Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type), Expr::Literal(scalar_value, _) => DataTypeMap::map_from_scalar_value(scalar_value), _ => Err(py_type_err(format!( - "Non Expr::Literal encountered in types: {:?}", - expr + "Non Expr::Literal encountered in types: {expr:?}" ))), } } diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index a99d83d23..fd4393271 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -116,7 +116,7 @@ impl PyAggregate { } fn __repr__(&self) -> PyResult { - Ok(format!("Aggregate({})", self)) + Ok(format!("Aggregate({self})")) } } diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs index c09f116e3..7c5d3d31f 100644 --- a/src/expr/aggregate_expr.rs +++ b/src/expr/aggregate_expr.rs @@ -75,6 +75,6 @@ impl PyAggregateFunction { /// Get a String representation of this column fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/alias.rs b/src/expr/alias.rs index e8e03cfad..40746f200 100644 --- a/src/expr/alias.rs +++ b/src/expr/alias.rs @@ -64,6 +64,6 @@ impl PyAlias { /// Get a String representation of this column fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 62f93cd26..e8081e95b 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -69,7 +69,7 @@ impl PyAnalyze { } fn __repr__(&self) -> PyResult { - Ok(format!("Analyze({})", self)) + Ok(format!("Analyze({self})")) } } diff --git 
a/src/expr/between.rs b/src/expr/between.rs index a2cac1442..817f1baae 100644 --- a/src/expr/between.rs +++ b/src/expr/between.rs @@ -71,6 +71,6 @@ impl PyBetween { } fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/column.rs b/src/expr/column.rs index 365dbc0d2..50f316f1c 100644 --- a/src/expr/column.rs +++ b/src/expr/column.rs @@ -45,7 +45,7 @@ impl PyColumn { /// Get the column relation fn relation(&self) -> Option { - self.col.relation.as_ref().map(|r| format!("{}", r)) + self.col.relation.as_ref().map(|r| format!("{r}")) } /// Get the fully-qualified column name diff --git a/src/expr/copy_to.rs b/src/expr/copy_to.rs index ebfcb8ebc..473dabfed 100644 --- a/src/expr/copy_to.rs +++ b/src/expr/copy_to.rs @@ -106,7 +106,7 @@ impl PyCopyTo { } fn __repr__(&self) -> PyResult { - Ok(format!("CopyTo({})", self)) + Ok(format!("CopyTo({self})")) } fn __name__(&self) -> PyResult { @@ -129,7 +129,7 @@ impl Display for PyFileType { #[pymethods] impl PyFileType { fn __repr__(&self) -> PyResult { - Ok(format!("FileType({})", self)) + Ok(format!("FileType({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_catalog.rs b/src/expr/create_catalog.rs index f4ea0f517..d2d2ee8f6 100644 --- a/src/expr/create_catalog.rs +++ b/src/expr/create_catalog.rs @@ -81,7 +81,7 @@ impl PyCreateCatalog { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateCatalog({})", self)) + Ok(format!("CreateCatalog({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_catalog_schema.rs b/src/expr/create_catalog_schema.rs index 85f447e1e..e794962f5 100644 --- a/src/expr/create_catalog_schema.rs +++ b/src/expr/create_catalog_schema.rs @@ -81,7 +81,7 @@ impl PyCreateCatalogSchema { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateCatalogSchema({})", self)) + Ok(format!("CreateCatalogSchema({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_external_table.rs 
b/src/expr/create_external_table.rs index 01ce7d0ca..3e35af006 100644 --- a/src/expr/create_external_table.rs +++ b/src/expr/create_external_table.rs @@ -164,7 +164,7 @@ impl PyCreateExternalTable { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateExternalTable({})", self)) + Ok(format!("CreateExternalTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_function.rs b/src/expr/create_function.rs index 6f3c3f0ff..c02ceebb1 100644 --- a/src/expr/create_function.rs +++ b/src/expr/create_function.rs @@ -163,7 +163,7 @@ impl PyCreateFunction { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateFunction({})", self)) + Ok(format!("CreateFunction({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_index.rs b/src/expr/create_index.rs index 13dadbc3f..0f4b5011a 100644 --- a/src/expr/create_index.rs +++ b/src/expr/create_index.rs @@ -110,7 +110,7 @@ impl PyCreateIndex { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateIndex({})", self)) + Ok(format!("CreateIndex({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs index 8872b2d47..37f4d3420 100644 --- a/src/expr/create_memory_table.rs +++ b/src/expr/create_memory_table.rs @@ -78,7 +78,7 @@ impl PyCreateMemoryTable { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateMemoryTable({})", self)) + Ok(format!("CreateMemoryTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index 87bb76876..718e404d0 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -75,7 +75,7 @@ impl PyCreateView { } fn __repr__(&self) -> PyResult { - Ok(format!("CreateView({})", self)) + Ok(format!("CreateView({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/describe_table.rs b/src/expr/describe_table.rs index 5658a13f2..6c48f3c77 100644 --- a/src/expr/describe_table.rs +++ b/src/expr/describe_table.rs @@ -61,7 +61,7 @@ 
impl PyDescribeTable { } fn __repr__(&self) -> PyResult { - Ok(format!("DescribeTable({})", self)) + Ok(format!("DescribeTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs index b62b776f8..889e7099d 100644 --- a/src/expr/distinct.rs +++ b/src/expr/distinct.rs @@ -48,8 +48,7 @@ impl Display for PyDistinct { Distinct::All(input) => write!( f, "Distinct ALL - \nInput: {:?}", - input, + \nInput: {input:?}", ), Distinct::On(distinct_on) => { write!( @@ -71,7 +70,7 @@ impl PyDistinct { } fn __repr__(&self) -> PyResult { - Ok(format!("Distinct({})", self)) + Ok(format!("Distinct({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/drop_catalog_schema.rs b/src/expr/drop_catalog_schema.rs index b7420a99c..b4a4c521c 100644 --- a/src/expr/drop_catalog_schema.rs +++ b/src/expr/drop_catalog_schema.rs @@ -101,7 +101,7 @@ impl PyDropCatalogSchema { } fn __repr__(&self) -> PyResult { - Ok(format!("DropCatalogSchema({})", self)) + Ok(format!("DropCatalogSchema({self})")) } } diff --git a/src/expr/drop_function.rs b/src/expr/drop_function.rs index 9fbd78fdc..fca9eb94b 100644 --- a/src/expr/drop_function.rs +++ b/src/expr/drop_function.rs @@ -76,7 +76,7 @@ impl PyDropFunction { } fn __repr__(&self) -> PyResult { - Ok(format!("DropFunction({})", self)) + Ok(format!("DropFunction({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/drop_table.rs b/src/expr/drop_table.rs index 96983c1cf..3f442539a 100644 --- a/src/expr/drop_table.rs +++ b/src/expr/drop_table.rs @@ -70,7 +70,7 @@ impl PyDropTable { } fn __repr__(&self) -> PyResult { - Ok(format!("DropTable({})", self)) + Ok(format!("DropTable({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/drop_view.rs b/src/expr/drop_view.rs index 1d1ab1e59..6196c8bb5 100644 --- a/src/expr/drop_view.rs +++ b/src/expr/drop_view.rs @@ -83,7 +83,7 @@ impl PyDropView { } fn __repr__(&self) -> PyResult { - Ok(format!("DropView({})", self)) + 
Ok(format!("DropView({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index a1534ac15..758213423 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -65,7 +65,7 @@ impl PyEmptyRelation { /// Get a String representation of this column fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } fn __name__(&self) -> PyResult { diff --git a/src/expr/filter.rs b/src/expr/filter.rs index 9bdb667cd..4fcb600cd 100644 --- a/src/expr/filter.rs +++ b/src/expr/filter.rs @@ -72,7 +72,7 @@ impl PyFilter { } fn __repr__(&self) -> String { - format!("Filter({})", self) + format!("Filter({self})") } } diff --git a/src/expr/join.rs b/src/expr/join.rs index 76ec532e7..b8d1d9da7 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -177,7 +177,7 @@ impl PyJoin { } fn __repr__(&self) -> PyResult { - Ok(format!("Join({})", self)) + Ok(format!("Join({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/like.rs b/src/expr/like.rs index 2e1f060bd..f180f5d4c 100644 --- a/src/expr/like.rs +++ b/src/expr/like.rs @@ -75,7 +75,7 @@ impl PyLike { } fn __repr__(&self) -> String { - format!("Like({})", self) + format!("Like({self})") } } @@ -133,7 +133,7 @@ impl PyILike { } fn __repr__(&self) -> String { - format!("Like({})", self) + format!("Like({self})") } } @@ -191,6 +191,6 @@ impl PySimilarTo { } fn __repr__(&self) -> String { - format!("Like({})", self) + format!("Like({self})") } } diff --git a/src/expr/limit.rs b/src/expr/limit.rs index c2a33ff89..92552814e 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -81,7 +81,7 @@ impl PyLimit { } fn __repr__(&self) -> PyResult { - Ok(format!("Limit({})", self)) + Ok(format!("Limit({self})")) } } diff --git a/src/expr/projection.rs b/src/expr/projection.rs index dc7e5e3c1..b5a9ef34a 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -85,7 +85,7 @@ impl PyProjection { } fn __repr__(&self) -> PyResult { - 
Ok(format!("Projection({})", self)) + Ok(format!("Projection({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/recursive_query.rs b/src/expr/recursive_query.rs index 65181f7d3..2517b7417 100644 --- a/src/expr/recursive_query.rs +++ b/src/expr/recursive_query.rs @@ -89,7 +89,7 @@ impl PyRecursiveQuery { } fn __repr__(&self) -> PyResult { - Ok(format!("RecursiveQuery({})", self)) + Ok(format!("RecursiveQuery({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/repartition.rs b/src/expr/repartition.rs index 3e782d6af..48b5e7041 100644 --- a/src/expr/repartition.rs +++ b/src/expr/repartition.rs @@ -108,7 +108,7 @@ impl PyRepartition { } fn __repr__(&self) -> PyResult { - Ok(format!("Repartition({})", self)) + Ok(format!("Repartition({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/sort.rs b/src/expr/sort.rs index ed4947591..79a8aee50 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -87,7 +87,7 @@ impl PySort { } fn __repr__(&self) -> PyResult { - Ok(format!("Sort({})", self)) + Ok(format!("Sort({self})")) } } diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs index 12f74e4d8..79e35d978 100644 --- a/src/expr/sort_expr.rs +++ b/src/expr/sort_expr.rs @@ -85,6 +85,6 @@ impl PySortExpr { } fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index 5ebfe6927..77f56f9a9 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -62,7 +62,7 @@ impl PySubquery { } fn __repr__(&self) -> PyResult { - Ok(format!("Subquery({})", self)) + Ok(format!("Subquery({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs index 267a4d485..3302e7f23 100644 --- a/src/expr/subquery_alias.rs +++ b/src/expr/subquery_alias.rs @@ -72,7 +72,7 @@ impl PySubqueryAlias { } fn __repr__(&self) -> PyResult { - Ok(format!("SubqueryAlias({})", self)) + Ok(format!("SubqueryAlias({self})")) } fn 
__name__(&self) -> PyResult { diff --git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index 6a0d53f0f..329964687 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -136,7 +136,7 @@ impl PyTableScan { } fn __repr__(&self) -> PyResult { - Ok(format!("TableScan({})", self)) + Ok(format!("TableScan({self})")) } } diff --git a/src/expr/union.rs b/src/expr/union.rs index 5a08ccc13..e0b221398 100644 --- a/src/expr/union.rs +++ b/src/expr/union.rs @@ -66,7 +66,7 @@ impl PyUnion { } fn __repr__(&self) -> PyResult { - Ok(format!("Union({})", self)) + Ok(format!("Union({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/unnest.rs b/src/expr/unnest.rs index 8e70e0990..c8833347f 100644 --- a/src/expr/unnest.rs +++ b/src/expr/unnest.rs @@ -66,7 +66,7 @@ impl PyUnnest { } fn __repr__(&self) -> PyResult { - Ok(format!("Unnest({})", self)) + Ok(format!("Unnest({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/unnest_expr.rs b/src/expr/unnest_expr.rs index 2234d24b1..634186ed8 100644 --- a/src/expr/unnest_expr.rs +++ b/src/expr/unnest_expr.rs @@ -58,7 +58,7 @@ impl PyUnnestExpr { } fn __repr__(&self) -> PyResult { - Ok(format!("UnnestExpr({})", self)) + Ok(format!("UnnestExpr({self})")) } fn __name__(&self) -> PyResult { diff --git a/src/expr/window.rs b/src/expr/window.rs index 052d9eeb4..a408731c2 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -185,8 +185,7 @@ impl PyWindowFrame { "groups" => WindowFrameUnits::Groups, _ => { return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, + "{units:?}", )))); } }; @@ -197,8 +196,7 @@ impl PyWindowFrame { WindowFrameUnits::Rows => WindowFrameBound::Preceding(ScalarValue::UInt64(None)), WindowFrameUnits::Groups => { return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, + "{units:?}", )))); } }, @@ -210,8 +208,7 @@ impl PyWindowFrame { WindowFrameUnits::Range => 
WindowFrameBound::Following(ScalarValue::UInt64(None)), WindowFrameUnits::Groups => { return Err(py_datafusion_err(DataFusionError::NotImplemented(format!( - "{:?}", - units, + "{units:?}", )))); } }, @@ -236,7 +233,7 @@ impl PyWindowFrame { /// Get a String representation of this window frame fn __repr__(&self) -> String { - format!("{}", self) + format!("{self}") } } diff --git a/src/functions.rs b/src/functions.rs index b2bafcb65..b40500b8b 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -937,7 +937,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(left))?; m.add_wrapped(wrap_pyfunction!(length))?; m.add_wrapped(wrap_pyfunction!(ln))?; - m.add_wrapped(wrap_pyfunction!(log))?; + m.add_wrapped(wrap_pyfunction!(self::log))?; m.add_wrapped(wrap_pyfunction!(log10))?; m.add_wrapped(wrap_pyfunction!(log2))?; m.add_wrapped(wrap_pyfunction!(lower))?; diff --git a/src/lib.rs b/src/lib.rs index 1293eee3c..29d3f41da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,10 +77,10 @@ pub(crate) struct TokioRuntime(tokio::runtime::Runtime); /// datafusion directory. #[pymodule] fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { + // Initialize logging + pyo3_log::init(); + // Register the python classes - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; @@ -98,6 +98,10 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + let catalog = PyModule::new(py, "catalog")?; + catalog::init_module(&catalog)?; + m.add_submodule(&catalog)?; + // Register `common` as a submodule. 
Matching `datafusion-common` https://docs.rs/datafusion-common/latest/datafusion_common/ let common = PyModule::new(py, "common")?; common::init_module(&common)?; diff --git a/src/physical_plan.rs b/src/physical_plan.rs index f0be45c6a..49db643e1 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -78,8 +78,7 @@ impl PyExecutionPlan { let proto_plan = datafusion_proto::protobuf::PhysicalPlanNode::decode(bytes).map_err(|e| { PyRuntimeError::new_err(format!( - "Unable to decode logical node from serialized bytes: {}", - e + "Unable to decode logical node from serialized bytes: {e}" )) })?; diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 198d68bdc..97d320470 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -201,8 +201,7 @@ impl PyLogicalPlan { let proto_plan = datafusion_proto::protobuf::LogicalPlanNode::decode(bytes).map_err(|e| { PyRuntimeError::new_err(format!( - "Unable to decode logical node from serialized bytes: {}", - e + "Unable to decode logical node from serialized bytes: {e}" )) })?; diff --git a/src/utils.rs b/src/utils.rs index f4e121fd5..3b30de5de 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -109,8 +109,7 @@ pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyRe let capsule_name = capsule_name.unwrap().to_str()?; if capsule_name != name { return Err(PyValueError::new_err(format!( - "Expected name '{}' in PyCapsule, instead got '{}'", - name, capsule_name + "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'" ))); } @@ -127,7 +126,7 @@ pub(crate) fn py_obj_to_scalar_value(py: Python, obj: PyObject) -> PyResult Date: Wed, 2 Jul 2025 09:59:02 -0400 Subject: [PATCH 060/206] feat: add FFI support for user defined functions (#1145) * Intermediate work adding ffi scalar udf * Add scalar UDF and example * Add aggregate udf via ffi * Initial commit for window ffi integration * Remove unused import --- docs/source/contributor-guide/ffi.rst | 2 +- examples/datafusion-ffi-example/Cargo.lock | 217 
++++++++++-------- examples/datafusion-ffi-example/Cargo.toml | 8 +- .../python/tests/_test_aggregate_udf.py | 77 +++++++ .../python/tests/_test_scalar_udf.py | 70 ++++++ .../python/tests/_test_window_udf.py | 89 +++++++ .../src/aggregate_udf.rs | 81 +++++++ .../src/catalog_provider.rs | 1 - examples/datafusion-ffi-example/src/lib.rs | 9 + .../datafusion-ffi-example/src/scalar_udf.rs | 91 ++++++++ .../datafusion-ffi-example/src/window_udf.rs | 81 +++++++ python/datafusion/user_defined.py | 107 ++++++++- src/functions.rs | 2 +- src/udaf.rs | 31 ++- src/udf.rs | 25 +- src/udwf.rs | 27 ++- 16 files changed, 805 insertions(+), 113 deletions(-) create mode 100644 examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py create mode 100644 examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py create mode 100644 examples/datafusion-ffi-example/python/tests/_test_window_udf.py create mode 100644 examples/datafusion-ffi-example/src/aggregate_udf.rs create mode 100644 examples/datafusion-ffi-example/src/scalar_udf.rs create mode 100644 examples/datafusion-ffi-example/src/window_udf.rs diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst index c1f9806b3..a40af1234 100644 --- a/docs/source/contributor-guide/ffi.rst +++ b/docs/source/contributor-guide/ffi.rst @@ -176,7 +176,7 @@ By convention the ``datafusion-python`` library expects a Python object that has ``TableProvider`` PyCapsule to have this capsule accessible by calling a function named ``__datafusion_table_provider__``. You can see a complete working example of how to share a ``TableProvider`` from one python library to DataFusion Python in the -`repository examples folder `_. +`repository examples folder `_. This section has been written using ``TableProvider`` as an example. It is the first extension that has been written using this approach and the most thoroughly implemented. 
diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock index e5a1ca8d1..1b4ca6bee 100644 --- a/examples/datafusion-ffi-example/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -323,6 +323,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" dependencies = [ "bitflags", + "serde", + "serde_json", ] [[package]] @@ -748,9 +750,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" +checksum = "cc6cb8c2c81eada072059983657d6c9caf3fddefc43b4a65551d243253254a96" dependencies = [ "arrow", "arrow-ipc", @@ -775,7 +777,6 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", - "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -790,7 +791,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.1", "regex", "sqlparser", "tempfile", @@ -803,9 +804,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" +checksum = "b7be8d1b627843af62e447396db08fe1372d882c0eb8d0ea655fd1fbc33120ee" dependencies = [ "arrow", "async-trait", @@ -829,9 +830,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +checksum = "38ab16c5ae43f65ee525fc493ceffbc41f40dee38b01f643dfcfc12959e92038" dependencies = [ "arrow", "async-trait", @@ -852,9 
+853,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" +checksum = "d3d56b2ac9f476b93ca82e4ef5fb00769c8a3f248d12b4965af7e27635fa7e12" dependencies = [ "ahash", "arrow", @@ -876,9 +877,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +checksum = "16015071202d6133bc84d72756176467e3e46029f3ce9ad2cb788f9b1ff139b2" dependencies = [ "futures", "log", @@ -887,9 +888,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +checksum = "b77523c95c89d2a7eb99df14ed31390e04ab29b43ff793e562bdc1716b07e17b" dependencies = [ "arrow", "async-compression", @@ -912,7 +913,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand", + "rand 0.9.1", "tempfile", "tokio", "tokio-util", @@ -923,9 +924,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +checksum = "40d25c5e2c0ebe8434beeea997b8e88d55b3ccc0d19344293f2373f65bc524fc" dependencies = [ "arrow", "async-trait", @@ -948,9 +949,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +checksum = 
"3dc6959e1155741ab35369e1dc7673ba30fc45ed568fad34c01b7cb1daeb4d4c" dependencies = [ "arrow", "async-trait", @@ -973,9 +974,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +checksum = "b7a6afdfe358d70f4237f60eaef26ae5a1ce7cb2c469d02d5fc6c7fd5d84e58b" dependencies = [ "arrow", "async-trait", @@ -998,21 +999,21 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.1", "tokio", ] [[package]] name = "datafusion-doc" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" +checksum = "9bcd8a3e3e3d02ea642541be23d44376b5d5c37c2938cce39b3873cdf7186eea" [[package]] name = "datafusion-execution" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" +checksum = "670da1d45d045eee4c2319b8c7ea57b26cf48ab77b630aaa50b779e406da476a" dependencies = [ "arrow", "dashmap", @@ -1022,16 +1023,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.9.1", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" +checksum = "b3a577f64bdb7e2cc4043cd97f8901d8c504711fde2dbcb0887645b00d7c660b" dependencies = [ "arrow", "chrono", @@ -1050,9 +1051,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" +checksum = 
"51b7916806ace3e9f41884f230f7f38ebf0e955dfbd88266da1826f29a0b9a6a" dependencies = [ "arrow", "datafusion-common", @@ -1063,9 +1064,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cf3fe9ab492c56daeb7beed526690d33622d388b8870472e0b7b7f55490338c" +checksum = "980cca31de37f5dadf7ea18e4ffc2b6833611f45bed5ef9de0831d2abb50f1ef" dependencies = [ "abi_stable", "arrow", @@ -1073,7 +1074,9 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-functions-aggregate-common", "datafusion-proto", + "datafusion-proto-common", "futures", "log", "prost", @@ -1081,11 +1084,25 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-ffi-example" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "async-trait", + "datafusion", + "datafusion-ffi", + "pyo3", + "pyo3-build-config", +] + [[package]] name = "datafusion-functions" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" +checksum = "7fb31c9dc73d3e0c365063f91139dc273308f8a8e124adda9898db8085d68357" dependencies = [ "arrow", "arrow-buffer", @@ -1103,7 +1120,7 @@ dependencies = [ "itertools", "log", "md-5", - "rand", + "rand 0.9.1", "regex", "sha2", "unicode-segmentation", @@ -1112,9 +1129,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" +checksum = "ebb72c6940697eaaba9bd1f746a697a07819de952b817e3fb841fb75331ad5d4" dependencies = [ "ahash", "arrow", @@ -1133,9 +1150,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "47.0.0" +version = "48.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" +checksum = "d7fdc54656659e5ecd49bf341061f4156ab230052611f4f3609612a0da259696" dependencies = [ "ahash", "arrow", @@ -1146,9 +1163,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" +checksum = "fad94598e3374938ca43bca6b675febe557e7a14eb627d617db427d70d65118b" dependencies = [ "arrow", "arrow-ord", @@ -1167,9 +1184,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" +checksum = "de2fc6c2946da5cab8364fb28b5cac3115f0f3a87960b235ed031c3f7e2e639b" dependencies = [ "arrow", "async-trait", @@ -1183,10 +1200,11 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" +checksum = "3e5746548a8544870a119f556543adcd88fe0ba6b93723fe78ad0439e0fbb8b4" dependencies = [ + "arrow", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -1200,9 +1218,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" +checksum = "dcbe9404382cda257c434f22e13577bee7047031dfdb6216dd5e841b9465e6fe" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1210,9 +1228,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "47.0.0" 
+version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" +checksum = "8dce50e3b637dab0d25d04d2fe79dfdca2b257eabd76790bffd22c7f90d700c8" dependencies = [ "datafusion-expr", "quote", @@ -1221,9 +1239,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" +checksum = "03cfaacf06445dc3bbc1e901242d2a44f2cae99a744f49f3fefddcee46240058" dependencies = [ "arrow", "chrono", @@ -1240,9 +1258,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" +checksum = "1908034a89d7b2630898e06863583ae4c00a0dd310c1589ca284195ee3f7f8a6" dependencies = [ "ahash", "arrow", @@ -1262,9 +1280,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" +checksum = "47b7a12dd59ea07614b67dbb01d85254fbd93df45bcffa63495e11d3bdf847df" dependencies = [ "ahash", "arrow", @@ -1276,9 +1294,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" +checksum = "4371cc4ad33978cc2a8be93bd54a232d3f2857b50401a14631c0705f3f910aae" dependencies = [ "arrow", "datafusion-common", @@ -1295,9 +1313,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "47.0.0" +version = "48.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" +checksum = "dc47bc33025757a5c11f2cd094c5b6b5ed87f46fa33c023e6fdfa25fcbfade23" dependencies = [ "ahash", "arrow", @@ -1325,9 +1343,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a1afb2bdb05de7ff65be6883ebfd4ec027bd9f1f21c46aa3afd01927160a83" +checksum = "d8f5d9acd7d96e3bf2a7bb04818373cab6e51de0356e3694b94905fee7b4e8b6" dependencies = [ "arrow", "chrono", @@ -1341,9 +1359,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b7a5876ebd6b564fb9a1fd2c3a2a9686b787071a256b47e4708f0916f9e46f" +checksum = "09ecb5ec152c4353b60f7a5635489834391f7a291d2b39a4820cd469e318b78e" dependencies = [ "arrow", "datafusion-common", @@ -1352,9 +1370,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +checksum = "d7485da32283985d6b45bd7d13a65169dcbe8c869e25d01b2cfbc425254b4b49" dependencies = [ "arrow", "async-trait", @@ -1376,9 +1394,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "47.0.0" +version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" +checksum = "a466b15632befddfeac68c125f0260f569ff315c6831538cbb40db754134e0df" dependencies = [ "arrow", "bigdecimal", @@ -1441,20 +1459,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = 
"ffi-table-provider" -version = "0.1.0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", - "async-trait", - "datafusion", - "datafusion-ffi", - "pyo3", - "pyo3-build-config", -] - [[package]] name = "fixedbitset" version = "0.5.7" @@ -1488,6 +1492,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1666,6 +1676,11 @@ name = "hashbrown" version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -2271,12 +2286,14 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" dependencies = [ "fixedbitset", + "hashbrown 0.15.3", "indexmap", + "serde", ] [[package]] @@ -2305,7 +2322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -2484,19 +2501,27 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.3", ] [[package]] name = "rand_chacha" -version = "0.3.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.3", ] [[package]] @@ -2504,8 +2529,14 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.3.3", ] [[package]] @@ -3032,9 +3063,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ "getrandom 0.3.3", "js-sys", diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml index 319163554..b26ab48e3 100644 --- a/examples/datafusion-ffi-example/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -16,13 +16,13 @@ # under the License. 
[package] -name = "ffi-table-provider" -version = "0.1.0" +name = "datafusion-ffi-example" +version = "0.2.0" edition = "2021" [dependencies] -datafusion = { version = "47.0.0" } -datafusion-ffi = { version = "47.0.0" } +datafusion = { version = "48.0.0" } +datafusion-ffi = { version = "48.0.0" } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } arrow = { version = "55.0.0" } arrow-array = { version = "55.0.0" } diff --git a/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py b/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py new file mode 100644 index 000000000..7ea6b295c --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_aggregate_udf.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext, col, udaf +from datafusion_ffi_example import MySumUDF + + +def setup_context_with_table(): + ctx = SessionContext() + + # Pick numbers here so we get the same value in both groups + # since we cannot be certain of the output order of batches + batch = pa.RecordBatch.from_arrays( + [ + pa.array([1, 2, 3, None], type=pa.int64()), + pa.array([1, 1, 2, 2], type=pa.int64()), + ], + names=["a", "b"], + ) + ctx.register_record_batches("test_table", [[batch]]) + return ctx + + +def test_ffi_aggregate_register(): + ctx = setup_context_with_table() + my_udaf = udaf(MySumUDF()) + ctx.register_udaf(my_udaf) + + result = ctx.sql("select my_custom_sum(a) from test_table group by b").collect() + + assert len(result) == 2 + assert result[0].num_columns == 1 + + result = [r.column(0) for r in result] + expected = [ + pa.array([3], type=pa.int64()), + pa.array([3], type=pa.int64()), + ] + + assert result == expected + + +def test_ffi_aggregate_call_directly(): + ctx = setup_context_with_table() + my_udaf = udaf(MySumUDF()) + + result = ( + ctx.table("test_table").aggregate([col("b")], [my_udaf(col("a"))]).collect() + ) + + assert len(result) == 2 + assert result[0].num_columns == 2 + + result = [r.column(1) for r in result] + expected = [ + pa.array([3], type=pa.int64()), + pa.array([3], type=pa.int64()), + ] + + assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py b/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py new file mode 100644 index 000000000..0c949c34a --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_scalar_udf.py @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext, col, udf +from datafusion_ffi_example import IsNullUDF + + +def setup_context_with_table(): + ctx = SessionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3, None])], + names=["a"], + ) + ctx.register_record_batches("test_table", [[batch]]) + return ctx + + +def test_ffi_scalar_register(): + ctx = setup_context_with_table() + my_udf = udf(IsNullUDF()) + ctx.register_udf(my_udf) + + result = ctx.sql("select my_custom_is_null(a) from test_table").collect() + + assert len(result) == 1 + assert result[0].num_columns == 1 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([False, False, False, True], type=pa.bool_()), + ] + + assert result == expected + + +def test_ffi_scalar_call_directly(): + ctx = setup_context_with_table() + my_udf = udf(IsNullUDF()) + + result = ctx.table("test_table").select(my_udf(col("a"))).collect() + + assert len(result) == 1 + assert result[0].num_columns == 1 + print(result) + + result = [r.column(0) for r in result] + expected = [ + pa.array([False, False, False, True], type=pa.bool_()), + ] + + assert result == expected diff --git a/examples/datafusion-ffi-example/python/tests/_test_window_udf.py b/examples/datafusion-ffi-example/python/tests/_test_window_udf.py new file mode 100644 index 000000000..7d96994b9 --- 
/dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_window_udf.py @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import pyarrow as pa +from datafusion import SessionContext, col, udwf +from datafusion_ffi_example import MyRankUDF + + +def setup_context_with_table(): + ctx = SessionContext() + + # Pick numbers here so we get the same value in both groups + # since we cannot be certain of the output order of batches + batch = pa.RecordBatch.from_arrays( + [ + pa.array([40, 10, 30, 20], type=pa.int64()), + ], + names=["a"], + ) + ctx.register_record_batches("test_table", [[batch]]) + return ctx + + +def test_ffi_window_register(): + ctx = setup_context_with_table() + my_udwf = udwf(MyRankUDF()) + ctx.register_udwf(my_udwf) + + result = ctx.sql( + "select a, my_custom_rank() over (order by a) from test_table" + ).collect() + assert len(result) == 1 + assert result[0].num_columns == 2 + + results = [ + (result[0][0][idx].as_py(), result[0][1][idx].as_py()) for idx in range(4) + ] + results.sort() + + expected = [ + (10, 1), + (20, 2), + (30, 3), + (40, 4), + ] + assert results == expected + + +def test_ffi_window_call_directly(): + ctx = setup_context_with_table() 
+ my_udwf = udwf(MyRankUDF()) + + result = ( + ctx.table("test_table") + .select(col("a"), my_udwf().order_by(col("a")).build()) + .collect() + ) + + assert len(result) == 1 + assert result[0].num_columns == 2 + + results = [ + (result[0][0][idx].as_py(), result[0][1][idx].as_py()) for idx in range(4) + ] + results.sort() + + expected = [ + (10, 1), + (20, 2), + (30, 3), + (40, 4), + ] + assert results == expected diff --git a/examples/datafusion-ffi-example/src/aggregate_udf.rs b/examples/datafusion-ffi-example/src/aggregate_udf.rs new file mode 100644 index 000000000..9481fe9c6 --- /dev/null +++ b/examples/datafusion-ffi-example/src/aggregate_udf.rs @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_schema::DataType; +use datafusion::error::Result as DataFusionResult; +use datafusion::functions_aggregate::sum::Sum; +use datafusion::logical_expr::function::AccumulatorArgs; +use datafusion::logical_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; +use datafusion_ffi::udaf::FFI_AggregateUDF; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::any::Any; +use std::sync::Arc; + +#[pyclass(name = "MySumUDF", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct MySumUDF { + inner: Arc, +} + +#[pymethods] +impl MySumUDF { + #[new] + fn new() -> Self { + Self { + inner: Arc::new(Sum::new()), + } + } + + fn __datafusion_aggregate_udf__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_aggregate_udf".into(); + + let func = Arc::new(AggregateUDF::from(self.clone())); + let provider = FFI_AggregateUDF::from(func); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl AggregateUDFImpl for MySumUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "my_custom_sum" + } + + fn signature(&self) -> &Signature { + self.inner.signature() + } + + fn return_type(&self, arg_types: &[DataType]) -> DataFusionResult { + self.inner.return_type(arg_types) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> DataFusionResult> { + self.inner.accumulator(acc_args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> DataFusionResult> { + self.inner.coerce_types(arg_types) + } +} diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs index 54e61cf3e..cd2616916 100644 --- a/examples/datafusion-ffi-example/src/catalog_provider.rs +++ b/examples/datafusion-ffi-example/src/catalog_provider.rs @@ -24,7 +24,6 @@ use datafusion::{ catalog::{ CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, TableProvider, }, - 
common::exec_err, datasource::MemTable, error::{DataFusionError, Result}, }; diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs index 3a4cf2247..79af276fd 100644 --- a/examples/datafusion-ffi-example/src/lib.rs +++ b/examples/datafusion-ffi-example/src/lib.rs @@ -16,18 +16,27 @@ // under the License. use crate::catalog_provider::MyCatalogProvider; +use crate::aggregate_udf::MySumUDF; +use crate::scalar_udf::IsNullUDF; use crate::table_function::MyTableFunction; use crate::table_provider::MyTableProvider; +use crate::window_udf::MyRankUDF; use pyo3::prelude::*; pub(crate) mod catalog_provider; +pub(crate) mod aggregate_udf; +pub(crate) mod scalar_udf; pub(crate) mod table_function; pub(crate) mod table_provider; +pub(crate) mod window_udf; #[pymodule] fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/examples/datafusion-ffi-example/src/scalar_udf.rs b/examples/datafusion-ffi-example/src/scalar_udf.rs new file mode 100644 index 000000000..727666638 --- /dev/null +++ b/examples/datafusion-ffi-example/src/scalar_udf.rs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{Array, BooleanArray}; +use arrow_schema::DataType; +use datafusion::common::ScalarValue; +use datafusion::error::Result as DataFusionResult; +use datafusion::logical_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; +use datafusion_ffi::udf::FFI_ScalarUDF; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::any::Any; +use std::sync::Arc; + +#[pyclass(name = "IsNullUDF", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct IsNullUDF { + signature: Signature, +} + +#[pymethods] +impl IsNullUDF { + #[new] + fn new() -> Self { + Self { + signature: Signature::new(TypeSignature::Any(1), Volatility::Immutable), + } + } + + fn __datafusion_scalar_udf__<'py>(&self, py: Python<'py>) -> PyResult> { + let name = cr"datafusion_scalar_udf".into(); + + let func = Arc::new(ScalarUDF::from(self.clone())); + let provider = FFI_ScalarUDF::from(func); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl ScalarUDFImpl for IsNullUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "my_custom_is_null" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult { + Ok(DataType::Boolean) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> DataFusionResult { + let input = &args.args[0]; + + Ok(match input { + ColumnarValue::Array(arr) => match arr.is_nullable() { + true => { + let nulls = arr.nulls().unwrap(); + let nulls = BooleanArray::from_iter(nulls.iter().map(|x| Some(!x))); + ColumnarValue::Array(Arc::new(nulls)) + } + false => ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))), + }, + ColumnarValue::Scalar(sv) => { + ColumnarValue::Scalar(ScalarValue::Boolean(Some(sv == 
&ScalarValue::Null))) + } + }) + } +} diff --git a/examples/datafusion-ffi-example/src/window_udf.rs b/examples/datafusion-ffi-example/src/window_udf.rs new file mode 100644 index 000000000..e0d397956 --- /dev/null +++ b/examples/datafusion-ffi-example/src/window_udf.rs @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use arrow_schema::{DataType, FieldRef}; +use datafusion::error::Result as DataFusionResult; +use datafusion::functions_window::rank::rank_udwf; +use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; +use datafusion::logical_expr::{PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl}; +use datafusion_ffi::udwf::FFI_WindowUDF; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; +use std::any::Any; +use std::sync::Arc; + +#[pyclass(name = "MyRankUDF", module = "datafusion_ffi_example", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct MyRankUDF { + inner: Arc, +} + +#[pymethods] +impl MyRankUDF { + #[new] + fn new() -> Self { + Self { inner: rank_udwf() } + } + + fn __datafusion_window_udf__<'py>(&self, py: Python<'py>) -> PyResult> { + let name = cr"datafusion_window_udf".into(); + + let func = Arc::new(WindowUDF::from(self.clone())); + let provider = FFI_WindowUDF::from(func); + + PyCapsule::new(py, provider, Some(name)) + } +} + +impl WindowUDFImpl for MyRankUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "my_custom_rank" + } + + fn signature(&self) -> &Signature { + self.inner.signature() + } + + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> DataFusionResult> { + self.inner + .inner() + .partition_evaluator(partition_evaluator_args) + } + + fn field(&self, field_args: WindowUDFFieldArgs) -> DataFusionResult { + self.inner.inner().field(field_args) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> DataFusionResult> { + self.inner.coerce_types(arg_types) + } +} diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index dd634c7fb..bd686acbb 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -22,7 +22,7 @@ import functools from abc import ABCMeta, abstractmethod from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, 
Optional, TypeVar, overload +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, TypeVar, overload import pyarrow as pa @@ -77,6 +77,12 @@ def __str__(self) -> str: return self.name.lower() +class ScalarUDFExportable(Protocol): + """Type hint for object that has __datafusion_scalar_udf__ PyCapsule.""" + + def __datafusion_scalar_udf__(self) -> object: ... # noqa: D105 + + class ScalarUDF: """Class for performing scalar user-defined functions (UDF). @@ -96,6 +102,9 @@ def __init__( See helper method :py:func:`udf` for argument details. """ + if hasattr(func, "__datafusion_scalar_udf__"): + self._udf = df_internal.ScalarUDF.from_pycapsule(func) + return if isinstance(input_types, pa.DataType): input_types = [input_types] self._udf = df_internal.ScalarUDF( @@ -134,6 +143,10 @@ def udf( name: Optional[str] = None, ) -> ScalarUDF: ... + @overload + @staticmethod + def udf(func: ScalarUDFExportable) -> ScalarUDF: ... + @staticmethod def udf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Function (UDF). @@ -147,7 +160,10 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417 Args: func (Callable, optional): Only needed when calling as a function. - Skip this argument when using ``udf`` as a decorator. + Skip this argument when using `udf` as a decorator. If you have a Rust + backed ScalarUDF within a PyCapsule, you can pass this parameter + and ignore the rest. They will be determined directly from the + underlying function. See the online documentation for more information. input_types (list[pa.DataType]): The data types of the arguments to ``func``. This list must be of the same length as the number of arguments. 
@@ -215,12 +231,31 @@ def wrapper(*args: Any, **kwargs: Any): return decorator + if hasattr(args[0], "__datafusion_scalar_udf__"): + return ScalarUDF.from_pycapsule(args[0]) + if args and callable(args[0]): # Case 1: Used as a function, require the first parameter to be callable return _function(*args, **kwargs) # Case 2: Used as a decorator with parameters return _decorator(*args, **kwargs) + @staticmethod + def from_pycapsule(func: ScalarUDFExportable) -> ScalarUDF: + """Create a Scalar UDF from ScalarUDF PyCapsule object. + + This function will instantiate a Scalar UDF that uses a DataFusion + ScalarUDF that is exported via the FFI bindings. + """ + name = str(func.__class__) + return ScalarUDF( + name=name, + func=func, + input_types=None, + return_type=None, + volatility=None, + ) + class Accumulator(metaclass=ABCMeta): """Defines how an :py:class:`AggregateUDF` accumulates values.""" @@ -242,6 +277,12 @@ def evaluate(self) -> pa.Scalar: """Return the resultant value.""" +class AggregateUDFExportable(Protocol): + """Type hint for object that has __datafusion_aggregate_udf__ PyCapsule.""" + + def __datafusion_aggregate_udf__(self) -> object: ... # noqa: D105 + + class AggregateUDF: """Class for performing scalar user-defined functions (UDF). @@ -263,6 +304,9 @@ def __init__( See :py:func:`udaf` for a convenience function and argument descriptions. """ + if hasattr(accumulator, "__datafusion_aggregate_udf__"): + self._udaf = df_internal.AggregateUDF.from_pycapsule(accumulator) + return self._udaf = df_internal.AggregateUDF( name, accumulator, @@ -307,7 +351,7 @@ def udaf( ) -> AggregateUDF: ... @staticmethod - def udaf(*args: Any, **kwargs: Any): # noqa: D417 + def udaf(*args: Any, **kwargs: Any): # noqa: D417, C901 """Create a new User-Defined Aggregate Function (UDAF). This class allows you to define an aggregate function that can be used in @@ -364,6 +408,10 @@ def udf4() -> Summarize: Args: accum: The accumulator python function. 
Only needed when calling as a function. Skip this argument when using ``udaf`` as a decorator. + If you have a Rust backed AggregateUDF within a PyCapsule, you can + pass this parameter and ignore the rest. They will be determined + directly from the underlying function. See the online documentation + for more information. input_types: The data types of the arguments to ``accum``. return_type: The data type of the return value. state_type: The data types of the intermediate accumulation. @@ -422,12 +470,32 @@ def wrapper(*args: Any, **kwargs: Any) -> Expr: return decorator + if hasattr(args[0], "__datafusion_aggregate_udf__"): + return AggregateUDF.from_pycapsule(args[0]) + if args and callable(args[0]): # Case 1: Used as a function, require the first parameter to be callable return _function(*args, **kwargs) # Case 2: Used as a decorator with parameters return _decorator(*args, **kwargs) + @staticmethod + def from_pycapsule(func: AggregateUDFExportable) -> AggregateUDF: + """Create an Aggregate UDF from AggregateUDF PyCapsule object. + + This function will instantiate a Aggregate UDF that uses a DataFusion + AggregateUDF that is exported via the FFI bindings. + """ + name = str(func.__class__) + return AggregateUDF( + name=name, + accumulator=func, + input_types=None, + return_type=None, + state_type=None, + volatility=None, + ) + class WindowEvaluator: """Evaluator class for user-defined window functions (UDWF). @@ -588,6 +656,12 @@ def include_rank(self) -> bool: return False +class WindowUDFExportable(Protocol): + """Type hint for object that has __datafusion_window_udf__ PyCapsule.""" + + def __datafusion_window_udf__(self) -> object: ... # noqa: D105 + + class WindowUDF: """Class for performing window user-defined functions (UDF). @@ -608,6 +682,9 @@ def __init__( See :py:func:`udwf` for a convenience function and argument descriptions. 
""" + if hasattr(func, "__datafusion_window_udf__"): + self._udwf = df_internal.WindowUDF.from_pycapsule(func) + return self._udwf = df_internal.WindowUDF( name, func, input_types, return_type, str(volatility) ) @@ -683,7 +760,10 @@ def biased_numbers() -> BiasedNumbers: Args: func: Only needed when calling as a function. Skip this argument when - using ``udwf`` as a decorator. + using ``udwf`` as a decorator. If you have a Rust backed WindowUDF + within a PyCapsule, you can pass this parameter and ignore the rest. + They will be determined directly from the underlying function. See + the online documentation for more information. input_types: The data types of the arguments. return_type: The data type of the return value. volatility: See :py:class:`Volatility` for allowed values. @@ -692,6 +772,9 @@ def biased_numbers() -> BiasedNumbers: Returns: A user-defined window function that can be used in window function calls. """ + if hasattr(args[0], "__datafusion_window_udf__"): + return WindowUDF.from_pycapsule(args[0]) + if args and callable(args[0]): # Case 1: Used as a function, require the first parameter to be callable return WindowUDF._create_window_udf(*args, **kwargs) @@ -759,6 +842,22 @@ def wrapper(*args: Any, **kwargs: Any) -> Expr: return decorator + @staticmethod + def from_pycapsule(func: WindowUDFExportable) -> WindowUDF: + """Create a Window UDF from WindowUDF PyCapsule object. + + This function will instantiate a Window UDF that uses a DataFusion + WindowUDF that is exported via the FFI bindings. + """ + name = str(func.__class__) + return WindowUDF( + name=name, + func=func, + input_types=None, + return_type=None, + volatility=None, + ) + class TableFunction: """Class for performing user-defined table functions (UDTF). 
diff --git a/src/functions.rs b/src/functions.rs index b40500b8b..eeef48385 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -682,7 +682,7 @@ pub fn approx_percentile_cont_with_weight( add_builder_fns_to_aggregate(agg_fn, None, filter, None, None) } -// We handle first_value explicitly because the signature expects an order_by +// We handle last_value explicitly because the signature expects an order_by // https://github.com/apache/datafusion/issues/12376 #[pyfunction] #[pyo3(signature = (expr, distinct=None, filter=None, order_by=None, null_treatment=None))] diff --git a/src/udaf.rs b/src/udaf.rs index 34a9cd51d..78f4e2b0c 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -19,6 +19,10 @@ use std::sync::Arc; use pyo3::{prelude::*, types::PyTuple}; +use crate::common::data_type::PyScalarValue; +use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; +use crate::expr::PyExpr; +use crate::utils::{parse_volatility, validate_pycapsule}; use datafusion::arrow::array::{Array, ArrayRef}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; @@ -27,11 +31,8 @@ use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::{ create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, }; - -use crate::common::data_type::PyScalarValue; -use crate::errors::to_datafusion_err; -use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use datafusion_ffi::udaf::{FFI_AggregateUDF, ForeignAggregateUDF}; +use pyo3::types::PyCapsule; #[derive(Debug)] struct RustAccumulator { @@ -183,6 +184,26 @@ impl PyAggregateUDF { Ok(Self { function }) } + #[staticmethod] + pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { + if func.hasattr("__datafusion_aggregate_udf__")? 
{ + let capsule = func.getattr("__datafusion_aggregate_udf__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_aggregate_udf")?; + + let udaf = unsafe { capsule.reference::() }; + let udaf: ForeignAggregateUDF = udaf.try_into()?; + + Ok(Self { + function: udaf.into(), + }) + } else { + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_aggregate_udf__ does not exist on AggregateUDF object.".to_string(), + )) + } + } + /// creates a new PyExpr with the call of the udf #[pyo3(signature = (*args))] fn __call__(&self, args: Vec) -> PyResult { diff --git a/src/udf.rs b/src/udf.rs index 574c9d7b5..de1e3f18c 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use datafusion_ffi::udf::{FFI_ScalarUDF, ForeignScalarUDF}; +use pyo3::types::PyCapsule; use pyo3::{prelude::*, types::PyTuple}; use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; @@ -29,8 +31,9 @@ use datafusion::logical_expr::ScalarUDF; use datafusion::logical_expr::{create_udf, ColumnarValue}; use crate::errors::to_datafusion_err; +use crate::errors::{py_datafusion_err, PyDataFusionResult}; use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use crate::utils::{parse_volatility, validate_pycapsule}; /// Create a Rust callable function from a python function that expects pyarrow arrays fn pyarrow_function_to_rust( @@ -105,6 +108,26 @@ impl PyScalarUDF { Ok(Self { function }) } + #[staticmethod] + pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { + if func.hasattr("__datafusion_scalar_udf__")? 
{ + let capsule = func.getattr("__datafusion_scalar_udf__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_scalar_udf")?; + + let udf = unsafe { capsule.reference::() }; + let udf: ForeignScalarUDF = udf.try_into()?; + + Ok(Self { + function: udf.into(), + }) + } else { + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_scalar_udf__ does not exist on ScalarUDF object.".to_string(), + )) + } + } + /// creates a new PyExpr with the call of the udf #[pyo3(signature = (*args))] fn __call__(&self, args: Vec) -> PyResult { diff --git a/src/udwf.rs b/src/udwf.rs index a0c8cc59a..4fb98916b 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -27,16 +27,17 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use crate::common::data_type::PyScalarValue; -use crate::errors::to_datafusion_err; +use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; use crate::expr::PyExpr; -use crate::utils::parse_volatility; +use crate::utils::{parse_volatility, validate_pycapsule}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::{ PartitionEvaluator, PartitionEvaluatorFactory, Signature, Volatility, WindowUDF, WindowUDFImpl, }; -use pyo3::types::{PyList, PyTuple}; +use datafusion_ffi::udwf::{FFI_WindowUDF, ForeignWindowUDF}; +use pyo3::types::{PyCapsule, PyList, PyTuple}; #[derive(Debug)] struct RustPartitionEvaluator { @@ -245,6 +246,26 @@ impl PyWindowUDF { Ok(self.function.call(args).into()) } + #[staticmethod] + pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { + if func.hasattr("__datafusion_window_udf__")? 
{ + let capsule = func.getattr("__datafusion_window_udf__")?.call0()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_window_udf")?; + + let udwf = unsafe { capsule.reference::() }; + let udwf: ForeignWindowUDF = udwf.try_into()?; + + Ok(Self { + function: udwf.into(), + }) + } else { + Err(crate::errors::PyDataFusionError::Common( + "__datafusion_window_udf__ does not exist on WindowUDF object.".to_string(), + )) + } + } + fn __repr__(&self) -> PyResult { Ok(format!("WindowUDF({})", self.function.name())) } From 2e1b71369eefc97c22b82be84bbabb414f748fb9 Mon Sep 17 00:00:00 2001 From: kosiew Date: Fri, 4 Jul 2025 20:36:05 +0800 Subject: [PATCH 061/206] refactor: style loading logic in DataFrameHtmlFormatter (#1177) --- python/datafusion/dataframe_formatter.py | 117 ++++++++--------------- python/tests/test_dataframe.py | 60 +++++------- 2 files changed, 63 insertions(+), 114 deletions(-) diff --git a/python/datafusion/dataframe_formatter.py b/python/datafusion/dataframe_formatter.py index 27f00f9c3..2323224b8 100644 --- a/python/datafusion/dataframe_formatter.py +++ b/python/datafusion/dataframe_formatter.py @@ -135,9 +135,6 @@ class DataFrameHtmlFormatter: session """ - # Class variable to track if styles have been loaded in the notebook - _styles_loaded = False - def __init__( self, max_cell_length: int = 25, @@ -260,23 +257,6 @@ def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None: """ self._custom_header_builder = builder - @classmethod - def is_styles_loaded(cls) -> bool: - """Check if HTML styles have been loaded in the current session. - - This method is primarily intended for debugging UI rendering issues - related to style loading. 
- - Returns: - True if styles have been loaded, False otherwise - - Example: - >>> from datafusion.dataframe_formatter import DataFrameHtmlFormatter - >>> DataFrameHtmlFormatter.is_styles_loaded() - False - """ - return cls._styles_loaded - def format_html( self, batches: list, @@ -315,18 +295,7 @@ def format_html( # Build HTML components html = [] - # Only include styles and scripts if: - # 1. Not using shared styles, OR - # 2. Using shared styles but they haven't been loaded yet - include_styles = ( - not self.use_shared_styles or not DataFrameHtmlFormatter._styles_loaded - ) - - if include_styles: - html.extend(self._build_html_header()) - # If we're using shared styles, mark them as loaded - if self.use_shared_styles: - DataFrameHtmlFormatter._styles_loaded = True + html.extend(self._build_html_header()) html.extend(self._build_table_container_start()) @@ -338,7 +307,7 @@ def format_html( html.append("") # Add footer (JavaScript and messages) - if include_styles and self.enable_cell_expansion: + if self.enable_cell_expansion: html.append(self._get_javascript()) # Always add truncation message if needed (independent of styles) @@ -375,14 +344,20 @@ def format_str( def _build_html_header(self) -> list[str]: """Build the HTML header with CSS styles.""" - html = [] - html.append("") + html.append(f"") return html def _build_table_container_start(self) -> list[str]: @@ -570,28 +545,31 @@ def _get_default_css(self) -> str: def _get_javascript(self) -> str: """Get JavaScript code for interactive elements.""" return """ - - """ + +""" class FormatterManager: @@ -712,24 +690,9 @@ def reset_formatter() -> None: >>> reset_formatter() # Reset formatter to default settings """ formatter = DataFrameHtmlFormatter() - # Reset the styles_loaded flag to ensure styles will be reloaded - DataFrameHtmlFormatter._styles_loaded = False set_formatter(formatter) -def reset_styles_loaded_state() -> None: - """Reset the styles loaded state to force reloading of styles. 
- - This can be useful when switching between notebook sessions or - when styles need to be refreshed. - - Example: - >>> from datafusion.html_formatter import reset_styles_loaded_state - >>> reset_styles_loaded_state() # Force styles to reload in next render - """ - DataFrameHtmlFormatter._styles_loaded = False - - def _refresh_formatter_reference() -> None: """Refresh formatter reference in any modules using it. diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index c9ae38d8e..a3870ead8 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -42,7 +42,6 @@ configure_formatter, get_formatter, reset_formatter, - reset_styles_loaded_state, ) from datafusion.expr import Window from pyarrow.csv import write_csv @@ -2177,27 +2176,15 @@ def test_html_formatter_shared_styles(df, clean_formatter_state): # First, ensure we're using shared styles configure_formatter(use_shared_styles=True) - # Get HTML output for first table - should include styles html_first = df._repr_html_() - - # Verify styles are included in first render - assert "
" - f"{field.name}
" - f"
" - "" - "" - f"{formatted_value}" - f"" - f"
" - f"
{formatted_value}