From 5296c0cfcf8e6fcb654d5935252469bf04f929e9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 10 Feb 2024 10:23:30 -0700 Subject: [PATCH 001/349] Allow PyDataFrame to be used from other projects (#582) * Allow PyDataFrame to be used from other projects * revert --- src/context.rs | 4 ++-- src/lib.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/context.rs b/src/context.rs index 9053e4f2e..f34fbce81 100644 --- a/src/context.rs +++ b/src/context.rs @@ -842,7 +842,7 @@ impl PySessionContext { } } -fn convert_table_partition_cols( +pub fn convert_table_partition_cols( table_partition_cols: Vec<(String, String)>, ) -> Result, DataFusionError> { table_partition_cols @@ -856,7 +856,7 @@ fn convert_table_partition_cols( .collect::, _>>() } -fn parse_file_compression_type( +pub fn parse_file_compression_type( file_compression_type: Option, ) -> Result { FileCompressionType::from_str(&*file_compression_type.unwrap_or("".to_string()).as_str()) diff --git a/src/lib.rs b/src/lib.rs index 5e57db9cf..49c325a53 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ mod config; #[allow(clippy::borrow_deref_ref)] pub mod context; #[allow(clippy::borrow_deref_ref)] -mod dataframe; +pub mod dataframe; mod dataset; mod dataset_exec; pub mod errors; From 476ca228cfcf75012a73932c4fb1fb53271c1e8a Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:34:04 +0100 Subject: [PATCH 002/349] docs: Add ASF attribution (#580) * Add ASF attribution and update docs README * Fix formatting * Fix unit test * Undo changes in functions & tests and requirements.txt --- docs/README.md | 45 ++++++++++++++++-------- docs/requirements.txt | 2 +- docs/source/_templates/docs-sidebar.html | 2 +- docs/source/_templates/layout.html | 21 +++++++++++ docs/source/conf.py | 6 ++-- 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/docs/README.md b/docs/README.md index 85278588e..8cb101d92 100644 --- 
a/docs/README.md +++ b/docs/README.md @@ -28,15 +28,31 @@ when changes are merged to the main branch. It's recommended to install build dependencies and build the documentation inside a Python `venv`. -```bash -python -m pip install -r requirements-310.txt -``` +To prepare for building the documentation, run the following at the root level of the project: + +1. Set up a virtual environment if one has not already been created + ```bash + python3 -m venv venv + ``` +1. Activate the virtual environment + ```bash + source venv/bin/activate + ``` +1. Install DataFusion's Python dependencies + ```bash + pip install -r requirements-310.txt + ``` +1. Install documentation dependencies + ```bash + pip install -r docs/requirements.txt + ``` ## Build & Preview Run the provided script to build the HTML pages. ```bash +cd docs ./build.sh ``` @@ -50,14 +66,15 @@ firefox build/html/index.html ## Release Process -The documentation is served through the -[arrow-site](https://github.com/apache/arrow-site/) repo. To release a new -version of the docs, follow these steps: - -1. Run `./build.sh` inside `docs` folder to generate the docs website inside the `build/html` folder. -2. Clone the arrow-site repo -3. Checkout to the `asf-site` branch (NOT `master`) -4. Copy build artifacts into `arrow-site` repo's `datafusion` folder with a command such as - - `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac) - - `rsync -avzr ./build/html/ ../../arrow-site/datafusion/` -5. Commit changes in `arrow-site` and send a PR. +This documentation is hosted at https://arrow.apache.org/datafusion-python/ + +When the PR is merged to the `main` branch of the DataFusion +repository, a [GitHub workflow](https://github.com/apache/arrow-datafusion-python/blob/main/.github/workflows/docs.yaml) runs, which: + +1. Builds the HTML content +2. Pushes the HTML content to the [`asf-site`](https://github.com/apache/arrow-datafusion-python/tree/asf-site) branch in this repository. 
+ +The Apache Software Foundation provides https://arrow.apache.org/, +which serves content based on the configuration in +[.asf.yaml](https://github.com/apache/arrow-datafusion-python/blob/main/.asf.yaml), +which specifies the target as https://arrow.apache.org/datafusion-python/. \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index eb7f5b7bb..67f1ec6ac 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -sphinx==5.3.0 +sphinx pydata-sphinx-theme==0.8.0 myst-parser maturin diff --git a/docs/source/_templates/docs-sidebar.html b/docs/source/_templates/docs-sidebar.html index bc2bf0092..6541b7713 100644 --- a/docs/source/_templates/docs-sidebar.html +++ b/docs/source/_templates/docs-sidebar.html @@ -1,6 +1,6 @@ - +