diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..2608b45 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c006475..8f9ff42 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,11 +9,11 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v6 with: - python-version: '3.7' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 1916809..a6fad55 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -14,16 +14,16 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] pandas-presence: ['with_pandas', 'without_pandas'] env: PYTHON_VERSION: ${{ matrix.python-version }} PANDAS_PRESENCE: ${{ matrix.pandas-presence }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} id: gha-python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} continue-on-error: true @@ -50,8 +50,8 @@ jobs: PYTHON_ENV="py$(echo $PYTHON_VERSION | sed 's/\.//;s/\-dev//')" tox -e "${PYTHON_ENV}-${PANDAS_PRESENCE}" - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1.0.10 + uses: codecov/codecov-action@v5 with: - file: ./coverage.xml + files: ./coverage.xml flags: unittests 
env_vars: PYTHON_VERSION,PANDAS_PRESENCE diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61cb4d6..9a2f844 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,12 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - id: fix-byte-order-marker - id: destroyed-symlinks - - id: fix-encoding-pragma - args: ["--remove"] - id: mixed-line-ending - id: name-tests-test args: ["--pytest-test-first"] @@ -17,7 +15,7 @@ repos: exclude: ".ipynb" - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.7 + rev: v0.15.2 hooks: - id: ruff-format types_or: [ python, pyi, jupyter ] diff --git a/README.md b/README.md index ecd78f3..3be1ec7 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ building design matrices. Patsy brings the convenience of [R](http://www.r-proje ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/patsy.svg) ![https://patsy.readthedocs.io/](https://img.shields.io/badge/docs-read%20now-blue.svg) ![PyPI - Status](https://img.shields.io/pypi/status/patsy.svg) -![https://coveralls.io/r/pydata/patsy?branch=master](https://coveralls.io/repos/pydata/patsy/badge.png?branch=master) +[![https://coveralls.io/r/pydata/patsy?branch=master](https://coveralls.io/repos/pydata/patsy/badge.png?branch=master)](https://coveralls.io/github/pydata/patsy) ![https://doi.org/10.5281/zenodo.592075](https://zenodo.org/badge/DOI/10.5281/zenodo.592075.svg) - **Documentation:** diff --git a/doc/changes.rst b/doc/changes.rst index 6d0a590..878968f 100644 --- a/doc/changes.rst +++ b/doc/changes.rst @@ -8,6 +8,11 @@ All Patsy releases are archived at Zenodo: .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.592075.svg :target: https://doi.org/10.5281/zenodo.592075 +v1.0.2 +------ + +* Fixed compatibility with Pandas 3's new ``StringDtype``.
+ v1.0.1 ------ @@ -82,7 +87,7 @@ New features: characters as valid formula descriptions in the high-level formula API (:func:`dmatrix` and friends). This is intended as a convenience for people using Python 2 with ``from - __future__ import unicode_literals``. (See :ref:`py2-versus-py3`.) + __future__ import unicode_literals``. Bug fixes: diff --git a/doc/conf.py b/doc/conf.py index 2ad5d64..f509ffe 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -75,9 +75,9 @@ # Undocumented trick: if we def setup here in conf.py, it gets called just # like an extension's setup function. def setup(app): - app.add_javascript("show-code.js") - app.add_javascript("facebox.js") - app.add_stylesheet("facebox.css") + app.add_js_file("show-code.js") + app.add_js_file("facebox.js") + app.add_css_file("facebox.css") # Add any paths that contain templates here, relative to this directory. @@ -240,9 +240,9 @@ def setup(app): autoclass_content = "both" intersphinx_mapping = { - "python": ("http://docs.python.org", None), - "numpy": ("http://docs.scipy.org/doc/numpy", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), + "python": ("https://docs.python.org/3/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), } -autodoc_member_order = "source" +autodoc_member_order = "bysource" diff --git a/doc/index.rst b/doc/index.rst index f4e591c..8422374 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -4,38 +4,27 @@ contain the root `toctree` directive. patsy - Describing statistical models in Python -=================================================== +=============================================== + +Patsy documentation Contents: ..
toctree:: :maxdepth: 2 - overview.rst - - quickstart.rst - - formulas.rst - - categorical-coding.rst - - stateful-transforms.rst - - spline-regression.rst - - expert-model-specification.rst - - library-developers.rst - - R-comparison.rst - - py2-versus-py3.rst - - API-reference.rst - - builtins-reference.rst - - changes.rst + overview + quickstart + formulas + categorical-coding + stateful-transforms + spline-regression + expert-model-specification + library-developers + R-comparison + API-reference + builtins-reference + changes Indices and tables ================== diff --git a/doc/spline-regression.rst b/doc/spline-regression.rst index df0d265..ae50247 100644 --- a/doc/spline-regression.rst +++ b/doc/spline-regression.rst @@ -179,7 +179,7 @@ marginal spline bases patterns can be observed on the x and y contour projection ....: {"x1": x1.ravel(), "x2": x2.ravel(), "df": df}) ....: - In [80]: print y.shape + In [80]: print(y.shape) In [90]: fig = plt.figure() diff --git a/patsy/missing.py b/patsy/missing.py index b4d8a01..0ea262b 100644 --- a/patsy/missing.py +++ b/patsy/missing.py @@ -183,7 +183,7 @@ def _handle_NA_drop(self, values, is_NAs, origins): total_mask |= is_NA good_mask = ~total_mask # "..." to handle 1- versus 2-dim indexing - return [v[good_mask, ...] for v in values] + return [v[good_mask] if v.ndim == 1 else v[good_mask, ...] 
for v in values] __getstate__ = no_pickling diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 35c86a1..2fad925 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -979,3 +979,21 @@ def test_C_and_pandas_categorical(): assert np.allclose( dmatrix("C(obj, levels=['a', 'b'])", d), [[1, 0], [1, 1], [1, 0]] ) + + +def test_NAAction_pandas_string_index(): + if not have_pandas: + return + from patsy.missing import NAAction + + formula = "1 + x + z" + action = NAAction("drop") + data = pandas.DataFrame( + {"z": [1.0, np.nan, 2.0], "x": [1, 2, 3]}, index=["a", "b", "c"] + ) + dm = dmatrix(formula, data, 0, NA_action=action, return_type="dataframe") + di = dm.design_info + data2 = pandas.DataFrame({"z": [4.0, 5.0], "x": [6, 7]}) + dm2 = dmatrix(di, data2, 0, return_type="dataframe") + assert np.allclose(dm2, [[1.0, 6.0, 4.0], [1.0, 7.0, 5.0]]) + assert list(dm2.columns) == ["Intercept", "x", "z"] diff --git a/patsy/util.py b/patsy/util.py index 2c1c19d..d666d38 100644 --- a/patsy/util.py +++ b/patsy/util.py @@ -40,14 +40,19 @@ try: import pandas except ImportError: - have_pandas = False + PANDAS3 = have_pandas = False else: have_pandas = True + import packaging.version + + pandas_version = packaging.version.parse(pandas.__version__) + PANDAS3 = pandas_version >= packaging.version.parse("3.0.0.dev0") # Pandas versions < 0.9.0 don't have Categorical # Can drop this guard whenever we drop support for such older versions of # pandas. have_pandas_categorical = have_pandas and hasattr(pandas, "Categorical") +have_pandas_string_dtype = have_pandas and hasattr(pandas, "StringDtype") if not have_pandas: _pandas_is_categorical_dtype = None else: @@ -65,6 +70,11 @@ ) have_pandas_categorical_dtype = _pandas_is_categorical_dtype is not None + +def safe_is_pandas_string_dtype(x): + return have_pandas_string_dtype and isinstance(x, pandas.StringDtype) + + # The handling of the `copy` keyword has been changed since numpy>=2.
# https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword # If numpy<2 support is dropped, this try-clause can be removed. @@ -118,7 +128,9 @@ def test_asarray_or_pandas(): assert s_view1.name == "A" assert np.array_equal(s_view1.index, [10, 20, 30]) s_view1[10] = 101 - assert s[10] == 101 + # pandas 3 uses copy-on-write, so no longer valid + if not PANDAS3: + assert s[10] == 101 s_copy = asarray_or_pandas(s, copy=True) assert s_copy.name == "A" assert np.array_equal(s_copy.index, [10, 20, 30]) @@ -130,14 +142,18 @@ def test_asarray_or_pandas(): assert s_view2.name == "A" assert np.array_equal(s_view2.index, [10, 20, 30]) s_view2[10] = 99 - assert s[10] == 99 + # pandas 3 uses copy-on-write, so no longer valid + if not PANDAS3: + assert s[10] == 99 df = pandas.DataFrame([[1, 2, 3]], columns=["A", "B", "C"], index=[10]) df_view1 = asarray_or_pandas(df) df_view1.loc[10, "A"] = 101 assert np.array_equal(df_view1.columns, ["A", "B", "C"]) assert np.array_equal(df_view1.index, [10]) - assert df.loc[10, "A"] == 101 + # pandas 3 uses copy-on-write, so no longer valid + if not PANDAS3: + assert df.loc[10, "A"] == 101 df_copy = asarray_or_pandas(df, copy=True) assert np.array_equal(df_copy, df) assert np.array_equal(df_copy.columns, ["A", "B", "C"]) @@ -799,7 +815,8 @@ def test_safe_is_pandas_categorical(): # https://github.com/pydata/pandas/issues/9581 # https://github.com/pydata/pandas/issues/9581#issuecomment-77099564 def safe_issubdtype(dt1, dt2): - if safe_is_pandas_categorical_dtype(dt1): + # The second condition is needed to support pandas >= 3 (!) + if safe_is_pandas_categorical_dtype(dt1) or safe_is_pandas_string_dtype(dt1): return False return np.issubdtype(dt1, dt2) diff --git a/patsy/version.py b/patsy/version.py index c16a1be..48a94bc 100644 --- a/patsy/version.py +++ b/patsy/version.py @@ -6,15 +6,15 @@ # places -- it is imported by patsy/__init__.py, execfile'd by setup.py, etc. 
# We use a simple scheme: -# 1.0.1 -> 1.0.1+dev -> 1.1.0 -> 1.1.0+dev +# 1.0.2 -> 1.0.2+dev -> 1.1.0 -> 1.1.0+dev # where the +dev versions are never released into the wild, they're just what # we stick into the VCS in between releases. # # This is compatible with PEP 440: # http://legacy.python.org/dev/peps/pep-0440/ # via the use of the "local suffix" "+dev", which is disallowed on index -# servers and causes 1.0.1+dev to sort after plain 1.0.1, which is what we -# want. (Contrast with the special suffix 1.0.1.dev, which sorts *before* -# 1.0.1.) +# servers and causes 1.0.2+dev to sort after plain 1.0.2, which is what we +# want. (Contrast with the special suffix 1.0.2.dev, which sorts *before* +# 1.0.2.) -__version__ = "1.0.1" +__version__ = "1.0.2" diff --git a/setup.py b/setup.py index 4613449..2f9c77a 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ # Possibly we need an even newer numpy than this, but we definitely # need at least 1.4 for triu_indices "numpy >= 1.4", + "packaging", ], extras_require={ "test": ["pytest", "pytest-cov", "scipy"], @@ -47,6 +48,7 @@ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Topic :: Scientific/Engineering", ], ) diff --git a/tox.ini b/tox.ini index c2f2e6a..8feb390 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py36,py37,py38,py39,py310,py311,py312,py313}-{with_pandas,without_pandas} +envlist = {py36,py37,py38,py39,py310,py311,py312,py313,py314}-{with_pandas,without_pandas} [gh-actions] python = @@ -11,6 +11,7 @@ python = 3.11: py311 3.12: py312 3.13: py313 + 3.14: py314 [testenv] deps=