pennmem · Riley16 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,86 @@
+name: tests
+
+# Runs on pushes to the listed branches, on every pull request, and on manual
+# dispatch from the Actions UI.
+on:
+  push:
+    branches: [main, modernize-package]
+  pull_request:
+  workflow_dispatch:
+
+# Least privilege: the jobs below only check out the repository, so the
+# GITHUB_TOKEN is restricted to read-only access to repository contents.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    # One cell per supported Python x numpy-major combination.
+    name: py${{ matrix.python-version }} / numpy${{ matrix.numpy }}
+    runs-on: ubuntu-latest
+    strategy:
+      # Let every cell finish so one failure does not hide the others.
+      fail-fast: false
+      matrix:
+        # pybeh_pd is pure-Python and supports both numpy majors. numpy 1.x has
+        # no wheels for Python 3.13, so that cell is excluded.
+        python-version: ["3.11", "3.12", "3.13"]
+        numpy: ["1", "2"]
+        exclude:
+          - python-version: "3.13"
+            numpy: "1"
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # The numpy constraint is resolved in the same pip invocation as the
+      # package, so pip selects pandas/scipy releases compatible with that numpy
+      # major. Pinning numpy in a later `pip install` only warns on conflicts
+      # and can leave an inconsistent environment behind.
+      - name: Install package + test deps (numpy major pinned)
+        env:
+          # Matrix value is passed via the environment instead of being
+          # interpolated into the script text.
+          NUMPY_MAJOR: ${{ matrix.numpy }}
+        run: |
+          python -m pip install --upgrade pip
+          if [ "$NUMPY_MAJOR" = "1" ]; then
+            numpy_spec="numpy<2"
+          else
+            numpy_spec="numpy>=2"
+          fi
+          python -m pip install -e ".[test]" "$numpy_spec"
+
+      # Print what was actually installed so each cell's log records the
+      # numpy / pandas / scipy versions it ran against.
+      - name: Show resolved versions
+        run: python -c "import numpy, pandas, scipy; print('numpy', numpy.__version__, '| pandas', pandas.__version__, '| scipy', scipy.__version__)"
+
+      - name: Run test suite
+        run: pytest -q --cov=pybeh_pd --cov-report=term-missing
+
+  lint:
+    # Static checks only. There is no `needs:` key, so this job does not wait
+    # for the test matrix.
+    name: pyright + ruff (pyflakes)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      # The package is installed editable (without the test extra) alongside
+      # pyright and ruff; neither tool version is pinned here.
+      - name: Install package + lint tools
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e . pyright ruff
+      - name: pyright (0 errors required)
+        run: pyright --project .
+      # `--select F` restricts ruff to the F (pyflakes) rule family, and only
+      # src/pybeh_pd is checked.
+      - name: ruff pyflakes (F)
+        run: ruff check --select F src/pybeh_pd
+
+  build:
+    # Builds the distributions, checks that the built wheel installs and
+    # imports, then publishes the contents of dist/ as a workflow artifact.
+    name: build wheel + sdist
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Build distributions
+        run: |
+          python -m pip install --upgrade pip build
+          python -m build
+      # Installs the wheel from dist/ (not the source tree) and imports the
+      # package, printing how many non-underscore names it exposes.
+      - name: Smoke-install the wheel and import
+        run: |
+          python -m pip install dist/*.whl
+          python -c "import pybeh_pd as pb; print('OK', len([n for n in dir(pb) if not n.startswith('_')]), 'public names')"
+      # Everything under dist/ is uploaded as a single artifact named "dist".
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/*
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,43 @@
+# Python bytecode
 __pycache__/
 **/__pycache__/
 *.pyc
+
+# Build / packaging artifacts
+build/
+dist/
+*.egg-info/
+*.egg
+*.so
+.eggs/
+wheelhouse/
+
+# Test / coverage artifacts
+.pytest_cache/
+.coverage
+.coverage.*
+htmlcov/
+coverage.xml
+.cache/
+
+# Environments / tooling
+.pixi/
+.venv/
+venv/
+.env
+
+# Editor / OS cruft
+.DS_Store
+**/.DS_Store
+.idea/
+.vscode/
+
+# Renovation reports & scratch (uncommitted, human-facing)
+RENOVATION_TODO.md
+bug_report.md
+potential_dead_code.md
+documentation_report.md
+*_report.md
+
+# Claude working files (never committed, per user preference)
+.claude/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,62 @@
+# Changelog
+
+All notable changes to `pybeh_pd` are documented here. This project adheres to
+[Semantic Versioning](https://semver.org/).
+
+## 0.1.0 — 2026-06-10
+
+First packaged release. The package was modernized from a single-file,
+`sys.path.append`-style module into a clean, tested, pip-installable package
+**without changing any observable analysis behavior** (a golden-master regression
+suite locks the numerical output).
+
+### Packaging & build
+- Converted to a `src/` layout package: `pybeh_pd/{__init__, _core, _pybeh}.py`.
+  The vendored pybeh subset (formerly `pybeh_copy.py`) is now the internal
+  `pybeh_pd._pybeh`, imported via a proper intra-package import (no more reliance
+  on the repo root being on `sys.path`).
+- Added a `pyproject.toml` (hatchling backend); installable via
+  `pip install .` / `pip install -e .` / `pip install git+https://…`. Builds clean
+  wheel + sdist.
+- Single-sourced `__version__` (read by hatchling).
+- Added a `pixi.toml` reproducible dev environment (Python 3.11/3.12/3.13) with
+  `build` / `test` / `test-cov` tasks.
+- Moved example notebooks and `LoftusMasson.m` into `examples/` (excluded from the
+  wheel); removed hardcoded `sys.path.append` lines from the notebooks.
+- Added an MIT `LICENSE`.
+
+### Compatibility
+- Supports Python 3.11–3.13 and both numpy 1.x and 2.x (pandas 2.x/3.x). Verified
+  green on numpy 1.24/1.26/2.4 across pandas 2.3/3.0.
+
+### Tests & CI
+- Added a golden-master **behavior-lock** regression suite plus comprehensive
+  per-function unit tests (hand-coded values, mathematical properties, edge
+  cases) covering all public functions.
+- Added real-data integration tests on a committed 5-session ltpFR2 sample (no
+  rhino/cmlreaders needed at test time), reproducing the temporal contiguity
+  effect and temporal clustering factor.
+- Added GitHub Actions: test matrix (Python 3.11–3.13 × numpy 1/2), wheel/sdist
+  build, and a pyright + ruff lint job.
+
+### Code quality
+- Added pyright type annotations across the package (0 errors / 0 warnings,
+  basic mode); added `pyrightconfig.json`.
+- Removed pyflakes-flagged dead imports and unused locals (behavior-neutral).
+
+### Known bugs (discovered, NOT fixed in this release)
+These were found during modernization and intentionally left unchanged so the
+behavior baseline stays clean; they are pinned by tests and will be fixed in a
+follow-up against the regression suite:
+- **B1** `loftus_masson_unequal_variance_kahana` references an undefined `mat`
+  (raises on every call).
+- **B2** `dist_fact` calls `warnings.warn` without importing `warnings`.
+- **B3** `pd_dist_fact_list`'s `if count == np.nan` guard never fires.
+- **B4** `pd_min_temp_fact` with the default `max_n_reps=1` raises `IndexError`.
+- **B5** the percentile-rank helpers crash on a Python-scalar `actual` under
+  numpy 2.
+- **B6** operator-precedence bug misclassifies NaN-padded recalls as intrusions
+  in `make_recalls_matrix` / `make_poss_recalls_matrix`.
+- **B7** `cousineau` with a single condition raises `ZeroDivisionError`.
+- **B8** the documented `groupby('subject').apply(pd_crp, …)` idiom breaks on
+  pandas 3 (the grouping column is dropped); use an explicit per-subject loop.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Computational Memory Lab, University of Pennsylvania
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -1,11 +1,103 @@
 # pybeh_pd
-
-A lightweight wrapper around pybeh for easy analysis and plotting of free recall data with pandas and seaborn.
-See the attached notebooks for examples of how to use this module and psifr test for comparison to a related package.
 
-It is just a module not yet a package so import by adding the path, i.e.
+A lightweight, pure-Python wrapper around [pybeh](https://github.com/pennmem/pybeh)
+for analyzing and plotting **free-recall** behavioral data with pandas and seaborn.
+It re-expresses pybeh's matrix-based analyses as functions that take **tidy,
+long-format pandas DataFrames** of presentation/recall events, and adds
+within-subject confidence-interval helpers for plotting.
+
+A minimal subset of `pybeh` is vendored (`pybeh_pd._pybeh`), so **no separate
+`pybeh` install is required**.
+
+## What it provides
+
+Given a long-format events DataFrame (one row per presented/recalled item), the
+`pd_*` functions compute, per subject:
+
+- **`pd_crp`** — lag conditional response probability (lag-CRP; the temporal
+  contiguity effect).
+- **`pd_temp_fact`** — temporal clustering factor (Polyn, Norman & Kahana, 2009).
+- **`pd_sem_crp`** — semantic CRP, binned by a similarity space.
+- **`pd_dist_fact`** — distance/similarity clustering factor.
+- **`pd_min_crp` / `pd_min_temp_fact`** — repeated-presentation-aware variants.
+- **`pd_sem_crp_list`, `pd_dist_fact_list`, and `*_sub`** — per-list / per-subject
+  aggregating variants.
+- **CI helpers** for within-subject error bars: `cousineau`
+  (Cousineau–Morey–O'Brien), `loftus_masson_analytic`, and the Loftus–Masson
+  Kahana ports.
+
+The underlying matrix builders (`make_recalls_matrix`, `get_all_matrices`, ...)
+and pybeh primitives (`crp`, `temp_fact`, ...) are also exposed.
+
+## Install
+
+Pure Python — install from source with pip:
+
+```bash
+pip install .                                   # from a clone
+pip install git+https://github.com/pennmem/pybeh_pd.git   # from GitHub
+pip install -e .                                # editable (for development)
 ```
-import sys
-sys.path.append('~/pybeh_pd')
+
+Requires Python 3.11–3.13 and numpy / pandas / scipy (installed automatically).
+
+## Quickstart
+
+```python
+import pandas as pd
 import pybeh_pd as pb
-```
+
+# One subject, two 4-item lists. WORD rows are presentations; REC_WORD rows are
+# recalls. `itemno` is the item id; recalls reference the presented item numbers.
+events = pd.DataFrame({
+    "subject": "subj1", "session": 0,
+    "list":   [0, 0, 0, 0,  0, 0, 0,    1, 1, 1, 1,  1, 1, 1],
+    "type":   ["WORD"] * 4 + ["REC_WORD"] * 3 + ["WORD"] * 4 + ["REC_WORD"] * 3,
+    "itemno": [11, 12, 13, 14,  12, 13, 14,   21, 22, 23, 24,  23, 22, 21],
+})
+
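+# Expects one subject's events. For many-subject data, loop over subjects
+# explicitly -- the groupby("subject").apply(pb.pd_crp, ...) idiom breaks on
+# pandas 3, which drops the grouping column:
+#   crps = {s: pb.pd_crp(g, lag_num=3) for s, g in events.groupby("subject")}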
+crp = pb.pd_crp(events, lag_num=3)
+print(crp[["lag", "prob"]])
+#   lag 0 is NaN; forward (+1) and backward (-1) adjacent transitions dominate.
+
+print("temporal clustering factor:", pb.pd_temp_fact(events))
+```
+
+The default column names are `subject` / `session` / `list` (the trial index),
+`type` (with `"WORD"` presentations and `"REC_WORD"` recalls), and `itemno`; all
+are overridable via keyword arguments (e.g. `itemno_column="item_num"`).
+
+See [`examples/`](examples/) for notebooks demonstrating FR1, catFR1, repFR1, and
+the Loftus–Masson confidence intervals on real datasets.
+
+## Development
+
+This repo uses [pixi](https://pixi.sh) for a reproducible dev environment:
+
+```bash
+pixi run build      # confirm the package imports
+pixi run test       # run the test suite
+pixi run test-cov   # with coverage
+```
+
+Or with plain pip/pytest:
+
+```bash
+pip install -e ".[test]"
+pytest
+```
+
+The test suite includes a golden-master **behavior-lock** regression suite,
+per-function unit tests, and integration tests on a committed 5-session sample of
+real ltpFR2 free-recall data (no external data access required).
+
+## Continuous integration
+
+GitHub Actions runs the suite across Python 3.11–3.13 and both numpy 1.x and 2.x
+(pandas 2.x/3.x), plus a wheel/sdist build and a pyright + ruff lint job.
+
+## License
+
+MIT.
diff --git a/conftest.py b/conftest.py
@@ -0,0 +1,15 @@
+"""Pytest configuration.
+
+Puts the ``src/`` layout package and the ``tests/`` helper dir on ``sys.path`` so
+``import pybeh_pd`` and ``from factories import ...`` / ``from real_data import
+...`` resolve during testing without requiring an install. When the package is
+also editable-installed (the pixi envs), the import resolves to the same ``src/``
+tree, so there is no conflict.
+"""
+import os
+import sys
+
+# Directory containing this conftest.py.
+_ROOT = os.path.dirname(os.path.abspath(__file__))
+# Each missing directory is prepended, so when both are added ``tests/`` ends up
+# ahead of ``src/`` on ``sys.path``; entries already present are left in place.
+for _p in (os.path.join(_ROOT, "src"), os.path.join(_ROOT, "tests")):
+    if _p not in sys.path:
+        sys.path.insert(0, _p)
diff --git a/FR1_analyses.ipynb → examples/FR1_analyses.ipynb b/FR1_analyses.ipynb → examples/FR1_analyses.ipynb
@@ -13,7 +13,6 @@
     "import seaborn as sns\n",
     "import pandas as pd\n",
     "import sys\n",
-    "sys.path.append('/home1/djhalp/pybeh_pd')\n",
     "import pybeh_pd as pb"
    ]
   },

diff --git a/LoftusMasson.m → examples/LoftusMasson.m b/LoftusMasson.m → examples/LoftusMasson.m
diff --git a/catFR1_analyses.ipynb → examples/catFR1_analyses.ipynb b/catFR1_analyses.ipynb → examples/catFR1_analyses.ipynb
@@ -13,7 +13,6 @@
     "import seaborn as sns\n",
     "import pandas as pd\n",
     "import sys\n",
-    "sys.path.append('/home1/djhalp/pybeh_pd')\n",
     "import pybeh_pd as pb"
    ]
   },

diff --git a/loftus_masson_example.ipynb → examples/loftus_masson_example.ipynb b/loftus_masson_example.ipynb → examples/loftus_masson_example.ipynb
@@ -12,7 +12,6 @@
     "from scipy import stats\n",
     "import pandas as pd\n",
     "import sys\n",
-    "sys.path.append('/home1/djhalp/pybeh_pd')\n",
     "import pybeh_pd as pb\n",
     "import matplotlib.pyplot as plt\n",
     "%matplotlib inline"

diff --git a/psifr_test.ipynb → examples/psifr_test.ipynb b/psifr_test.ipynb → examples/psifr_test.ipynb
diff --git a/repFR1_analyses.ipynb → examples/repFR1_analyses.ipynb b/repFR1_analyses.ipynb → examples/repFR1_analyses.ipynb
@@ -24,7 +24,6 @@
     "import seaborn as sns\n",
     "import pandas as pd\n",
     "import sys\n",
-    "sys.path.append('~/pybeh_pd')\n",
     "import pybeh_pd as pb\n",
     "import importlib\n",
     "importlib.reload(pb)"