diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..8c08b87 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,47 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [master] + +jobs: + pre-commit: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + # Need full history so the diff range below can resolve the + # base/before SHA against actual commits, not just HEAD. + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install pre-commit + run: pip install pre-commit + + - name: Install image tooling (jpegoptim, oxipng, svgo) + run: | + sudo apt-get update + sudo apt-get install -y jpegoptim + cargo install oxipng --locked + npm install -g svgo + + - name: Run pre-commit on the PR's changed files + if: github.event_name == 'pull_request' + run: | + pre-commit run --show-diff-on-failure \ + --from-ref ${{ github.event.pull_request.base.sha }} \ + --to-ref ${{ github.event.pull_request.head.sha }} + + - name: Run pre-commit on the push's changed files + if: github.event_name == 'push' + run: | + pre-commit run --show-diff-on-failure \ + --from-ref ${{ github.event.before }} \ + --to-ref ${{ github.sha }} diff --git a/.github/workflows/site-health.yml b/.github/workflows/site-health.yml new file mode 100644 index 0000000..b9cc02d --- /dev/null +++ b/.github/workflows/site-health.yml @@ -0,0 +1,76 @@ +name: site-health + +on: + pull_request: + paths: + - '*.html' + - '*.md' + - '_layouts/**' + - '_includes/**' + - '_config.yml' + - '_data/**' + - '_posts/**' + - 'images/**' + - 'css/**' + - 'js/**' + - 'Gemfile' + - 'Gemfile.lock' + - 'lychee.toml' + - '.html5validator.yaml' + schedule: + # Weekly Monday 9am UTC, matching update-publications cadence. 
+ - cron: '0 9 * * 1' + workflow_dispatch: + +jobs: + site-health: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.3' + bundler-cache: true + + - name: Build site + run: bundle exec jekyll build + + - name: Check links with lychee + id: lychee + uses: lycheeverse/lychee-action@v2 + with: + # --exclude-path takes a regex matched against file paths. + # Skip legacy blog posts (deferred to a content-cleanup phase). + args: >- + --config ./lychee.toml + --no-progress + --root-dir ${{ github.workspace }}/_site + --exclude-path _site/blog + _site + output: lychee-report.md + fail: true + + # On failure, post the lychee report as a sticky PR comment so + # the specific failing URLs are easy to see without digging + # through job logs. Sticky = updates in place rather than piling up. + - name: Comment lychee report on PR + if: failure() && github.event_name == 'pull_request' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: lychee + path: lychee-report.md + + - name: Validate HTML + id: html5validator + # Soft-fail for now: surface errors via the workflow log but + # don't gate the PR on them. The Jekyll site has Bootstrap-3-era + # HTML that needs a separate cleanup pass to be HTML5-spec clean. 
+ continue-on-error: true + uses: Cyb3r-Jak3/html5validator-action@v7.2.0 + with: + root: _site/ + config: .html5validator.yaml diff --git a/.github/workflows/update-publications.yml b/.github/workflows/update-publications.yml index 060c643..83fc043 100644 --- a/.github/workflows/update-publications.yml +++ b/.github/workflows/update-publications.yml @@ -9,32 +9,32 @@ on: jobs: update-publications: runs-on: ubuntu-latest - + steps: - name: Checkout repository uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} - + - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install scholarly pyyaml "httpx==0.27.2" - + - name: Run publication updater run: | python scholar_scraper.py - + - name: Check for changes id: git-check run: | git diff --exit-code _data/publications.yaml || echo "changed=true" >> $GITHUB_OUTPUT - + - name: Commit and push if changed if: steps.git-check.outputs.changed == 'true' run: | @@ -42,4 +42,4 @@ jobs: git config --local user.name "github-actions[bot]" git add _data/publications.yaml git commit -m "Auto-update publications from Google Scholar [skip ci]" - git push \ No newline at end of file + git push diff --git a/.gitignore b/.gitignore index 3c7baf4..a5833ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ _site/ .sass-cache/ -Gemfile -Gemfile.lock +.lycheecache **/.DS_Store diff --git a/.html5validator.yaml b/.html5validator.yaml new file mode 100644 index 0000000..906d129 --- /dev/null +++ b/.html5validator.yaml @@ -0,0 +1,10 @@ +# html5validator config. See https://github.com/svenkreiss/html5validator. +# Used by .github/workflows/site-health.yml. + +root: _site +match: '*.html' + +# Patterns to ignore. Add Bootstrap-3-era / Jekyll-specific noise here +# as it surfaces; keep the list short so real errors aren't masked. 
+ignore_re: + [] diff --git a/.image-size-overrides b/.image-size-overrides new file mode 100644 index 0000000..4df66e6 --- /dev/null +++ b/.image-size-overrides @@ -0,0 +1,6 @@ +# Allow-list for images that legitimately exceed the 1 MB size cap. +# One repo-relative path per line. Lines starting with # are comments. +# Example: +# images/research/big-zebrafish-figure.png +# +# Files listed here will not block commits but will still print a warning. diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..249f7a0 --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,12 @@ +{ + "default": true, + "MD001": false, + "MD013": false, + "MD025": false, + "MD033": false, + "MD034": false, + "MD036": false, + "MD041": false, + "MD045": false, + "MD059": false +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a4b54c8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,58 @@ +# Pre-commit hook configuration. See https://pre-commit.com/ for docs. +# Contributor setup: README.md > Contributing. + +repos: + - repo: local + hooks: + - id: jpegoptim + name: jpegoptim (compress JPEGs to q=85, strip metadata) + entry: jpegoptim --max=85 --strip-all --preserve --all-progressive + language: system + types: [jpeg] + + - id: oxipng + name: oxipng (lossless PNG optimization) + entry: oxipng --opt 4 --strip safe + language: system + types: [png] + + - id: svgo + name: svgo (SVG minification) + entry: svgo --multipass + language: system + types: [svg] + + # Runs after the compression hooks so it sees post-compression sizes. + - id: image-size-cap + name: image-size-cap (warn >500 KB, block >1 MB) + entry: python3 scripts/check_image_size.py + language: system + types_or: [image, svg] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + exclude: '\.svg$' + - id: end-of-file-fixer + # svgo strips the trailing newline; let it win on .svg files. 
+ exclude: '\.svg$' + - id: check-yaml + - id: check-json + - id: check-merge-conflict + - id: mixed-line-ending + args: [--fix=lf] + - id: check-added-large-files + args: [--maxkb=1000] + - id: detect-private-key + + - repo: https://github.com/adrienverge/yamllint + rev: v1.38.0 + hooks: + - id: yamllint + + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.48.0 + hooks: + - id: markdownlint + args: [--fix] diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..69b9175 --- /dev/null +++ b/.yamllint @@ -0,0 +1,15 @@ +# yamllint configuration. See https://yamllint.readthedocs.io/. +# Start from the bundled "relaxed" profile and loosen further for data +# files (where long lines are normal and indent style varies). + +extends: relaxed + +rules: + document-start: disable + line-length: disable + indentation: + spaces: 2 + indent-sequences: whatever + check-multi-line-strings: false + truthy: + check-keys: false diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..e1bc0c3 --- /dev/null +++ b/Gemfile @@ -0,0 +1,5 @@ +source "https://rubygems.org" + +# GitHub Pages pins all gem versions used by the live build, so installing +# this locally gives a reproducible preview that matches production. 
+gem "github-pages", group: :jekyll_plugins diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..0de80f3 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,313 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (8.1.3) + base64 + bigdecimal + concurrent-ruby (~> 1.0, >= 1.3.1) + connection_pool (>= 2.2.5) + drb + i18n (>= 1.6, < 2) + json + logger (>= 1.4.2) + minitest (>= 5.1) + securerandom (>= 0.3) + tzinfo (~> 2.0, >= 2.0.5) + uri (>= 0.13.1) + addressable (2.9.0) + public_suffix (>= 2.0.2, < 8.0) + base64 (0.3.0) + bigdecimal (4.1.2) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.12.2) + colorator (1.1.0) + commonmarker (0.23.12) + concurrent-ruby (1.3.6) + connection_pool (3.0.2) + csv (3.3.5) + dnsruby (1.73.1) + base64 (>= 0.2) + logger (~> 1.6) + simpleidn (~> 0.2.1) + drb (2.2.3) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + ethon (0.18.0) + ffi (>= 1.15.0) + logger + eventmachine (1.2.7) + execjs (2.10.1) + faraday (2.14.1) + faraday-net_http (>= 2.0, < 3.5) + json + logger + faraday-net_http (3.4.2) + net-http (~> 0.5) + ffi (1.17.4-aarch64-linux-gnu) + ffi (1.17.4-aarch64-linux-musl) + ffi (1.17.4-arm-linux-gnu) + ffi (1.17.4-arm-linux-musl) + ffi (1.17.4-arm64-darwin) + ffi (1.17.4-x86_64-darwin) + ffi (1.17.4-x86_64-linux-gnu) + ffi (1.17.4-x86_64-linux-musl) + forwardable-extended (2.6.0) + gemoji (4.1.0) + github-pages (232) + github-pages-health-check (= 1.18.2) + jekyll (= 3.10.0) + jekyll-avatar (= 0.8.0) + jekyll-coffeescript (= 1.2.2) + jekyll-commonmark-ghpages (= 0.5.1) + jekyll-default-layout (= 0.1.5) + jekyll-feed (= 0.17.0) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.16.1) + jekyll-include-cache (= 0.2.1) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.3) + 
jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.8.0) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.2.0) + jekyll-theme-cayman (= 0.2.0) + jekyll-theme-dinky (= 0.2.0) + jekyll-theme-hacker (= 0.2.0) + jekyll-theme-leap-day (= 0.2.0) + jekyll-theme-merlot (= 0.2.0) + jekyll-theme-midnight (= 0.2.0) + jekyll-theme-minimal (= 0.2.0) + jekyll-theme-modernist (= 0.2.0) + jekyll-theme-primer (= 0.6.0) + jekyll-theme-slate (= 0.2.0) + jekyll-theme-tactile (= 0.2.0) + jekyll-theme-time-machine (= 0.2.0) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.13.0) + kramdown (= 2.4.0) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.4) + mercenary (~> 0.3) + minima (= 2.5.1) + nokogiri (>= 1.16.2, < 2.0) + rouge (= 3.30.0) + terminal-table (~> 1.4) + webrick (~> 1.8) + github-pages-health-check (1.18.2) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (>= 4, < 8) + public_suffix (>= 3.0, < 6.0) + typhoeus (~> 1.3) + html-pipeline (2.14.3) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.8.1) + i18n (1.14.8) + concurrent-ruby (~> 1.0) + jekyll (3.10.0) + addressable (~> 2.4) + colorator (~> 1.0) + csv (~> 3.0) + em-websocket (~> 0.5) + i18n (>= 0.7, < 2) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + webrick (>= 1.0) + jekyll-avatar (0.8.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.2.2) + coffee-script (~> 2.2) + coffee-script-source (~> 1.12) + jekyll-commonmark (1.4.0) + commonmarker (~> 0.22) + jekyll-commonmark-ghpages (0.5.1) + commonmarker (>= 0.23.7, < 1.1.0) + jekyll (>= 3.9, < 4.0) + jekyll-commonmark (~> 1.4.0) + rouge (>= 2.0, < 5.0) + jekyll-default-layout (0.1.5) + jekyll (>= 3.0, < 5.0) + jekyll-feed (0.17.0) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.16.1) + jekyll (>= 3.4, < 5.0) + octokit (>= 4, < 7, != 
4.4.0) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-mentions (1.6.0) + html-pipeline (~> 2.3) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.3) + addressable (~> 2.0) + jekyll (>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.6.0) + jekyll (> 3.5, < 5.0) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + jemoji (0.13.0) + gemoji (>= 3, < 5) + html-pipeline (~> 2.2) + jekyll (>= 
3.0, < 5.0) + json (2.19.5) + kramdown (2.4.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.4) + listen (3.10.0) + logger + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + logger (1.7.0) + mercenary (0.3.6) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (6.0.6) + drb (~> 2.0) + prism (~> 1.5) + net-http (0.9.1) + uri (>= 0.11.1) + nokogiri (1.19.3-aarch64-linux-gnu) + racc (~> 1.4) + nokogiri (1.19.3-aarch64-linux-musl) + racc (~> 1.4) + nokogiri (1.19.3-arm-linux-gnu) + racc (~> 1.4) + nokogiri (1.19.3-arm-linux-musl) + racc (~> 1.4) + nokogiri (1.19.3-arm64-darwin) + racc (~> 1.4) + nokogiri (1.19.3-x86_64-darwin) + racc (~> 1.4) + nokogiri (1.19.3-x86_64-linux-gnu) + racc (~> 1.4) + nokogiri (1.19.3-x86_64-linux-musl) + racc (~> 1.4) + octokit (4.25.1) + faraday (>= 1, < 3) + sawyer (~> 0.9) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + prism (1.9.0) + public_suffix (5.1.1) + racc (1.8.1) + rb-fsevent (0.11.2) + rb-inotify (0.11.1) + ffi (~> 1.0) + rexml (3.4.4) + rouge (3.30.0) + rubyzip (2.4.1) + safe_yaml (1.0.5) + sass (3.7.4) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.9.3) + addressable (>= 2.3.5) + faraday (>= 0.17.3, < 3) + securerandom (0.4.1) + simpleidn (0.2.3) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + typhoeus (1.6.0) + ethon (>= 0.18.0) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + unicode-display_width (1.8.0) + uri (1.1.1) + webrick (1.9.2) + +PLATFORMS + aarch64-linux-gnu + aarch64-linux-musl + arm-linux-gnu + arm-linux-musl + arm64-darwin + x86_64-darwin + x86_64-linux-gnu + x86_64-linux-musl + +DEPENDENCIES + github-pages + +BUNDLED WITH + 2.5.22 diff --git a/README.md b/README.md index e1fb94b..ba61ca5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # pearsonlab.github.io + Lab webpage ### Notes + In order to render 
publications into page: 1. Download citations from Google Scholar in .bib format. @@ -9,3 +11,106 @@ In order to render publications into page: 1. `. process_refs` Everything else should just work. + +## Contributing + +### Local preview + +The site is built by Jekyll. To preview changes locally: + +```sh +bundle install +bundle exec jekyll serve --livereload +``` + +Then open http://localhost:4000. + +### Pre-commit hooks + +The repo uses [pre-commit](https://pre-commit.com) to compress images and +catch other small issues before they land. One-time setup: + +```sh +pip install pre-commit +pre-commit install + +# image tooling: run the line for your platform, then install svgo (all platforms): +sudo apt install jpegoptim && cargo install oxipng # Ubuntu/Debian +brew install jpegoptim oxipng # macOS +npm install -g svgo +``` + +After that, every `git commit` will automatically: + +- Compress staged JPEGs to quality 85 with `jpegoptim` +- Losslessly optimize staged PNGs with `oxipng` +- Minify staged SVGs with `svgo` +- Block any image still over 1 MB after compression, and warn on images + between 500 KB and 1 MB (see "Image size policy" below) +- Strip trailing whitespace, fix mixed line endings, ensure files end + with a final newline +- Validate YAML and JSON files (`_data/`, `_config.yml`, etc.) +- Block accidental merge-conflict markers and committed private keys +- Lint Markdown posts with `markdownlint --fix` (auto-fixes most + formatting issues; see `.markdownlint.json` for the disabled rules) +- Style-check YAML files with `yamllint` (config: `.yamllint`) + +If a hook modifies a file, the commit is aborted; re-stage the modified +file and commit again. To run the hooks manually across just your +changes vs. `master`: + +```sh +pre-commit run --from-ref origin/master --to-ref HEAD +``` + +(Avoid `--all-files` — it will pick up the legacy ~26 MB of +unoptimized images and fail. A separate one-shot backfill phase +will normalize those.) 
+ +The same hooks run in CI on every PR — scoped to the PR's changed +files. If you skip the local install, CI will tell you what would +have changed. + +### Per-page SEO and social previews + +The site uses [jekyll-seo-tag](https://github.com/jekyll/jekyll-seo-tag) to +emit OpenGraph and Twitter card meta tags. Defaults come from `_config.yml` +(site title, description, lab logo as fallback image). Pages can override +any of these via front matter: + +```yaml +--- +title: "Pearson Lab Research" +description: "We study ..." +image: /images/research/cover.png +--- +``` + +A `sitemap.xml` is generated automatically at build time by +[jekyll-sitemap](https://github.com/jekyll/jekyll-sitemap). + +### Site health + +A separate CI workflow (`.github/workflows/site-health.yml`) builds the +site and runs: + +- **lychee** to check for broken links (config: `lychee.toml`) +- **html5validator** to check rendered HTML (config: `.html5validator.yaml`) + +This runs on PRs that touch site content, on manual dispatch, and once +a week on Mondays. The weekly schedule catches link rot from external +hosts before someone notices on the live site. + +### Image size policy + +To keep the repo lean, images are subject to: + +- **Hard cap:** 1 MB per file (post-compression). Commits with larger + images are blocked. +- **Soft warning:** 500 KB. Commits succeed but print a warning. + +Most properly-sized lab-member photos at q=85 land in 100–300 KB. If a +figure genuinely needs to exceed 1 MB (e.g., a high-resolution research +figure where detail matters), add its repo-relative path to +`.image-size-overrides` and commit that change with a brief justification +in the commit message. diff --git a/_config.yml b/_config.yml index eea9272..4b0db5b 100644 --- a/_config.yml +++ b/_config.yml @@ -1,2 +1,21 @@ title: "Pearson Lab at Duke University" +description: >- + Computational neuroscience at Duke University. 
We build statistical and + machine-learning tools to understand how brains generate behavior. +url: "https://pearsonlab.github.io" +author: "Pearson Lab" + markdown: kramdown + +plugins: + - jekyll-sitemap + - jekyll-seo-tag + +# Default OpenGraph image for social previews. Applied site-wide via +# Jekyll defaults so jekyll-seo-tag picks it up. Pages can override +# via `image:` in front matter. +defaults: + - scope: + path: "" + values: + image: /images/plab_hex_icon_gray.png diff --git a/_includes/blog_image.html b/_includes/blog_image.html index 2fdc734..f6a3bc6 100644 --- a/_includes/blog_image.html +++ b/_includes/blog_image.html @@ -1,4 +1,4 @@ -
{{ include.description }}
{{ include.description }}
\ No newline at end of file + diff --git a/_includes/data/time_alloc.json b/_includes/data/time_alloc.json index 5593639..25272ac 100644 --- a/_includes/data/time_alloc.json +++ b/_includes/data/time_alloc.json @@ -1 +1 @@ -{"Experimental Design": [5, 12, 11, 20, 40, 15, 3, 25, 6, 6, 9, 45, 14, 9, 20, 10, 10, 6, 4, 10, 12, 11, 12, 16, 21, 20, 26, 10, 10, 10, 20, 9, 13, 5, 21, 21, 32, 5, 10, 16, 12, 20, 8, 16, 11, 5, 20, 11, 10, 26, 7, 5, 31, 10, 15, 10, 5, 7, 12, 10, 25, 5, 10, 10, 15, 6, 33, 13, 10, 6, 11, 11, 4, 15, 20, 15, 10, 5, 11, 10, 12, 13, 7, 10, 10, 5, 15, 9, 10, 50, 12, 30, 8, 13, 12], "Piloting": [5, 4, 10, 10, 25, 5, 10, 10, 2, 6, 28, 0, 6, 20, 36, 10, 10, 19, 8, 21, 15, 8, 13, 16, 21, 29, 7, 20, 5, 5, 5, 2, 5, 15, 0, 5, 5, 5, 9, 10, 15, 10, 2, 16, 24, 10, 30, 1, 18, 22, 6, 10, 11, 30, 15, 0, 10, 9, 9, 20, 10, 5, 10, 10, 25, 7, 5, 11, 15, 11, 11, 7, 6, 23, 5, 5, 0, 20, 4, 10, 3, 7, 5, 5, 10, 15, 8, 11, 10, 9, 4, 11, 6, 8, 5], "Data Collection": [40, 24, 7, 20, 10, 48, 30, 35, 50, 22, 40, 0, 20, 30, 13, 20, 24, 14, 24, 31, 10, 15, 11, 17, 6, 31, 22, 10, 33, 25, 25, 8, 17, 10, 0, 0, 25, 60, 16, 16, 23, 40, 28, 29, 30, 52, 10, 22, 30, 42, 30, 18, 16, 10, 15, 65, 25, 11, 56, 35, 20, 40, 30, 30, 10, 43, 12, 44, 22, 34, 16, 53, 32, 18, 20, 20, 30, 35, 32, 25, 36, 28, 15, 15, 25, 60, 22, 24, 50, 11, 25, 11, 35, 27, 27], "Data Analysis": [30, 38, 59, 20, 10, 17, 17, 15, 36, 49, 19, 0, 26, 29, 31, 10, 24, 42, 30, 31, 28, 30, 47, 17, 20, 20, 22, 40, 20, 25, 25, 40, 38, 20, 6, 9, 21, 15, 44, 50, 34, 15, 40, 16, 29, 18, 20, 18, 17, 10, 38, 41, 13, 30, 15, 15, 35, 43, 9, 20, 30, 30, 10, 20, 10, 28, 20, 15, 24, 34, 44, 20, 29, 31, 25, 40, 30, 25, 32, 30, 24, 16, 30, 50, 40, 10, 27, 26, 20, 10, 44, 48, 32, 31, 28], "Writing Results": [10, 16, 10, 20, 10, 12, 24, 10, 3, 11, 2, 10, 17, 10, 0, 20, 24, 14, 28, 5, 24, 26, 12, 17, 15, 0, 19, 10, 14, 25, 15, 21, 20, 20, 51, 48, 14, 10, 12, 8, 11, 10, 20, 16, 3, 4, 10, 36, 11, 0, 5, 10, 14, 10, 15, 6, 10, 27, 9, 10, 7, 15, 
25, 20, 30, 10, 15, 5, 20, 10, 10, 6, 7, 9, 20, 15, 20, 10, 16, 15, 15, 23, 27, 15, 10, 5, 16, 22, 5, 10, 10, 0, 12, 18, 16], "Review Process": [10, 6, 3, 10, 5, 3, 16, 5, 3, 6, 2, 45, 17, 2, 0, 30, 8, 5, 6, 2, 11, 10, 5, 17, 17, 0, 4, 10, 18, 10, 10, 20, 7, 30, 22, 17, 3, 5, 9, 0, 5, 5, 2, 7, 3, 11, 10, 12, 14, 0, 14, 16, 15, 10, 25, 4, 15, 3, 5, 5, 8, 5, 15, 10, 10, 6, 15, 12, 9, 5, 8, 3, 22, 4, 10, 5, 10, 5, 5, 10, 10, 13, 16, 5, 5, 5, 12, 8, 5, 10, 5, 0, 7, 3, 12]} \ No newline at end of file +{"Experimental Design": [5, 12, 11, 20, 40, 15, 3, 25, 6, 6, 9, 45, 14, 9, 20, 10, 10, 6, 4, 10, 12, 11, 12, 16, 21, 20, 26, 10, 10, 10, 20, 9, 13, 5, 21, 21, 32, 5, 10, 16, 12, 20, 8, 16, 11, 5, 20, 11, 10, 26, 7, 5, 31, 10, 15, 10, 5, 7, 12, 10, 25, 5, 10, 10, 15, 6, 33, 13, 10, 6, 11, 11, 4, 15, 20, 15, 10, 5, 11, 10, 12, 13, 7, 10, 10, 5, 15, 9, 10, 50, 12, 30, 8, 13, 12], "Piloting": [5, 4, 10, 10, 25, 5, 10, 10, 2, 6, 28, 0, 6, 20, 36, 10, 10, 19, 8, 21, 15, 8, 13, 16, 21, 29, 7, 20, 5, 5, 5, 2, 5, 15, 0, 5, 5, 5, 9, 10, 15, 10, 2, 16, 24, 10, 30, 1, 18, 22, 6, 10, 11, 30, 15, 0, 10, 9, 9, 20, 10, 5, 10, 10, 25, 7, 5, 11, 15, 11, 11, 7, 6, 23, 5, 5, 0, 20, 4, 10, 3, 7, 5, 5, 10, 15, 8, 11, 10, 9, 4, 11, 6, 8, 5], "Data Collection": [40, 24, 7, 20, 10, 48, 30, 35, 50, 22, 40, 0, 20, 30, 13, 20, 24, 14, 24, 31, 10, 15, 11, 17, 6, 31, 22, 10, 33, 25, 25, 8, 17, 10, 0, 0, 25, 60, 16, 16, 23, 40, 28, 29, 30, 52, 10, 22, 30, 42, 30, 18, 16, 10, 15, 65, 25, 11, 56, 35, 20, 40, 30, 30, 10, 43, 12, 44, 22, 34, 16, 53, 32, 18, 20, 20, 30, 35, 32, 25, 36, 28, 15, 15, 25, 60, 22, 24, 50, 11, 25, 11, 35, 27, 27], "Data Analysis": [30, 38, 59, 20, 10, 17, 17, 15, 36, 49, 19, 0, 26, 29, 31, 10, 24, 42, 30, 31, 28, 30, 47, 17, 20, 20, 22, 40, 20, 25, 25, 40, 38, 20, 6, 9, 21, 15, 44, 50, 34, 15, 40, 16, 29, 18, 20, 18, 17, 10, 38, 41, 13, 30, 15, 15, 35, 43, 9, 20, 30, 30, 10, 20, 10, 28, 20, 15, 24, 34, 44, 20, 29, 31, 25, 40, 30, 25, 32, 30, 24, 16, 30, 50, 40, 10, 27, 26, 20, 
10, 44, 48, 32, 31, 28], "Writing Results": [10, 16, 10, 20, 10, 12, 24, 10, 3, 11, 2, 10, 17, 10, 0, 20, 24, 14, 28, 5, 24, 26, 12, 17, 15, 0, 19, 10, 14, 25, 15, 21, 20, 20, 51, 48, 14, 10, 12, 8, 11, 10, 20, 16, 3, 4, 10, 36, 11, 0, 5, 10, 14, 10, 15, 6, 10, 27, 9, 10, 7, 15, 25, 20, 30, 10, 15, 5, 20, 10, 10, 6, 7, 9, 20, 15, 20, 10, 16, 15, 15, 23, 27, 15, 10, 5, 16, 22, 5, 10, 10, 0, 12, 18, 16], "Review Process": [10, 6, 3, 10, 5, 3, 16, 5, 3, 6, 2, 45, 17, 2, 0, 30, 8, 5, 6, 2, 11, 10, 5, 17, 17, 0, 4, 10, 18, 10, 10, 20, 7, 30, 22, 17, 3, 5, 9, 0, 5, 5, 2, 7, 3, 11, 10, 12, 14, 0, 14, 16, 15, 10, 25, 4, 15, 3, 5, 5, 8, 5, 15, 10, 10, 6, 15, 12, 9, 5, 8, 3, 22, 4, 10, 5, 10, 5, 5, 10, 10, 13, 16, 5, 5, 5, 12, 8, 5, 10, 5, 0, 7, 3, 12]} diff --git a/_includes/head.html b/_includes/head.html index 2e31fd8..d0e433e 100644 --- a/_includes/head.html +++ b/_includes/head.html @@ -3,8 +3,9 @@ - - + + {% seo %} + diff --git a/_includes/jsload.html b/_includes/jsload.html index b7518e6..28f86a5 100644 --- a/_includes/jsload.html +++ b/_includes/jsload.html @@ -8,4 +8,4 @@ - \ No newline at end of file + diff --git a/_layouts/home.html b/_layouts/home.html index 2425983..c4f61d0 100644 --- a/_layouts/home.html +++ b/_layouts/home.html @@ -11,4 +11,4 @@

{{ page.desc }}

{% include jsload.html %} - \ No newline at end of file + diff --git a/_posts/2015-11-13-big-data-nih.md b/_posts/2015-11-13-big-data-nih.md index 820c8e1..065e0e9 100644 --- a/_posts/2015-11-13-big-data-nih.md +++ b/_posts/2015-11-13-big-data-nih.md @@ -6,7 +6,7 @@ author: John Pearson category: blog --- - + > Big data is like teenage sex: everyone talks about it, nobody really knows how to do it, everyone thinks everyone else is doing it, so everyone claims they are doing it... > diff --git a/_posts/2015-11-20-pride_study.md b/_posts/2015-11-20-pride_study.md index c048b8a..55cf920 100644 --- a/_posts/2015-11-20-pride_study.md +++ b/_posts/2015-11-20-pride_study.md @@ -6,7 +6,7 @@ author: Shariq Iqbal category: blog --- -The Atlantic recently did a cool [feature](http://www.theatlantic.com/magazine/archive/2015/12/the-return-of-electroshock-therapy/413179/) on Dr. Sarah (Holly) Lisanby that highlights some of the work she has done over the course of her career regarding ECT (Electro-Convulsive Therapy). The article is really well done, and I would recommend checking it out. As with anything on the internet though, avoid the comments. +The Atlantic recently did a cool [feature](http://www.theatlantic.com/magazine/archive/2015/12/the-return-of-electroshock-therapy/413179/) on Dr. Sarah (Holly) Lisanby that highlights some of the work she has done over the course of her career regarding ECT (Electro-Convulsive Therapy). The article is really well done, and I would recommend checking it out. As with anything on the internet though, avoid the comments. Dr. Lisanby is the P.I. on a study concerning the effectiveness of ECT in depressed elders that we have been doing some analysis for over the past couple of months. The goal of our analysis is to be able to predict outcomes (most importantly, remission status), from the rest of the data. The data set has been collected across seven hospitals and consists of hundreds of separate treatments of over one hundred patients. 
For a clinical data set, this is pretty large-scale, but, as with any clinical data, there are certain limitations. diff --git a/_posts/2016-9-9-time-allocation-in-neuro.md b/_posts/2016-9-9-time-allocation-in-neuro.md index 550aa6a..3f2a537 100644 --- a/_posts/2016-9-9-time-allocation-in-neuro.md +++ b/_posts/2016-9-9-time-allocation-in-neuro.md @@ -11,6 +11,7 @@ jsexternals: - https://cdn.plot.ly/plotly-latest.min.js --- ### The Setup + A couple of weeks ago, as I was preparing to teach our incoming graduate students about data analysis, I ran across the following assertion in my notes: neuroscientists spend more time on data analysis than any other research activity. I had zero proof for this, but it felt truthy. @@ -21,7 +22,7 @@ So I did a little experiment. I asked my colleagues in the [Center for Cognitive By the way, that link is still up. If you haven't taken the survey but work in neuroscience, [go take it now](https://duke.qualtrics.com/SE/?SID=SV_4SLoFFC7fLr7j9z). -### The results: +### The results **Indeed, data analysis is the single most time-consuming activity in the research process** @@ -33,8 +34,8 @@ But there's also a lot of variability overall. My intuition is that subfields li
+### Some correlations -### Some correlations: Even though I didn't ask respondents to report their subfields, I was curious whether the data were perhaps multimodal, suggesting clusters of responses, but the [violin plot](https://en.wikipedia.org/wiki/Violin_plot) didn't bear that out. However it's still interesting to ask how correlated the allocations were with each other: ------ @@ -53,7 +54,7 @@ tr:hover {background-color: #f5f5f5} **Writing Results** |-0.05|-0.36|-0.44|-0.19| **Review Process** |0.12|-0.24|-0.38|-0.38|0.36 ------ +------ So, even though all these numbers are required to add to 1, and so we expect a negative correlation between them (roughly -14% based on a uniform Dirichlet prior with K=6), we still notice a couple of interesting features: diff --git a/_posts/2017-4-18-job-ad.md b/_posts/2017-4-18-job-ad.md index 57d042c..aae6970 100644 --- a/_posts/2017-4-18-job-ad.md +++ b/_posts/2017-4-18-job-ad.md @@ -8,6 +8,7 @@ category: blog The laboratory of Dr. John Pearson ([http://pearsonlab.github.io](http://pearsonlab.github.io)) is seeking a data scientist/research assistant to support its applied machine learning research program. This is a one-year full-time position with the possibility of renewal. Women and minorities particularly encouraged to apply. The data scientist will be responsible for: + - Managing our cloud-based pipeline for analyzing hundreds of gigabytes of human brain data - Collecting, cleaning, and analyzing data from online behavioral experiments involving thousands of subjects - Analyzing neuroscience data using deep learning models @@ -15,6 +16,7 @@ The data scientist will be responsible for: Previous data scientists from the lab have gone on to graduate school (machine learning), as well as data engineering and cloud computing roles in the private sector. 
We are especially looking for candidates who: + - have previous research experience - are recent graduates - have strong programming skills diff --git a/_posts/2018-10-30-high-throughput-legal-decisions.md b/_posts/2018-10-30-high-throughput-legal-decisions.md index 12695ba..adfecf9 100644 --- a/_posts/2018-10-30-high-throughput-legal-decisions.md +++ b/_posts/2018-10-30-high-throughput-legal-decisions.md @@ -8,7 +8,7 @@ category: blog Today, our paper on legal decision-making goes online at [Nature Human Behaviour](https://www.nature.com/articles/s41562-018-0451-z.epdf?author_access_token=gW_gZL0F4bNCBdSfJdfHqtRgN0jAjWel9jnR3ZoTv0OPcExbUXFEBLmRIJVwmtiNjh9IEH2pkC2Nh_cBrWPkHuJj4keS7hpDBQvmnU20N9jF3OGevYkvLVEkxopzUvo61hticf34wy0yLHXrWmQ-AA%3D%3D). You can read more about the genesis of the project [here](https://socialsciences.nature.com/channels/1745-behind-the-paper/posts/40535-searching-for-justice-how-marketing-research-can-shed-light-on-decisions-in-the-criminal-justice-system). Briefly, we used a large-scale survey approach based on randomly generated legal cases to show three things: -1. It's possible to estimate (using Bayesian hierarchical models) how groups of individuals weight different types of legal evidence, even when not all individuals see not all of the evidence combinations. +1. It's possible to estimate (using Bayesian hierarchical models) how groups of individuals weight different types of legal evidence, even when not all individuals see not all of the evidence combinations. 2. Prospective jurors (mTurk participants) assigned some weight to the accusation itself. That is, they rated these cases as a little convincing even when no evidence for guilt was presented. The more seriousness the crime, the higher that rating. 3. Participants with legal training focused entirely on the evidence, but their overall ratings of case strength *were still* correlated with how serious the crime was. 
diff --git a/_posts/2018-12-5-incubator-award.md b/_posts/2018-12-5-incubator-award.md index 55e55ce..845ea0f 100644 --- a/_posts/2018-12-5-incubator-award.md +++ b/_posts/2018-12-5-incubator-award.md @@ -10,4 +10,4 @@ category: blog


-
\ No newline at end of file +
diff --git a/_posts/2019-1-16-poster-award.md b/_posts/2019-1-16-poster-award.md index 8cd7d6d..3231e25 100644 --- a/_posts/2019-1-16-poster-award.md +++ b/_posts/2019-1-16-poster-award.md @@ -5,4 +5,4 @@ post_title: "Anne Draelos wins best poster" author: John Pearson category: blog --- -Congratulations to Anne, who won best poster for her work on real-time analysis of zebrafish data at the [Duke Research Computing Symposium](https://rc.duke.edu/symposium-2019/)! +Congratulations to Anne, who won best poster for her work on real-time analysis of zebrafish data at the [Duke Research Computing Symposium](https://rc.duke.edu/symposium-2019/)! diff --git a/_posts/2019-4-20-plos-cb.md b/_posts/2019-4-20-plos-cb.md index f0e5576..0a7c9a6 100644 --- a/_posts/2019-4-20-plos-cb.md +++ b/_posts/2019-4-20-plos-cb.md @@ -5,10 +5,10 @@ post_title: "New papers on strategic decision making" author: Anne Draelos category: blog --- -We have two new papers out on dynamic and strategic decision making. +We have two new papers out on dynamic and strategic decision making. -The first, published in PLoS Computational Biology as ["Latent goal models for dynamic strategic interaction"](https://doi.org/10.1371/journal.pcbi.1006895), proposed a new model that is capable of reproducing the rich behavior of monkeys playing against each other in a dynamic decision task. +The first, published in PLoS Computational Biology as ["Latent goal models for dynamic strategic interaction"](https://doi.org/10.1371/journal.pcbi.1006895), proposed a new model that is capable of reproducing the rich behavior of monkeys playing against each other in a dynamic decision task. -Our second paper was published in Nature Communications, ["Bayesian nonparametric models characterize instantaneous strategies in a competitive dynamic game"](https://www.nature.com/articles/s41467-019-09789-4). 
Here, we used Gaussian Processes to model the policy and value functions of participants as a function of both game state and opponent identity. +Our second paper was published in Nature Communications, ["Bayesian nonparametric models characterize instantaneous strategies in a competitive dynamic game"](https://www.nature.com/articles/s41467-019-09789-4). Here, we used Gaussian Processes to model the policy and value functions of participants as a function of both game state and opponent identity. -Congrats to Sam and Kelsey in particular for their hard work! +Congrats to Sam and Kelsey in particular for their hard work! diff --git a/_posts/2019-6-26-poster-award.md b/_posts/2019-6-26-poster-award.md index a617db4..e4853e0 100644 --- a/_posts/2019-6-26-poster-award.md +++ b/_posts/2019-6-26-poster-award.md @@ -5,4 +5,4 @@ post_title: "Anne Draelos receives Ruth K Broad Postdoctoral Award" author: John Pearson category: blog --- -Congratulations to Anne, who was selected for a Ruth K Broad Postdoctoral Award for her project, “Real-Time Functional Characterization of Neural Circuits”. This work will focus on developing an online data analysis platform capable of both determining neural function and adaptively selecting targets for intervention in real-time. +Congratulations to Anne, who was selected for a Ruth K Broad Postdoctoral Award for her project, “Real-Time Functional Characterization of Neural Circuits”. This work will focus on developing an online data analysis platform capable of both determining neural function and adaptively selecting targets for intervention in real-time. 
diff --git a/_posts/2019-7-26-huang-poster.md b/_posts/2019-7-26-huang-poster.md index 4e72f76..87390a4 100644 --- a/_posts/2019-7-26-huang-poster.md +++ b/_posts/2019-7-26-huang-poster.md @@ -6,7 +6,7 @@ author: Anne Draelos category: blog --- -Congratulations to Raymond Chen for his excellent work this summer, as well as his successful presentation during the Huang Fellows summer poster session. We see great things in his future research endeavors! +Congratulations to Raymond Chen for his excellent work this summer, as well as his successful presentation during the Huang Fellows summer poster session. We see great things in his future research endeavors!


diff --git a/_posts/2020-02-24-summer-fellowships.md b/_posts/2020-02-24-summer-fellowships.md index 464cc65..112e2d0 100644 --- a/_posts/2020-02-24-summer-fellowships.md +++ b/_posts/2020-02-24-summer-fellowships.md @@ -5,6 +5,6 @@ post_title: "Undergraduate success for Summer 2020" author: Anne Draelos category: blog --- -Members of the Pearson Lab have successfully obtained competitive positions for summer research programs this year! Nicole Moiseyev was accepted into the Biomedical Big Data Science training program at Mt. Sinai in New York City, where she will tackle data-intensive biomedical problems with machine learning and data harmonization tools. Richard Sriworarat will be spending the summer at HHMI's Janelia Research Campus in Virginia as a part of their Undergraduate Scholars Program. He will work in the lab of Dr. Marius Pachitariu, where they use machine learning techniques to investigate the structure of neural activity recorded from tens of thousands of neurons simultaneously. +Members of the Pearson Lab have successfully obtained competitive positions for summer research programs this year! Nicole Moiseyev was accepted into the Biomedical Big Data Science training program at Mt. Sinai in New York City, where she will tackle data-intensive biomedical problems with machine learning and data harmonization tools. Richard Sriworarat will be spending the summer at HHMI's Janelia Research Campus in Virginia as a part of their Undergraduate Scholars Program. He will work in the lab of Dr. Marius Pachitariu, where they use machine learning techniques to investigate the structure of neural activity recorded from tens of thousands of neurons simultaneously.

diff --git a/_posts/2020-08-20-grad-school.md b/_posts/2020-08-20-grad-school.md index d23a5c8..502edf0 100644 --- a/_posts/2020-08-20-grad-school.md +++ b/_posts/2020-08-20-grad-school.md @@ -5,7 +5,7 @@ post_title: "Best wishes to Sam & Jack!" author: Anne Draelos category: blog --- -As the new school year begins, the Pearson lab has to say farewell to two of its amazing members, Sam and Jack. Both of them are heading off to begin graduate school. We are so proud of them and wish them the best in their future research pursuits! +As the new school year begins, the Pearson lab has to say farewell to two of its amazing members, Sam and Jack. Both of them are heading off to begin graduate school. We are so proud of them and wish them the best in their future research pursuits! Sam will be attending the Emory Biostatistics PhD program to work on modeling high-dimensional time-series data and Bayesian methods. He says he chose this program because the faculty has a broad range of research interests and the curriculum is both rigorous and eclectic (two years of coursework and separate training for teaching). diff --git a/about.md b/about.md index 976df8c..30609d7 100644 --- a/about.md +++ b/about.md @@ -15,25 +15,25 @@ nav: About # what shows up in the navbar at the top (do not define if you don't **We believe that the best hope for treating brain disorders is the discovery of fundamental principles underlying brain activity.** Theory is essential, but the best theory happens in conversation with data. That's why we work closely with experimentalists to build tools that not only make sense of existing data but suggest new hypotheses and new directions. - # What we value ## Open Science + We [code in the open](https://github.com/pearsonlab). We share data. Communicating science requires finding and telling the stories in our data, but these stories are worthless if they don't stand up to scrutiny from the community. 
## Natural Behavior + Nothing in neuroscience makes sense except in light of behavior.[^1] We prefer behaviors like foraging and stimuli like movies because they give us the opportunity to study the brain in something closer to its normal working mode. ## Dynamics + The brain functions in a rapidly changing environment and is itself an organ with complex internal dynamics. We favor models and methods that incorporate this behavior, particularly those drawn from the physics and statistics of dynamical systems. -## Collaboration -Almost all our projects are done in close collaboration with the experimentalists who generate the data we model. Our code and algorithms are designed to solve real scientific problems faced by real users. +## Collaboration +Almost all our projects are done in close collaboration with the experimentalists who generate the data we model. Our code and algorithms are designed to solve real scientific problems faced by real users.



- - [^1]: With apologies to Theodosius Dobzhansky. diff --git a/join_us.md b/join_us.md index e456f83..22f35d3 100644 --- a/join_us.md +++ b/join_us.md @@ -4,6 +4,7 @@ title: Joining our team # header at the top of the page nav: Join Us # what shows up in the navbar at the top (do not define if you don't want page in the navbar) --- # Postdocs + We're always interested in talking to qualified postdoc candidates. Particularly those with backgrounds in Statistics, Computer Science, Physics, or any other field where you do applied math for a living. Neuroscience experience is not required, though neuroscience interest is. # Graduate students @@ -12,43 +13,48 @@ We're always interested in talking to qualified postdoc candidates. Particularly Duke provides a wonderful environment for students interested in pursuing the kind of interdisciplinary research we do. As a result, P[λ]ab accepts graduate students through multiple programs: -- [Neurobiology](https://www.neuro.duke.edu/). The Department of Neurobiology offers a PhD through its [graduate training program](https://www.neuro.duke.edu/education/graduate-training-program). Research in the department focuses on biological and mechanistic approaches, typically at the cellular and molecular or systems and circuits level. This is John's home department, and the lab regularly hosts students for rotations or PhD mentorship. +- [Neurobiology](https://www.neuro.duke.edu/). The Department of Neurobiology offers a PhD through its graduate training program. Research in the department focuses on biological and mechanistic approaches, typically at the cellular and molecular or systems and circuits level. This is John's home department, and the lab regularly hosts students for rotations or PhD mentorship. - [Cognitive Neuroscience Admitting Program](https://dibs.duke.edu/centers/ccn/graduate-cnap). Unlike most PhD programs, CNAP is not tied to a single department. 
Rather, it gives students the opportunity to explore interdisciplinary research in cognitive neuroscience before ultimately affiliating with a department like Psychology & Neuroscience, Neurobiology, or Electrical and Computer Engineering. CNAP is administered by the [Center for Cognitive Neuroscience](https://dibs.duke.edu/centers/ccn), which includes faculty whose interests range from speech and development to neurons and computation. Students do three semester-long rotations and are often jointly mentored. The typical CNAP student has a strong interest in cognition and is looking to pursue research that crosses traditional departmental boundaries. John is a core faculty member of CCN. -- [Biostatistics and Bioinformatics](https://biostat.duke.edu/). The [PhD in biostatistics](https://biostat.duke.edu/education/phd-biostatistics/overview) is a rigorous program that focuses on statistical theory and its application to biomedical research. The program is small, and like most programs at Duke, highly competitive. Compared to neuroscience PhD programs, the number of required courses is high, and many admitted students enter having completed a master's degree. John has a secondary appointment in the deparment, and interested candidates should mention him in their applications. +- [Biostatistics and Bioinformatics](https://biostat.duke.edu/). The [PhD in biostatistics](https://biostat.duke.edu/education/phd-biostatistics/overview) is a rigorous program that focuses on statistical theory and its application to biomedical research. The program is small, and like most programs at Duke, highly competitive. Compared to neuroscience PhD programs, the number of required courses is high, and many admitted students enter having completed a master's degree. John has a secondary appointment in the department, and interested candidates should mention him in their applications. - [Psychology and Neuroscience](https://psychandneuro.duke.edu/). 
The department offers a [PhD program](https://psychandneuro.duke.edu/graduate) with emphases in multiple areas, including Cognition and Cognitive Neuroscience and Systems and Integrative Neuroscience. John has a secondary appointment in the department and can accept students. - [Electrical and Computer Engineering](https://ece.duke.edu/). The department offers a [PhD program](https://ece.duke.edu/grad) and has an exceptional faculty in [Signal and Information Processing](https://ece.duke.edu/faculty/signal-information-processing). John has a secondary appointment in ECE and can accept students. The program is by direct admission, so interested students should reach out to John in advance and mention the lab in their applications. **If you plan on applying,** keep a few things in mind: + - It's best to contact John in advance if you have a strong interest in our lab. Some programs may be a better fit for you than others. _Please also indicate which program(s) you are targeting._ - Graduate students cost money. Sadly, we don't have enough funding to support all the talented students who might be interested. Again, contacting John early will let you get our best guess as to how things stand, but given that students are funded through a variety of mechanisms (grants, fellowships, departmental resources) and often don't choose a lab until their second year, it can be difficult to project into the future. In general, students should target programs and schools with a variety of potential mentors. - We really prioritize students with quantitative skills. PhD students in the lab will spend the bulk of their time coding, analyzing, and deriving and so should expect to take additional courses in statistics and machine learning, even if those are not required by their program. We value previous experience in neuroscience, but we are particularly looking for: - - Demonstrated mathematical ability (usually coursework). 
- - Strong coding skills (public code, open source contributions, version control, testing). - - Experience analyzing data (statistical modeling, simulation, machine learning). + - Demonstrated mathematical ability (usually coursework). + - Strong coding skills (public code, open source contributions, version control, testing). + - Experience analyzing data (statistical modeling, simulation, machine learning). # Full-Time Research Associates -We sometimes advertise positions for research associates/data scientists. These roles are best-suited to post-baccalaureate students who plan to go on to graduate school or careers in data science. Applicants should send a cv, cover letter, and code sample (GitHub/BitBucket/Gitlab profile preferred) to John for consideration. Again, we prioritize applicants with strong coding and quantitative skills. + +We sometimes advertise positions for research associates/data scientists. These roles are best-suited to post-baccalaureate students who plan to go on to graduate school or careers in data science. Applicants should send a cv, cover letter, and code sample (GitHub/BitBucket/Gitlab profile preferred) to John for consideration. Again, we prioritize applicants with strong coding and quantitative skills. # Undergraduates P[λ]ab offers undergraduates several opportunities to contribute to the work of the lab: + - Through an independent study. - Through a senior thesis. - Through work-study or undergraduate research assistantships. -- Through several [summer research opportunities](https://undergraduateresearch.duke.edu/opportunities). (N.B.: John really doesn't know much about these, so if this is a route you'd like to take, it's up to you to figure out what programs would allow you to work with us.) +- Through several [summer research opportunities](https://undergraduateresearch.duke.edu/). 
(N.B.: John really doesn't know much about these, so if this is a route you'd like to take, it's up to you to figure out what programs would allow you to work with us.) A few points to note: + - There is very little busy work in the lab. Many of the traditional grunt tasks assigned to undergraduate researchers either don't exist (collecting and organizing data) or are highly technical (setting up and running cloud environments). All of our projects involve computer programming. Most involve some math. To be a successful undergraduate researcher, you must be able to contribute meaningfully, and that requires a certain level of technical skill at the outset. -- That said, if you are interested in working with us and willing to put in the time, we have a [list of resources for self-study](../learning.html). John is also willing to meet and discuss what informal mentoring along these lines might look like. +- That said, if you are interested in working with us and willing to put in the time, we have a [list of resources for self-study](../learning.html). John is also willing to meet and discuss what informal mentoring along these lines might look like. - For reasons of equity, **we pay all our undergraduate researchers.** We might ask you for a brief trial period, but we do not in general offer unpaid internships. This means, in practice, that the bar for accepting students into the lab is high (though cf. the previous point). # High School Students While we encourage students at all levels to get involved in research early on in their academic careers, having high school students work with the lab poses a couple of serious challenges: + 1. Duke takes its responsibility to ensure the protection of minors very seriously, which entails tight restrictions on their involvement in research labs. In most cases, the benefits to us and the students do not outweigh the costs. 2. 
Because of the very high bar for contribution in the lab and the fact that we do not offer remote positions, it is nearly impossible for high school students to participate in a way that produces a meaningful research experience for them or a measurable contribution to the lab. -As a result **we do not accept high school students into the lab except through select Duke-affiliated programs.** For instance, we are open to hosting students through [DUNE](https://dibs.duke.edu/education/dune/), and those interested in our work should note this during the application process, but in general, we encourage high school students to seek out structured research experiences tailored to their career stage. \ No newline at end of file +As a result **we do not accept high school students into the lab except through select Duke-affiliated programs.** For instance, we are open to hosting students through [DUNE](https://dibs.duke.edu/education/dune/), and those interested in our work should note this during the application process, but in general, we encourage high school students to seek out structured research experiences tailored to their career stage. diff --git a/js/jumbo_scroll.js b/js/jumbo_scroll.js index 63e50e8..3c8a184 100644 --- a/js/jumbo_scroll.js +++ b/js/jumbo_scroll.js @@ -6,4 +6,4 @@ function parallax(){ $(window).scroll(function(e){ parallax(); -}); \ No newline at end of file +}); diff --git a/learning.md b/learning.md index a5609ee..c542795 100644 --- a/learning.md +++ b/learning.md @@ -4,23 +4,26 @@ title: Getting up to speed # header at the top of the page nav: Learning # what shows up in the navbar at the top (do not define if you don't want page in the navbar) --- # How do I get started? + {:.no_toc} I'm frequently asked by students, especially neuroscience students, how they should go about improving their {programming, computing, statistics} skills. This page is partly an answer to that. It's mostly my opinions, with no claim to being comprehensive. 
The wonderful upside of learning to program in the internet age is that there is so much information and so many options that you don't have to go with my recommendations. # Contents + {:.no_toc} 1. Contents seed {:toc} - # Learning to program ## General comments + - My advice here pertains to scientific programming. If you want to learn web development or build device drivers, this may not be for you. - [StackOverflow](https://stackoverflow.com/). If you have ever used a search engine to look up a programming question, you have probably run across StackOverflow. The site uses a question-and-answer format, with accepted answers clearly marked and the best answers upvoted. The site can be a bit intimidating to use ([there are a lot of guidelines for posting a good question](https://stackoverflow.com/help/how-to-ask)), but it's probably the best programming resource on the internet for passive search. If you're completely new to programming, it won't teach you, but for fixing well-defined problems, there's no substitute.[^sof_os] ## Choosing your first language + - Use whatever the people around you are using. It's frustrating enough to learn programming; take advantage of local expertise to help you. If you're struggling to learn functions and `if` statements, that can be done in pretty much any modern language, and the concepts will carry over to most others. - That said, here's my order of preference: 1. **Python**: Because everything. Python is used for scripting, building and scraping websites, and pretty much anything else where performance isn't critical. It is also the *de facto* standard in data science and machine learning. It's also comparatively easy to learn. Python is the new BASIC. What's more, Python skills actually help on a resume. I'll talk more about recommended packages/setup [below](#python-for-data-science) @@ -29,49 +32,56 @@ I'm frequently asked by students, especially neuroscience students, how they sho 1. **Matlab**: If you must. 
Matlab is pervasive in neuroscience and engineering, and it provides a decent ecosystem (professionally supported toolboxes, a decent IDE and debugger) out of the box. Provided, that is, your institution pays the substantial price tag. My complaints about Matlab mostly center on: (a) its painful ergonomics as a programming language[^matlab_woes] (I just don't find it fun to use); and (b) its absence in the software and data science industries (Matlab skills don't mean much when applying to those jobs). ## Learning your first language + I'll be vague here for one reason: there are too many choices, and none is a clear winner. All you really want at this initial phase is an acquaintance with basic programming: variables, control flow, functions, etc. Some people prefer books here, but in the cases of Python and R there are also lots of free video series and online courses. Which you choose doesn't matter so long as: + - You devote serious time to learning. Programming is a skill and cannot be crammed. - You actually write code. This is a bit like learning a foreign language: you have to speak to get better. No passive learning. It really helps to have a project here, even a side project, so you have something to work toward. If you're coming to Python from a different language and want a quick overview, I highly recommend Jake Vanderplas's [Whirlwind Tour of Python](https://jakevdp.github.io/WhirlwindTourOfPython/). It's perhaps a little more than what many scientists need to know to get started, but it's free and excellent. ## In addition + - For Python, once you've gotten a basic acquaintance with the language, and after you've worked on your [scientific programming](#python-for-data-science) skills, it's worth going back to invest in more advanced aspects of the language. This pays dividends both in understanding others' code and in writing reusable libraries of your own. 
For Python, I particularly recommend [Fluent Python](http://shop.oreilly.com/product/0636920032519.do). # Python for Data Science + Most programming material online is targeted either at students learning their first programming language or professionals learning a new tool for software development. However, programming for science — writing code that runs, simulates, or analyzes experiments — carries its own set of unique challenges, and is distinct from general-purpose programming. That's why learning to program Python is distinct from learning "scientific Python," the suite of packages, tools, and practices that surround Python as used in (data) science. This is why I make every new student in my lab read (cover-to-cover) Jake Vanderplas's [Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/). The book covers exactly the toolset we use: IPython, Jupyter, NumPy, SciPy, Pandas, Matplotlib, and Scikit-Learn. I don't know of a better, more comprehensive introduction to modern scientific Python. # Statistics + **Professional disclaimer:** I recommend a good grounding in statistical theory. It's worth the investment. But we're all busy people. What I usually end up recommending to students: + - [Data Analysis Using Regression and Multilevel/Hierarchical Models](http://www.stat.columbia.edu/~gelman/arm/). This was my first introduction to applied Bayesian analysis. Surprisingly readable for students without much statistical background and teaches an approach to modeling data that I like and advocate. As a bonus, covers Markov Chain Monte Carlo sampling tools like [Stan](http://mc-stan.org/) that are necessary in practice. - [A First Course in Bayesian Statistical Methods](https://www.stat.washington.edu/people/pdhoff/book.php). This is the book they use for the intro Bayesian class at Duke. This is really for students who are investing in serious stats education. 
Finishing this one may not leave you quite ready to tackle your real data, but you will have a solid foundation to build on. - [All of Statistics](https://www.amazon.com/All-Statistics-Statistical-Inference-Springer/dp/0387402721/ref=sr_1_1?ie=UTF8&qid=1249141007&sr=8-1). A really nice single-volume introduction to statistics. A bit of a steep learning curve for the less mathematically inclined, but worth a mention. - For Duke students interested in the problem of actually implementing statistical models and methods in code, I highly recommend Cliburn Chan's [STA 663](https://github.com/cliburn/sta-663-2021), typically offered each spring. Teaches all the same software tools my lab uses. # Machine Learning: Classic + There are lots of great references. The current deep learning phase notwithstanding, machine learning is actually a very broad field, and what is old now will eventually be new again. Some references worth checking out: + - [An Introduction to Statistical Learning](https://www.statlearning.com) - [Elements of Statistical Learning](http://web.stanford.edu/~hastie/ElemStatLearn/) (free pdf) -- [Pattern Recognition and Machine Learning](https://www.springer.com/us/book/9780387310732) ([pdf](http://users.isr.ist.utl.pt/~wurmd/Livros/school/Bishop%20-%20Pattern%20Recognition%20And%20Machine%20Learning%20-%20Springer%20%202006.pdf)) +- [Pattern Recognition and Machine Learning](https://www.springer.com/us/book/9780387310732) - [Machine Learning: A Probabilistic Perspective](https://probml.github.io/pml-book/) (Duke uses this for its intro ML class) - - # Machine Learning: Deep Learning + So Deep Learning (aka neural networks) is eating the world. Briefly: + - Read the [Deep Learning Book](http://www.deeplearningbook.org/). It's even free online from the website. The field is moving incredibly rapidly, but this is now the standard introduction. 
- For online classes, we've had students take the [Stanford convnets class](http://cs231n.stanford.edu/) and Coursera's [Deep Learning Specialization](https://www.coursera.org/specializations/deep-learning). These are pretty basic but nice for people getting started.[^online_dl_classes] - Over the last few years, we've gradually moved from [TensorFlow](https://www.tensorflow.org/) to [PyTorch](https://pytorch.org/) and [JAX](https://github.com/google/jax). This mirrors broader trends among machine learning researchers, since the latter often allow for faster prototyping. # Notes - [^sof_os]: Note that information on StackOverflow tends to be proportional to the popularity of a given tool. So information on R and Python is extensive, while Matlab has comparatively less support. [^matlab_woes]: To be fair, Matlab is now an old language and was designed to ease the burden of engineers who were coding C and FORTRAN for a living. By those standards, it is highly successful, and new features are being added to the language all the time. [^online_dl_classes]: Keep in mind that these classes are great at introducing the material, but they tend to be very light on theory and more focused on simple applications. While they're a great starting point for high school students, undergraduates, or graduate students in other fields, students interested in machine learning research will be expected to engage with these ideas at a much higher mathematical level. diff --git a/location.md b/location.md index 40ed342..859a4c8 100644 --- a/location.md +++ b/location.md @@ -4,7 +4,7 @@ title: How to find us # header at the top of the page nav: Find Us # what shows up in the navbar at the top (do not define if you don't want page in the navbar) --- -We're located in the [Bryan Research Building](https://maps.duke.edu/?focus=68). This is a view of it from across Research Drive: +We're located in the [Bryan Research Building](https://maps.duke.edu/?focus=68). 
This is a view of it from across Research Drive: @@ -19,6 +19,3 @@ Inside the lobby, turn right and enter the department offices: Head toward the back, where you'll find the Center for Theoretical Neurobiology. The lab and John's office are inside. - - - diff --git a/lychee.toml b/lychee.toml new file mode 100644 index 0000000..5c16f0e --- /dev/null +++ b/lychee.toml @@ -0,0 +1,67 @@ +# Configuration for lychee link checker. See https://lychee.cli.rs/. +# Used by .github/workflows/site-health.yml. + +cache = true +max_cache_age = "30d" +timeout = 30 +max_retries = 3 +retry_wait_time = 5 + +# root_dir is passed via the lychee-action args (it requires an absolute +# path which differs between local dev and CI). See site-health.yml. + +# Use GET instead of HEAD. Many academic publishers (arxiv, nature, etc.) +# don't handle HEAD reliably and return 405 or other oddities; GET is +# what real browsers use and gets through more often. +method = "GET" + +# Browser-like UA — lychee's default ("lychee/...") triggers bot +# protection on many sites. +user_agent = "Mozilla/5.0 (compatible; lychee-link-checker; +https://lychee.cli.rs/)" + +# Status codes to treat as success. We're trying to catch dead links +# (404) and broken servers, not paywalls or anti-bot measures. +# - 200/204/206: success +# - 401: paywalled but exists +# - 403/405/429: bot-protection / rate-limit / method-not-allowed +# - 999: LinkedIn's bot-detection response +accept = [200, 204, 206, 401, 403, 405, 429, 999] + +# Hosts to skip: +# - pearsonlab.github.io: this is the site we're building. The sitemap +# and robots.txt contain absolute self-referential URLs by spec, but +# HTTP-checking them only tells us about the *currently deployed* +# site, not our build. Internal page-to-page links should be relative +# anyway (lychee resolves them against the local _site/ directory). +# - dibs-web01.vm.duke.edu: known dead/flaky external image host. 
The +# migration into the repo is tracked as Phase 5; remove this exclude +# once that migration lands so any new dead URLs are caught. +# - maxcdn.bootstrapcdn.com: stable enough that transient 5xx responses +# shouldn't fail the build. +exclude = [ + "pearsonlab\\.github\\.io", + "dibs-web01\\.vm\\.duke\\.edu", + "maxcdn\\.bootstrapcdn\\.com", + # localhost references in README.md (jekyll-readme-index copies + # README into _site/ so lychee scans it). + "localhost", + # Specific URLs with known issues that aren't worth blocking on: + # - thomasli.me: former undergrad's personal site, host unreachable + # from GitHub Actions runners (DNS or geoblock). + # - stat.washington.edu/people/pdhoff/book.php: SSL handshake fails + # in lychee but the page loads in browsers (server cert chain). + # - socialsciences.nature.com: TLS handshake failure (server config). + "thomasli\\.me", + "stat\\.washington\\.edu/people/pdhoff", + "socialsciences\\.nature\\.com", + # LinkedIn's bot-detection responds inconsistently to GitHub Actions + # runners: same URL can return 200, 301-to-stripped-URL, 404, or 999 + # depending on time, IP, and load. Treat the whole domain as + # unreliable for automated checking — link-rot detection on LinkedIn + # was never going to work in practice. + "linkedin\\.com", +] + +# Path excludes for legacy blog posts are passed via the workflow's +# args (--exclude-path) rather than configured here, to keep the +# arg-passing pattern uniform with --root-dir. See site-health.yml. diff --git a/people.html b/people.html index 4c245a8..3708d17 100644 --- a/people.html +++ b/people.html @@ -1,241 +1,240 @@ ---- -layout: default -title: Lab Members -desc: Meet our group -nav: People ---- - - - -

Principal Investigator:

-
-
- {% include person.html - image="https://dibs-web01.vm.duke.edu/pearson/assets/images/website/john.png" - site="http://jmxpearson.com" github="https://github.com/jmxpearson" - email="john.pearson@duke.edu" - linkedin="https://www.linkedin.com/in/jmxpearson" name="John Pearson" - desc="John earned his bachelor's degree in physics and math from the - University of Kentucky and his PhD in physics from Princeton. He became a - neuroscientist at Duke, where he did his postdoctoral training with Michael - Platt, working on the neurobiology of reward and decision-making. From 2015 to - 2018, he was an Assistant Research Professor in the Duke Institute for Brain - Sciences. In 2018, he moved back to the School of Medicine as an Assistant - Professor in the Department of Biostatistics & Bioinformatics, and in 2022, he moved to the - Department of Neurobiology, where he was promoted to Associate Professor in 2025. In addition, he maintains secondary - appointments in the departments of Biostatics & Bioinformatics, Psychology & Neuroscience, and Electrical - and Computer Engineering. (cv)



" %} -
- - -

Graduate Students:

-
-
- {% - include person.html image="https://dibs-web01.vm.duke.edu/pearson/assets/images/website/Trevor.jpg" - github="https://github.com/Trevorules" email="trevor.alston@duke.edu" - linkedin="www.linkedin.com/in/trevor-alston" name="Trevor Alston" desc="Trevor is a sixth-year Neurobiology graduate student at Duke University. - He holds two Biomedical Engineering Degrees; he received his BS degree from Rutgers University and his MS degree from Northwestern University. - His research interest is in Neuroengineering and brain-related technologies. Outside of the lab he likes hanging out with friends, playing games, and watching TV. -







" %} - {% - include person.html image="images/miles.jpg" - github="https://github.com/mdmarti" email="miles.martinez@duke.edu" - linkedin="https://www.linkedin.com/in/miles-martinez-00a496123/" - name="Miles Martinez" title="Electrical and Computer Engineering PhD Candidate" desc="Miles is a fifth-year graduate student in Electrical and Computer Engineering. - He entered Duke through the CNAP program. He currently develops and applies computational methods to - understand the neural bases of natural learning. When not doing - research, he loves getting outside and hiking or rock climbing - unless it's - too cold, in which case you can find him indoors cooking or playing games. -



" %} - {% - include person.html image="images/gong.png" - site="https://scholar.google.com/citations?user=zOKHGPgAAAAJ" - github="https://github.com/gongziyida" email="ziyi.gong@duke.edu" - name="Ziyi Gong" title="Neurobiology Graduate Student" desc="Ziyi is a fourth-year PhD candidate in Neurobiology. He received his B.S. in Computer Science with minors in Mathematics and Neuroscience from the University of Pittsburgh. He is interested in building mathematical models to summarize, explain, and predict phenomena observed in brains. Currently, he is developing models to explain how songbirds learn to sing. He previously worked with Dr. Nicolas Brunel on the roles of inhibition and inhibitory plasticity in the learning and memory of sequences. Outside of the lab he loves cooking and exercising, and occasionally plays the xiao (Chinese flute). -



" %} - {% - include person.html image="https://scholars.duke.edu/file/i6468953/image_6468953.JPG" - site="https://scholar.google.com/citations?user=Bc4NeD0AAAAJ" - github="https://github.com/DavidStA95" email="david.st-amand@duke.edu" - name="David St-Amand" title="Neurobiology Graduate Student" desc="David is a third-year graduate student in Neurobiology. He graduated from McGill University, where he received his BS in Psychology and his MS in Neuroscience. His research investigates how efficient coding models can help us understand how information is computed in the retina. Outside the lab, he likes to work out and play games. -



" %} - {% - include person.html image="https://dibs-web01.vm.duke.edu/pearson/assets/images/website/shiyang.jpg" - github="https://github.com/WaAaaAterfall" email="shiyang.pan@duke.edu" - linkedin="https://www.linkedin.com/in/shiyangpan" - site="https://waaaaaterfall.github.io" - name="Shiyang Pan" title="Electrical and Computer Engineering PhD Student" desc=" Shiyang is a graduate student in Electrical & Computer Engineering at Duke. - She received her Bsc in Applied Mathematics from University of Liverpool and Xi'an Jiaotong-Liverpool University. - Her research focuses on developing computational models and real-time methods to understand the activity of large-scale neural populations. - When not doing research she enjoys music, reading, and creative writing. -



" %} - {% - include person.html image="images/caitlin.jpg" - github="https://github.com/clewis7" email="caitlin.lewis@duke.edu" - linkedin="https://www.linkedin.com/in/caitlinllewis/" - name="Caitlin Lewis" - title="Electrical & Computer Engineering PhD Student" - desc="Caitlin is a second-year graduate student in Electrical & Computer Engineering at Duke. She received her B.S. in Computer Science and Statistics from the University of North Carolina at Chapel Hill. - Her research focuses on developing computational models and open-source software tools for real-time analysis and visualization of large-scale neural data. - Outside of the lab, she enjoys reading and playing pickleball with friends. -



" %} - -
- - -

Undergraduate Students:

-
-
- - -
-
-

Former members

-

Postdocs:

- -

Graduate Students:

- -

Research Associates:

- -

Undergraduates:

- - +--- +layout: default +title: Lab Members +desc: Meet our group +nav: People +--- + + + +

Principal Investigator:

+
+
+ {% include person.html + image="https://dibs-web01.vm.duke.edu/pearson/assets/images/website/john.png" + site="http://jmxpearson.com" github="https://github.com/jmxpearson" + email="john.pearson@duke.edu" + linkedin="https://www.linkedin.com/in/jmxpearson" name="John Pearson" + desc="John earned his bachelor's degree in physics and math from the + University of Kentucky and his PhD in physics from Princeton. He became a + neuroscientist at Duke, where he did his postdoctoral training with Michael + Platt, working on the neurobiology of reward and decision-making. From 2015 to + 2018, he was an Assistant Research Professor in the Duke Institute for Brain + Sciences. In 2018, he moved back to the School of Medicine as an Assistant + Professor in the Department of Biostatistics & Bioinformatics, and in 2022, he moved to the + Department of Neurobiology, where he was promoted to Associate Professor in 2025. In addition, he maintains secondary + appointments in the departments of Biostatics & Bioinformatics, Psychology & Neuroscience, and Electrical + and Computer Engineering. (cv)



" %} +
+ + +

Graduate Students:

+
+
+ {% + include person.html image="https://dibs-web01.vm.duke.edu/pearson/assets/images/website/Trevor.jpg" + github="https://github.com/Trevorules" email="trevor.alston@duke.edu" + linkedin="www.linkedin.com/in/trevor-alston" name="Trevor Alston" desc="Trevor is a sixth-year Neurobiology graduate student at Duke University. + He holds two Biomedical Engineering Degrees; he received his BS degree from Rutgers University and his MS degree from Northwestern University. + His research interest is in Neuroengineering and brain-related technologies. Outside of the lab he likes hanging out with friends, playing games, and watching TV. +







" %} + {% + include person.html image="images/miles.jpg" + github="https://github.com/mdmarti" email="miles.martinez@duke.edu" + linkedin="https://www.linkedin.com/in/miles-martinez-00a496123/" + name="Miles Martinez" title="Electrical and Computer Engineering PhD Candidate" desc="Miles is a fifth-year graduate student in Electrical and Computer Engineering. + He entered Duke through the CNAP program. He currently develops and applies computational methods to + understand the neural bases of natural learning. When not doing + research, he loves getting outside and hiking or rock climbing - unless it's + too cold, in which case you can find him indoors cooking or playing games. +



" %} + {% + include person.html image="images/gong.png" + site="https://scholar.google.com/citations?user=zOKHGPgAAAAJ" + github="https://github.com/gongziyida" email="ziyi.gong@duke.edu" + name="Ziyi Gong" title="Neurobiology Graduate Student" desc="Ziyi is a fourth-year PhD candidate in Neurobiology. He received his B.S. in Computer Science with minors in Mathematics and Neuroscience from the University of Pittsburgh. He is interested in building mathematical models to summarize, explain, and predict phenomena observed in brains. Currently, he is developing models to explain how songbirds learn to sing. He previously worked with Dr. Nicolas Brunel on the roles of inhibition and inhibitory plasticity in the learning and memory of sequences. Outside of the lab he loves cooking and exercising, and occasionally plays the xiao (Chinese flute). +



" %} + {% + include person.html image="https://scholars.duke.edu/file/i6468953/image_6468953.JPG" + site="https://scholar.google.com/citations?user=Bc4NeD0AAAAJ" + github="https://github.com/DavidStA95" email="david.st-amand@duke.edu" + name="David St-Amand" title="Neurobiology Graduate Student" desc="David is a third-year graduate student in Neurobiology. He graduated from McGill University, where he received his BS in Psychology and his MS in Neuroscience. His research investigates how efficient coding models can help us understand how information is computed in the retina. Outside the lab, he likes to work out and play games. +



" %} + {% + include person.html image="https://dibs-web01.vm.duke.edu/pearson/assets/images/website/shiyang.jpg" + github="https://github.com/WaAaaAterfall" email="shiyang.pan@duke.edu" + linkedin="https://www.linkedin.com/in/shiyangpan" + site="https://waaaaaterfall.github.io" + name="Shiyang Pan" title="Electrical and Computer Engineering PhD Student" desc=" Shiyang is a graduate student in Electrical & Computer Engineering at Duke. + She received her Bsc in Applied Mathematics from University of Liverpool and Xi'an Jiaotong-Liverpool University. + Her research focuses on developing computational models and real-time methods to understand the activity of large-scale neural populations. + When not doing research she enjoys music, reading, and creative writing. +



" %} + {% + include person.html image="images/caitlin.jpg" + github="https://github.com/clewis7" email="caitlin.lewis@duke.edu" + linkedin="https://www.linkedin.com/in/caitlinllewis/" + name="Caitlin Lewis" + title="Electrical & Computer Engineering PhD Student" + desc="Caitlin is a second-year graduate student in Electrical & Computer Engineering at Duke. She received her B.S. in Computer Science and Statistics from the University of North Carolina at Chapel Hill. + Her research focuses on developing computational models and open-source software tools for real-time analysis and visualization of large-scale neural data. + Outside of the lab, she enjoys reading and playing pickleball with friends. +



" %} + +
+ + +

Undergraduate Students:

+
+
+ + +
+
+

Former members

+

Postdocs:

+ +

Graduate Students:

+ +

Research Associates:

+ +

Undergraduates:

+ diff --git a/publications.html b/publications.html index 53e342c..6dd14e2 100644 --- a/publications.html +++ b/publications.html @@ -12,6 +12,6 @@ h3 li:not(:last-child) { margin-bottom: 0.75em; } - + {% include pubs.html %} diff --git a/research.md b/research.md index 3b68367..5dec431 100644 --- a/research.md +++ b/research.md @@ -7,7 +7,7 @@ nav: Research # Real-time analysis of neural data -Together with [Eva Naumann's](https://www.neuro.duke.edu/research/faculty-labs/naumann-lab) lab, we've developed *[improv](https://github.com/pearsonlab/improv)* ([paper](https://www.biorxiv.org/content/10.1101/2021.02.22.432006v1)), a software platform for designing and orchestrating adaptive experiments. By analyzing data in real time, we can measure, model, and manipulate neural activity in response to new data. We've shown how these tools, in conjunction with holographic photostimulation, could in principle map functional connectivity of large circuits in a few hours ([paper](https://proceedings.nips.cc/paper/2020/file/531d29a813ef9471aad0a5558d449a73-Paper.pdf), [expanded version](https://arxiv.org/abs/2007.13911)). More recently, we've worked on methods for fast dimensionality reduction and modeling of neural populations in real time ([paper](https://arxiv.org/abs/2108.13941)). +Together with [Eva Naumann's](https://www.neuro.duke.edu/research/faculty-labs/naumann-lab) lab, we've developed *[improv](https://github.com/project-improv/improv)* ([paper](https://www.biorxiv.org/content/10.1101/2021.02.22.432006v1)), a software platform for designing and orchestrating adaptive experiments. By analyzing data in real time, we can measure, model, and manipulate neural activity in response to new data. 
We've shown how these tools, in conjunction with holographic photostimulation, could in principle map functional connectivity of large circuits in a few hours ([paper](https://proceedings.nips.cc/paper/2020/file/531d29a813ef9471aad0a5558d449a73-Paper.pdf), [expanded version](https://arxiv.org/abs/2007.13911)). More recently, we've worked on methods for fast dimensionality reduction and modeling of neural populations in real time ([paper](https://arxiv.org/abs/2108.13941)).
@@ -39,13 +39,15 @@ Vocalization is a complex behavior that underlies vocal communication and vocal
# Efficient coding in the retina + How does the retina, which receives roughly one gigabit per second of visual information, compress that into something small enough to transmit down an optic nerve with a capacity of one megabit per second — three orders of magnitude lower? One answer, proposed by Horace Barlow half a century ago, is that the nervous system attempts to minimize redundancy, maximizing mutual information between the world and the brain's representation of it while minimizing metabolic costs. This theory makes a number of testable predictions, including the well-known fact that retinal ganglion cells should be active only in response to either increases or decreases in light levels within small regions of visual space — their receptive fields. -Working together with [Greg Field's lab](https://www.neuro.duke.edu/research/faculty-labs/field-lab), we've shown that patterns of alignment between different collections of receptive fields can also be explained using efficient coding theory. This was based on findings from Field lab ([paper](https://www.nature.com/articles/s41586-021-03317-5)), which led to surprising further theoretical results ([paper](https://www.nature.com/articles/s41586-021-03317-5)). In short, the most information-efficient receptive field arrangements are determined both by levels of noise in the system and the statistics of natural images. +Working together with [Greg Field's lab](https://www.neuro.duke.edu/research/faculty-labs/field-lab), we've shown that patterns of alignment between different collections of receptive fields can also be explained using efficient coding theory. This was based on findings from Field lab ([paper](https://www.nature.com/articles/s41586-021-03317-5)), which led to surprising further theoretical results ([paper](https://www.nature.com/articles/s41586-021-03317-5)). 
In short, the most information-efficient receptive field arrangements are determined both by levels of noise in the system and the statistics of natural images. Most recently, we've looked at what happens to mosaics as the number of neurons available for coding changes. There, [we found](https://www.biorxiv.org/content/10.1101/2022.08.29.505726v2) that greater numbers of available neurons lead to greater diversity in functionally defined cell types, starting with small temporally smoothing receptive fields and progressing toward larger temporally "differentiating" receptive fields. # Autoencoding whole-brain dynamics + Brain functional magnetic resonance imaging (fMRI) data is one of the most popular modalities in human and clinical neuroscience as it allows researchers to investigate relationships between high-level cognitive functions, brain activity patterns and experimental variables of interest. Traditional fMRI analysis methods utilize a mass univariate approach, wherein a General Linear Model (GLM) is fit to each small volume pixel ("voxel") independently and researchers correct for an inflated false positive rate post hoc. This method has been widely adopted due to its simplicity and ability to produce separate spatial brain maps, capturing the inferred effects of experimental variables on brain-wide activity. However, it fails to account for the rich spatial and temporal information inherent to this modality. In recent work, we've explored the idea of using variational autoencoder (VAE) methods nested inside a Generalized Additive Modeling (GAM) framework to model entire brain volumes together ([paper](https://static1.squarespace.com/static/59d5ac1780bd5ef9c396eda6/t/61080b1bcadb042a79974faf)). This approach better accounts for the spatial dependencies of fMRI data and generates separate, interpretable spatial maps capturing the inferred effects of experimental variables on whole-brain dynamics. 
In collaboration with [Kevin LaBar's lab](http://www.labarlab.com), we're working to expand on this work with the goal of characterizing brain spatio-temporal dynamics underlying transitions between emotional states in health and in disease. @@ -53,7 +55,7 @@ In recent work, we've explored the idea of using variational autoencoder (VAE) m
- A) VAE-GAM Model Schematic: brain volumes with signal of interest are compressed to a lower dimensional representation using encoder network. Sampled latents are then fed through decoder network to yield a base map and separate spatial effect maps. Each effect map is scaled by a potentially non-linear gain modelled using a Gaussian Process. Variance is modeled separately on a per voxel basis. B) Sample Effect Maps for VAE-GAM and GLM: effect maps for a visual stimulation task dataset analysed using the proposed VAE-GAM approach (top) vs. the tranditional (GLM) approach. + A) VAE-GAM Model Schematic: brain volumes with signal of interest are compressed to a lower dimensional representation using encoder network. Sampled latents are then fed through decoder network to yield a base map and separate spatial effect maps. Each effect map is scaled by a potentially non-linear gain modelled using a Gaussian Process. Variance is modeled separately on a per voxel basis. B) Sample Effect Maps for VAE-GAM and GLM: effect maps for a visual stimulation task dataset analysed using the proposed VAE-GAM approach (top) vs. the traditional (GLM) approach.
diff --git a/scholar_scraper.py b/scholar_scraper.py index 554e44a..09d4a0c 100644 --- a/scholar_scraper.py +++ b/scholar_scraper.py @@ -20,7 +20,7 @@ def create_id_from_publication(first_author_last, year, title): skip_words = {'the', 'a', 'an', 'in', 'on', 'at', 'of', 'for', 'to', 'and', 'or'} title_words = re.findall(r'\w+', title.lower()) first_word = next((word for word in title_words if word not in skip_words), title_words[0] if title_words else 'paper') - + # Clean and combine pub_id = f"{first_author_last.lower()}{year}{first_word}" # Remove any non-alphanumeric characters @@ -33,16 +33,16 @@ def parse_authors(author_string): """ if not author_string: return [] - + authors = [] # Split by 'and' or commas author_list = re.split(r'\s+and\s+|,\s*(?![^,]*,)', author_string) - + for author in author_list: author = author.strip() if not author: continue - + # Try to split into given and family names parts = author.split() if len(parts) >= 2: @@ -57,7 +57,7 @@ def parse_authors(author_string): 'family': author, 'given': '' }) - + return authors def extract_journal_from_citation(citation): @@ -68,23 +68,23 @@ def extract_journal_from_citation(citation): """ if not citation: return None - + # Split by comma to get the first part (journal name) parts = citation.split(',') - + if parts: journal = parts[0].strip() - + # Clean up common artifacts - remove trailing volume numbers journal = re.sub(r'\s+\d+\s*$', '', journal) - + # Remove year at the end if present journal = re.sub(r'\s+\d{4}\s*$', '', journal) - + # Check if reasonable journal name if len(journal) > 3 and not re.match(r'^\d+$', journal): return journal - + return None def get_author_publications(scholar_id): @@ -92,7 +92,7 @@ def get_author_publications(scholar_id): Fetch publications from Google Scholar for a given author ID """ print(f"Fetching publications for scholar ID: {scholar_id}", flush=True) - + # Set up a proxy generator to avoid rate limiting try: print("Setting up proxy to avoid rate 
limiting...", flush=True) @@ -103,28 +103,28 @@ def get_author_publications(scholar_id): except Exception as e: print(f"Warning: Could not set up proxy: {e}", flush=True) print("Continuing without proxy (may be slower)...", flush=True) - + try: # Search for author by ID print("Searching for author...", flush=True) author = scholarly.search_author_id(scholar_id) print("Filling author publications...", flush=True) author = scholarly.fill(author, sections=['publications']) - + publications = [] total_pubs = len(author['publications']) print(f"Found {total_pubs} publications to process", flush=True) - + for idx, pub in enumerate(author['publications'], 1): try: print(f"Processing publication {idx}/{total_pubs}...", flush=True) # Fill in publication details filled_pub = scholarly.fill(pub) bib = filled_pub['bib'] - + # Parse authors authors = parse_authors(bib.get('author', '')) - + # Get year year = None if bib.get('pub_year'): @@ -132,12 +132,12 @@ def get_author_publications(scholar_id): year = int(bib['pub_year']) except (ValueError, TypeError): pass - + # Create ID first_author_last = authors[0]['family'] if authors else 'unknown' title = bib.get('title', 'untitled') pub_id = create_id_from_publication(first_author_last, year or 0, title) - + # Build publication entry in CSL format pub_data = { 'id': pub_id, @@ -146,46 +146,46 @@ def get_author_publications(scholar_id): 'issued': [{'year': year}] if year else [], 'title': bib.get('title', ''), } - + # Add optional fields if they exist # Try multiple possible fields for journal/venue - container_title = (bib.get('journal') or - bib.get('venue') or + container_title = (bib.get('journal') or + bib.get('venue') or bib.get('conference') or bib.get('booktitle')) - + # If still no journal, try parsing from citation string if not container_title and bib.get('citation'): container_title = extract_journal_from_citation(bib['citation']) - + if container_title: pub_data['container-title'] = container_title - + if 
bib.get('publisher'): pub_data['publisher'] = bib['publisher'] - + if bib.get('pages'): pub_data['page'] = bib['pages'] - + if bib.get('volume'): pub_data['volume'] = str(bib['volume']) - + if bib.get('number') or bib.get('issue'): pub_data['issue'] = str(bib.get('number') or bib.get('issue')) - + # Add URL if available if filled_pub.get('pub_url'): pub_data['URL'] = filled_pub['pub_url'] - + publications.append(pub_data) print(f" - Added: {pub_id}", flush=True) - + except Exception as e: print(f" - Error processing publication: {e}", flush=True) continue - + return publications - + except Exception as e: print(f"Error fetching author publications: {e}", flush=True) sys.exit(1) @@ -199,18 +199,18 @@ def save_to_yaml(publications, output_file): -(x['issued'][0]['year'] if x.get('issued') and x['issued'] else 0), x.get('id', '') )) - + with open(output_file, 'w', encoding='utf-8') as f: - yaml.dump(publications, f, default_flow_style=False, allow_unicode=True, + yaml.dump(publications, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=1000, indent=2) - + print(f"\nSuccessfully wrote {len(publications)} publications to {output_file}", flush=True) if __name__ == "__main__": # Configuration SCHOLAR_ID = "4whjDosAAAAJ" OUTPUT_FILE = "_data/publications.yaml" - + print("Starting publication update...", flush=True) publications = get_author_publications(SCHOLAR_ID) save_to_yaml(publications, OUTPUT_FILE) diff --git a/scripts/check_image_size.py b/scripts/check_image_size.py new file mode 100755 index 0000000..021752f --- /dev/null +++ b/scripts/check_image_size.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +"""Block oversized images. Runs after the compression hooks. + +>1 MB blocks the commit. >500 KB warns. Paths in .image-size-overrides +are exempt from blocking (still warned). 
+""" + +from __future__ import annotations + +import os +import sys +from pathlib import Path + +WARN_BYTES = 500_000 +BLOCK_BYTES = 1_000_000 +OVERRIDE_FILE = Path(".image-size-overrides") + + +def load_overrides() -> set[str]: + if not OVERRIDE_FILE.exists(): + return set() + paths: set[str] = set() + for raw in OVERRIDE_FILE.read_text().splitlines(): + line = raw.split("#", 1)[0].strip() + if line: + paths.add(line) + return paths + + +def humanize(n: int) -> str: + if n >= 1_000_000: + return f"{n / 1_000_000:.1f} MB" + return f"{n / 1_000:.0f} KB" + + +def main(argv: list[str]) -> int: + overrides = load_overrides() + blocked: list[tuple[str, int]] = [] + warned: list[tuple[str, int]] = [] + for path in argv: + try: + size = os.path.getsize(path) + except OSError: + continue + if path in overrides: + if size > WARN_BYTES: + warned.append((path, size)) + continue + if size > BLOCK_BYTES: + blocked.append((path, size)) + elif size > WARN_BYTES: + warned.append((path, size)) + + for path, size in warned: + print( + f"warning: {path} is {humanize(size)} (>500 KB). " + f"Consider downscaling.", + file=sys.stderr, + ) + + for path, size in blocked: + print( + f"error: {path} is {humanize(size)} (>1 MB after compression). " + f"Downscale the image, or add the path to .image-size-overrides " + f"if the size is justified.", + file=sys.stderr, + ) + + return 1 if blocked else 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:]))