From d285d5c386351ce83b8f63f405714cabf5f8bd1e Mon Sep 17 00:00:00 2001
From: Jammy2211
Date: Fri, 15 May 2026 10:02:42 +0100
Subject: [PATCH] ci: add live URL audit (weekly cron) + grandfather current
 broken URLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add .url_check_allowlist.txt at repo root containing the 9 broken URLs
the audit currently flags in HowToLens — mostly external paywalled /
dead links and a few internal docs renames that need editorial fixes.
The weekly cron job will only fail when a NEW broken URL appears that
isn't in this file.

Update .github/workflows/url_check.yml:
- keep the existing offline regex guard (runs on every PR + push, now
  with ~15 additional bad patterns thanks to the matching PyAutoBuild
  extension)
- add a url_check_live job that runs on schedule (Mon 04:00 UTC) and
  workflow_dispatch. On non-zero exit it opens or comments on a
  "[url-check] New broken URLs detected" tracking issue.
- on a clean run, auto-closes any prior open [url-check] issue.

Tool: PyAutoLabs/PyAutoBuild#87 (paired PR — must merge first for the
extended regex patterns and the new live tool to be available).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .github/workflows/url_check.yml | 74 ++++++++++++++++++++++++++++++++-
 .url_check_allowlist.txt        | 26 ++++++++++++
 2 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 .url_check_allowlist.txt

diff --git a/.github/workflows/url_check.yml b/.github/workflows/url_check.yml
index 41ee620..cdd5fe4 100644
--- a/.github/workflows/url_check.yml
+++ b/.github/workflows/url_check.yml
@@ -4,9 +4,16 @@ on:
   push:
     branches: [main]
   pull_request:
+  schedule:
+    - cron: '0 4 * * 1'  # Mondays 04:00 UTC
+  workflow_dispatch:
+
+permissions:
+  contents: read
 
 jobs:
-  url_check:
+  url_check_patterns:
+    name: Offline regex guard
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
@@ -21,3 +28,68 @@ jobs:
           path: PyAutoBuild
       - name: Run url_check.sh
         run: bash PyAutoBuild/autobuild/url_check.sh repo
+
+  url_check_live:
+    name: Live HTTP audit (weekly)
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'  # skipped on push / pull_request
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          path: repo
+      - name: Checkout PyAutoBuild
+        uses: actions/checkout@v4
+        with:
+          repository: PyAutoLabs/PyAutoBuild
+          ref: main  # tracks the tip of main, not a pinned tag or SHA
+          path: PyAutoBuild
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Install requests
+        run: pip install --quiet requests
+      - name: Run live URL audit
+        id: audit
+        run: |
+          set +e  # do not abort on a failing audit; its exit code is recorded below
+          body=$(bash PyAutoBuild/autobuild/url_check_live.sh repo)
+          rc=$?
+          printf '%s\n' "$body" > /tmp/url_audit_body.md
+          echo "rc=$rc" >> "$GITHUB_OUTPUT"
+          cat /tmp/url_audit_body.md
+          exit 0  # the step always succeeds; later steps branch on the rc output
+      - name: Open or update [url-check] tracking issue  # reuses only open issues whose title starts with "[url-check]"
+        if: steps.audit.outputs.rc != '0'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cd repo
+          existing=$(gh issue list --search '"[url-check]" in:title' --state open --json number,title --jq 'map(select(.title | startswith("[url-check]"))) | .[0].number // empty')
+          if [ -n "$existing" ]; then
+            echo "Updating existing tracking issue #$existing"
+            gh issue comment "$existing" --body-file /tmp/url_audit_body.md
+          else
+            echo "Opening new tracking issue"  # retried without --label if the labelled create fails
+            gh issue create \
+              --title "[url-check] New broken URLs detected" \
+              --body-file /tmp/url_audit_body.md \
+              --label url-check 2>/dev/null \
+              || gh issue create \
+                --title "[url-check] New broken URLs detected" \
+                --body-file /tmp/url_audit_body.md
+          fi
+      - name: Close stale tracking issue if audit is clean  # closes only open issues whose title starts with "[url-check]"
+        if: steps.audit.outputs.rc == '0'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cd repo
+          for n in $(gh issue list --search '"[url-check]" in:title' --state open --json number,title --jq '.[] | select(.title | startswith("[url-check]")) | .number'); do
+            echo "Closing now-clean tracking issue #$n"
+            gh issue close "$n" --comment "Weekly URL audit is clean — closing automatically."
+          done
diff --git a/.url_check_allowlist.txt b/.url_check_allowlist.txt
new file mode 100644
index 0000000..97dae87
--- /dev/null
+++ b/.url_check_allowlist.txt
@@ -0,0 +1,26 @@
+# Known broken URLs grandfathered for url_check_live.sh.
+# Format: one URL per line. Lines starting with '#' and blank lines are ignored.
+# Sample location follows each URL as a trailing comment.
+#
+# This file is referenced by .github/workflows/url_check.yml (weekly cron).
+# url_check_live.sh fails CI when a URL is broken AND not in this file.
+# To accept new breakage: append the URL here. To force a fix: leave it out.
+
+# Academic citations (paywall / DOI / journal)
+https://academic.oup.com/mnras/article-abstract/478/4/4738/5001434?redirectedFrom=fulltext # CITATIONS.md:11
+https://academic.oup.com/mnras/article/452/3/2940/1749640 # CITATIONS.md:11
+https://academic.oup.com/mnras/article/488/1/1387/5526256 # CITATIONS.md:22
+
+# Code of Conduct boilerplate
+http://geekfeminism.wikia.com/wiki/Conference_anti-harassment/Policy # CODE_OF_CONDUCT.md:305
+
+# Colab refs to notebooks no longer in HowTo/workspace
+https://colab.research.google.com/github/PyAutoLabs/HowToLens/blob/main/notebooks/chapter_3_search_chaining/tutorial_4_complex_source.ipynb # scripts/chapter_3_search_chaining/README.md:16
+https://colab.research.google.com/github/PyAutoLabs/HowToLens/blob/main/notebooks/chapter_4_pixelizations/tutorial_11_adapt_regularization.py.ipynb # scripts/chapter_4_pixelizations/README.md:27
+https://colab.research.google.com/github/PyAutoLabs/HowToLens/blob/main/notebooks/chapter_4_pixelizations/tutorial_3_pixelizations.ipynb # scripts/chapter_4_pixelizations/README.md:11
+
+# External dead / paywalled / departmental pages
+http://www.ita.uni-heidelberg.de/~massimo/sub/Lectures/gl_all.pdf # README.md:66
+
+# Internal image refs missing from main
+https://raw.githubusercontent.com/PyAutoLabs/PyAutoLens/main/docs/overview/images/overview_1_lensing/schematic.jpg # scripts/chapter_1_introduction/tutorial_1_grids_and_galaxies.py:12