-
-
Notifications
You must be signed in to change notification settings - Fork 30
60 lines (59 loc) · 2.29 KB
/
linkcheck.yml
File metadata and controls
60 lines (59 loc) · 2.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Weekly link check against the HTML archive attached to the latest release.
# Runs on a schedule and can also be started manually (workflow_dispatch).
name: Link Checker [Anaconda, Linux]
on:
  schedule:
    # UTC 23:00 is early morning in Australia (9am)
    - cron: '0 23 * * 1'
  workflow_dispatch:
jobs:
  link-checking:
    name: QuantEcon AI link checking
    runs-on: "ubuntu-latest"
    permissions:
      issues: write  # required for QuantEcon link-checker
    steps:
      # Download the latest release HTML archive (permanent, not subject to artifact expiry)
      - name: Get latest release asset URL
        id: release
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          # GITHUB_REPOSITORY is provided by the runner; using it avoids
          # splicing an expression directly into the script text.
          # Take the first matching asset so the output is always one line.
          ASSET_URL=$(gh api "repos/${GITHUB_REPOSITORY}/releases/latest" \
            --jq '[.assets[] | select(.name | endswith(".tar.gz")) | .browser_download_url] | .[0] // empty')
          if [ -z "$ASSET_URL" ]; then
            echo "::error::No .tar.gz asset found on the latest release"
            exit 1
          fi
          echo "asset-url=$ASSET_URL" >> "$GITHUB_OUTPUT"
      - name: Download and extract release HTML
        env:
          # Passed via the environment rather than interpolated into the script.
          ASSET_URL: ${{ steps.release.outputs.asset-url }}
        run: |
          # pipefail: a failed download must fail the step, not just tar.
          set -euo pipefail
          mkdir -p _site
          # -f makes curl exit non-zero on HTTP errors instead of piping the
          # error page into tar; -S still prints the error despite -s.
          curl -fsSL "$ASSET_URL" | tar -xz -C _site
      - name: Pre-process HTML to exclude known bot-blocking domains
        env:
          # Domains that block automated crawlers (space-separated).
          # These are also listed in linkcheck_ignore in lectures/_config.yml.
          SKIP_DOMAINS: "fred.stlouisfed.org"
        run: |
          python3 - <<'PYEOF'
          import os
          import re
          from pathlib import Path
          skip_domains = os.environ.get('SKIP_DOMAINS', '').split()
          count = 0
          for html_file in Path('_site').rglob('*.html'):
              try:
                  content = html_file.read_text(encoding='utf-8')
              except UnicodeDecodeError as exc:
                  print(f"Skipping {html_file}: encoding error - {exc}")
                  continue
              # Point every link to a skipped domain at "#" so the checker
              # never requests it.
              for domain in skip_domains:
                  pattern = r'href="https?://' + re.escape(domain) + r'[^"]*"'
                  content = re.sub(pattern, 'href="#"', content)
              html_file.write_text(content, encoding='utf-8')
              count += 1
          print(f"Pre-processing complete ({count} files processed)")
          PYEOF
      - name: AI-Powered Link Checker
        # NOTE(review): `@main` tracks a moving branch; consider pinning to a
        # release tag or commit SHA once one is chosen.
        uses: QuantEcon/action-link-checker@main
        with:
          html-path: '_site'
          fail-on-broken: 'false'
          silent-codes: '403,503'
          ai-suggestions: 'true'
          create-issue: 'true'