---
name: Data Processing

# FORRT Data Processing Workflow
#
# Purpose: Automated data fetching and processing for FORRT website content
#
# Triggers:
#   - Daily at midnight UTC (scheduled); individual steps self-gate to
#     weekly (Sundays) or monthly (1st of month) cadences
#   - Manual trigger via GitHub Actions UI (workflow_dispatch)
#
# Data Sources Processed:
#   1. Curated Resources (Python script)
#   2. Tenzing contributor data (Python script)
#   3. Google Analytics data (Python script)
#   4. Contributor analysis (R script) - Monthly only
#
# Outputs:
#   - Updated JSON data files in data/ directory
#   - Static copies in static/data/ for client-side access
#   - Automated PRs for contributor analysis (monthly)
#
# The processed data is used throughout the Hugo website for dynamic content.
on:
  schedule:
    # Daily at midnight UTC; weekly/monthly steps gate themselves on the date.
    - cron: '0 0 * * *'
  workflow_dispatch:
    inputs:
      skip_deploy:
        description: 'Skip triggering deploy after processing'
        required: false
        type: boolean
        default: false
      regenerate_glossary:
        description: 'Regenerate glossary files (only use when glossary sources are stable)'
        required: false
        type: boolean
        default: false
      regenerate_ga:
        description: 'Regenerate Google Analytics data'
        required: false
        type: boolean
        default: false
jobs:
  process-data:
    name: Process Data
    runs-on: ubuntu-22.04
    # contents: write is needed to push branches; pull-requests: write for automated PRs.
    permissions:
      contents: write
      pull-requests: write
    env:
      PYTHON_VERSION: "3.11"  # quoted so it stays a string, not a YAML float
| steps: | |
| #================ | |
| # Repository Setup | |
| #================ | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| # Checkout the repository code to the runner environment | |
| #====================== | |
| # Workflow Configuration | |
| #====================== | |
| # Check if this is a monthly run (1st of month or manual trigger) | |
| #========================================================== | |
| - name: Check if monthly run | |
| id: monthly-run | |
| run: | | |
| CURRENT_DAY=$(date +%d) | |
| if [ "$CURRENT_DAY" != "01" ] && [ "${{ github.event_name }}" != "workflow_dispatch" ]; then | |
| echo "is_monthly=false" >> $GITHUB_OUTPUT | |
| echo "ℹ️ Skipping contributor analysis (not 1st of month and not manual trigger)" | |
| else | |
| echo "is_monthly=true" >> $GITHUB_OUTPUT | |
| echo "🔄 Monthly run detected - will run contributor analysis" | |
| fi | |
| #================= | |
| # Environment Setup | |
| #================= | |
| #======================================== | |
| # Configure Git with identity for commits | |
| #======================================== | |
| - name: Configure Git | |
| run: | | |
| git config --global user.email "mudaherarich@gmail.com" | |
| git config --global user.name "richarddushime" | |
| # Configure Git with the identity that will be used for commits for the monthly run | |
| #======================================== | |
| # Install Python 3.11 for running scripts | |
| #======================================== | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ env.PYTHON_VERSION }} | |
| cache: 'pip' | |
| #======================= | |
| # Tenzing Data Processing | |
| #======================= | |
| #======================================== | |
| # Install Python packages for data processing scripts | |
| #======================================== | |
| - name: Install Python dependencies | |
| run: python3 -m pip install -r ./requirements.txt | |
| #======================================== | |
| # Process contributor data using Tenzing script | |
| # Must run before Contributor Analysis, which reads contributors_cache.csv | |
| #======================================== | |
| - name: Run Tenzing script | |
| id: tenzing-script | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 scripts/forrt_contribs/tenzing.py | |
| env: | |
| GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }} | |
| #======================================== | |
| # Check for Tenzing failures and create issue if needed | |
| #======================================== | |
| - name: Check Tenzing failures and create issue | |
| if: always() # Run even if previous step failed | |
| continue-on-error: true # Don't fail the workflow if issue creation fails | |
| run: python3 scripts/forrt_contribs/create_failure_issue.py | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| #============================== | |
| # Contributor Analysis (Monthly) | |
| #============================== | |
| #======================================== | |
| # Setup r2u for fast R package installation | |
| #======================================== | |
| - name: Setup r2u | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| uses: eddelbuettel/github-actions/r2u-setup@master | |
| #======================================== | |
| # Install Pandoc for rendering R Markdown documents | |
| #======================================== | |
| - name: Setup Pandoc | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| uses: r-lib/actions/setup-pandoc@v2 | |
| #======================================== | |
| # Install R packages for contributor analysis and visualization | |
| #======================================== | |
| - name: Install R dependencies | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| run: Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "dplyr", "tidyr", "readr", "googlesheets4", "stringr", "here", "sysfonts", "showtext", "treemapify", "igraph", "visNetwork"))' | |
| #======================================== | |
| # Generate contributor analysis reports and network visualizations | |
| # Reads from contributors_cache.csv generated by Tenzing script above | |
| #======================================== | |
| - name: Run Contributor Analysis | |
| id: contributor-analysis | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "🚀 Running Contributor Analysis..." | |
| # Clean old files from content/contributor-analysis and partials | |
| rm -rf content/contributor-analysis/*.png content/contributor-analysis/*.html content/contributor-analysis/htmlwidgets_libs | |
| rm -f static/partials/network-graph.html | |
| # Run index.Rmd to generate contributor analysis content and plots | |
| echo "📊 Rendering contributor analysis..." | |
| Rscript -e "rmarkdown::render('content/contributor-analysis/index.Rmd')" | |
| # Run network-graph.Rmd to generate interactive network visualization | |
| echo "🕸️ Rendering network visualization..." | |
| Rscript -e "rmarkdown::render('content/contributor-analysis/network-graph.Rmd')" | |
| # Move generated HTML file to static/partials (served via iframe) | |
| echo "📁 Moving network graph to static/partials..." | |
| mv content/contributor-analysis/network-graph.html static/partials/ | |
| #======================================== | |
| # Setup Node.js for bibliography processing | |
| #======================================== | |
| - name: Setup Node.js | |
| uses: actions/setup-node@v4 | |
| with: | |
| node-version: '18' | |
| cache: 'npm' | |
| cache-dependency-path: bibtex_to_apa/package-lock.json | |
| #======================================== | |
| # Install Node.js dependencies for bibliography processing | |
| #======================================== | |
| - name: Install Node.js dependencies | |
| run: | | |
| cd bibtex_to_apa | |
| npm install | |
| #======================================== | |
| # Process contributor data using Tenzing script | |
| #======================================== | |
| - name: Run Tenzing script | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 scripts/forrt_contribs/tenzing.py | |
| env: | |
| GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }} | |
| #======================================== | |
| # Process and organize curated resources data | |
| #======================================== | |
| - name: Run Curated Resources script | |
| id: curated-resources | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 content/resources/resource.py | |
| # Execute the curated resources script that processes and organizes resource data | |
| #======================================== | |
| # Move Tenzing output to content directory and validate | |
| #======================================== | |
| - name: Move and validate Tenzing output | |
| id: tenzing-output | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| mv scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md | |
| if [ ! -f content/contributors/tenzing.md ]; then | |
| echo "tenzing.md not found" | |
| exit 1 | |
| fi | |
| #======================================== | |
| # Validate that curated resources files available under content/curated_resources | |
| #======================================== | |
| - name: Validate curated resources | |
| id: validate-resources | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| for file in content/curated_resources/*; do | |
| if [ ! -f "$file" ]; then | |
| echo "Non-markdown file found: $file" | |
| exit 1 | |
| fi | |
| done | |
| #======================================== | |
| # Generate APA lookup from bibliography | |
| #======================================== | |
| - name: Generate APA lookup | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| cd bibtex_to_apa | |
| node bibtex_to_apa.js -o '../content/glossary/apa_lookup.json' | |
| #======================================== | |
| # Process and generate glossary files | |
| #======================================== | |
| - name: Run Glossary Generation script | |
| id: glossary-generation | |
| if: github.event.inputs.regenerate_glossary == 'true' | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 content/glossary/_create_glossaries.py | |
| # Execute the glossary script that generates glossary markdown files | |
| - name: Check for missing references | |
| if: always() | |
| run: | | |
| if [ -f "content/glossary/missing_references.txt" ]; then | |
| echo "Missing references found:" | |
| cat content/glossary/missing_references.txt | |
| # Optionally fail the workflow or create an issue | |
| else | |
| echo "All references resolved successfully" | |
| fi | |
| #======================================================= | |
| # Create a pull request for glossary updates when regenerated | |
| #======================================================= | |
| - name: Create PR for glossary updates | |
| if: github.event_name == 'workflow_dispatch' && github.event.inputs.regenerate_glossary == 'true' | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "=== Creating PR for glossary updates ===" | |
| if [ -z "$(git status --porcelain -- content/glossary)" ]; then | |
| echo "ℹ️ No glossary changes detected" | |
| exit 0 | |
| fi | |
| BRANCH_NAME="glossary-update-$(date +%Y%m%d-%H%M%S)" | |
| git checkout -b "$BRANCH_NAME" | |
| git add content/glossary/ | |
| git commit -m "Update glossary entries - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" | |
| git push -u origin "$BRANCH_NAME" | |
| gh pr create \ | |
| --title "📘 Glossary update - $(date '+%Y-%m-%d')" \ | |
| --body "Automated glossary update generated on $(date -u +'%Y-%m-%d %H:%M:%S UTC'). Files changed: content/glossary/" \ | |
| --base master \ | |
| --head "$BRANCH_NAME" | |
| echo "✅ PR created for glossary updates" | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.FORRT_PAT }} | |
| GH_TOKEN: ${{ secrets.FORRT_PAT }} | |
| #======================================== | |
| # Download Google Analytics data and validate | |
| #======================================== | |
| - name: Download GA Data | |
| id: ga-data | |
| continue-on-error: true # Continue even if this step fails | |
| if: | | |
| github.event_name == 'schedule' || | |
| github.event.inputs.regenerate_ga == 'true' | |
| env: | |
| GA_API_CREDENTIALS: ${{ secrets.GA_API_CREDENTIALS }} | |
| GA_PROPERTY_ID: ${{ secrets.GA_PROPERTY_ID }} | |
| run: | | |
| if [ -z "$GA_API_CREDENTIALS" ] || [ -z "$GA_PROPERTY_ID" ]; then | |
| echo "❌ GA credentials not set" | |
| exit 1 | |
| fi | |
| rm -f data/ga_data.json | |
| rm -rf data/ga_data/ | |
| python scripts/download_ga_data.py | |
| if [ -f "data/ga_data.json" ]; then | |
| echo "✅ GA data file created successfully" | |
| echo "File size: $(wc -c < data/ga_data.json) bytes" | |
| # Quick validation of data structure | |
| python3 -c "import json; data = json.load(open('data/ga_data.json')); print('✅ GA data:', len(data.get('regions', [])), 'countries,', len(data.get('top_pages', [])), 'pages')" | |
| else | |
| echo "❌ GA data file was not created" | |
| exit 1 | |
| fi | |
| #======================================== | |
| # Update FReD citation (Weekly or Manual) | |
| #======================================== | |
| - name: Update FReD Citation | |
| id: fred-citation | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "=== Updating FReD citation ===" | |
| # Check if it's a Sunday (day 0) OR manually triggered | |
| CURRENT_DAY_OF_WEEK=$(date +%u) | |
| if [ "$CURRENT_DAY_OF_WEEK" != "7" ] && [ "${{ github.event_name }}" != "workflow_dispatch" ]; then | |
| echo "ℹ️ Skipping FReD citation update (not Sunday and not manual trigger)" | |
| exit 0 | |
| fi | |
| echo "🔄 Running FReD citation update..." | |
| python3 scripts/update_fred_citation.py | |
| if [ -f "static/data/fred_citation.txt" ]; then | |
| echo "✅ FReD citation updated successfully" | |
| echo "Citation length: $(wc -c < static/data/fred_citation.txt) bytes" | |
| else | |
| echo "❌ FReD citation file was not created" | |
| exit 1 | |
| fi | |
| #======================================================= | |
| # Create a pull request for GA data updates on monthly runs | |
| #======================================================= | |
| - name: Create PR for GA data update | |
| if: github.event_name != 'pull_request' | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "=== Creating PR for GA data update ===" | |
| # Check if it's the first day of the month OR manually triggered | |
| CURRENT_DAY=$(date +%d) | |
| if [ "$CURRENT_DAY" != "01" ] && [ "${{ github.event_name }}" != "workflow_dispatch" ]; then | |
| echo "ℹ️ Skipping PR creation (not 1st of month and not manual trigger)" | |
| exit 0 | |
| fi | |
| BRANCH_NAME="ga-data-update-$(date +%Y%m%d-%H%M%S)" | |
| git fetch origin master | |
| git checkout master | |
| # Delete local branch if it exists | |
| git branch -D "$BRANCH_NAME" 2>/dev/null || true | |
| git checkout -b "$BRANCH_NAME" | |
| # Verify we're on the correct branch | |
| CURRENT_BRANCH=$(git branch --show-current) | |
| if [ "$CURRENT_BRANCH" != "$BRANCH_NAME" ]; then | |
| echo "❌ Failed to create branch $BRANCH_NAME, currently on $CURRENT_BRANCH" | |
| exit 1 | |
| fi | |
| echo "✅ Created and switched to branch: $BRANCH_NAME" | |
| # Add and commit the GA data file | |
| echo "Adding GA data file..." | |
| git add data/ga_data.json | |
| git commit -m "Update GA data - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" | |
| if ! git push origin "$BRANCH_NAME" --force-with-lease; then | |
| git push origin "$BRANCH_NAME" | |
| fi | |
| gh pr create \ | |
| --title "📊 Monthly GA Data Update - $(date '+%B %Y')" \ | |
| --body "Automated monthly Google Analytics data update. Generated on $(date -u +'%Y-%m-%d %H:%M:%S UTC'). Files changed: data/ga_data.json" \ | |
| --base master \ | |
| --head "$BRANCH_NAME" \ | |
| --label "ga-data,monthly-update" | |
| echo "✅ PR created for GA data update" | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.FORRT_PAT }} | |
| GH_TOKEN: ${{ secrets.FORRT_PAT }} | |
| #======================= | |
| # Google Scholar Citations | |
| #======================================== | |
| # Execute Google Scholar citation tracking script | |
| #======================================== | |
| - name: Run Google Scholar script | |
| id: google-scholar | |
| continue-on-error: true | |
| run: python3 scripts/gs-cite/google_scholar.py | |
| env: | |
| SERPAPI: ${{ secrets.SERPAPI }} | |
| #============== | |
| # Artifact Upload | |
| #============== | |
| #======================================== | |
| # Upload all processed data files as artifact | |
| #======================================== | |
| - name: Upload data artifact | |
| id: upload-artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: data-artifact | |
| path: | | |
| content/contributors/tenzing.md | |
| scripts/forrt_contribs/contributors_cache.csv | |
| content/curated_resources/ | |
| content/glossary/ | |
| data/ | |
| static/data/ | |
| static/partials/ | |
| content/contributor-analysis/ | |
| content/publications/citation_chart.webp | |
| retention-days: 7 | |
| #======================================== | |
| # Commit generated files to build-resources branch (via worktree) | |
| #======================================== | |
| - name: Commit to build-resources branch | |
| if: github.event_name != 'pull_request' | |
| continue-on-error: true | |
| run: | | |
| echo "📝 Committing generated files to build-resources branch via worktree..." | |
| set -e | |
| WORKTREE_DIR="/tmp/build-resources-worktree" | |
| # Store generated files in temp location | |
| mkdir -p /tmp/generated-resources | |
| cp -r content/curated_resources /tmp/generated-resources/ | |
| cp content/contributors/tenzing.md /tmp/generated-resources/ || true | |
| cp scripts/forrt_contribs/contributors_cache.csv /tmp/generated-resources/ || true | |
| cp data/ga_data.json /tmp/generated-resources/ga_data.json || true | |
| # Copy FReD citation if it exists | |
| if [ -f static/data/fred_citation.txt ]; then | |
| mkdir -p /tmp/generated-resources/static/data | |
| cp static/data/fred_citation.txt /tmp/generated-resources/static/data/fred_citation.txt | |
| fi | |
| # Copy additional generated files | |
| mkdir -p /tmp/generated-resources/contributor-analysis | |
| cp -r content/contributor-analysis/* /tmp/generated-resources/contributor-analysis/ || true | |
| mkdir -p /tmp/generated-resources/publications | |
| cp content/publications/citation_chart.webp /tmp/generated-resources/publications/ || true | |
| if [ -f data/summaries.json ]; then | |
| cp data/summaries.json /tmp/generated-resources/summaries.json | |
| fi | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then | |
| cp -r content/glossary /tmp/generated-resources/ | |
| fi | |
| # Prepare worktree for build-resources | |
| git fetch origin | |
| rm -rf "$WORKTREE_DIR" | |
| if git ls-remote --exit-code origin build-resources >/dev/null 2>&1; then | |
| echo "✓ build-resources branch exists, creating/updating worktree" | |
| git worktree add -B build-resources "$WORKTREE_DIR" origin/build-resources | |
| else | |
| echo "✓ build-resources does not exist, creating from master" | |
| git worktree add -b build-resources "$WORKTREE_DIR" origin/master | |
| fi | |
| # Apply updates inside the worktree | |
| pushd "$WORKTREE_DIR" | |
| # Ensure target directories exist | |
| mkdir -p content/curated_resources content/contributors data content/contributor-analysis content/publications static/data | |
| # Remove old generated resource files (but keep _index.md) | |
| find content/curated_resources -type f ! -name '_index.md' -delete 2>/dev/null || true | |
| # We also want to clean up old contributor analysis files to avoid stale data | |
| rm -rf content/contributor-analysis/* 2>/dev/null || true | |
| # Copy newly generated files | |
| cp -r /tmp/generated-resources/curated_resources/* content/curated_resources/ || true | |
| if [ -f /tmp/generated-resources/tenzing.md ]; then | |
| cp /tmp/generated-resources/tenzing.md content/contributors/ | |
| fi | |
| if [ -f /tmp/generated-resources/ga_data.json ]; then | |
| cp /tmp/generated-resources/ga_data.json data/ga_data.json | |
| fi | |
| if [ -f /tmp/generated-resources/static/data/fred_citation.txt ]; then | |
| cp /tmp/generated-resources/static/data/fred_citation.txt static/data/fred_citation.txt | |
| fi | |
| if [ -f /tmp/generated-resources/summaries.json ]; then | |
| cp /tmp/generated-resources/summaries.json data/summaries.json | |
| fi | |
| if [ -f /tmp/generated-resources/contributors_cache.csv ]; then | |
| mkdir -p scripts/forrt_contribs | |
| cp /tmp/generated-resources/contributors_cache.csv scripts/forrt_contribs/ | |
| fi | |
| # Copy contributor analysis and citation chart | |
| cp -r /tmp/generated-resources/contributor-analysis/* content/contributor-analysis/ || true | |
| if [ -f /tmp/generated-resources/publications/citation_chart.webp ]; then | |
| cp /tmp/generated-resources/publications/citation_chart.webp content/publications/ | |
| fi | |
| # Copy glossary files only if regenerated (preserving directory structure) | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ] && [ -d /tmp/generated-resources/glossary ]; then | |
| echo "✓ Updating glossary files in build-resources worktree" | |
| mkdir -p content/glossary | |
| find content/glossary -type f ! -name '_index.md' ! -name '_create_glossaries.py' -delete 2>/dev/null || true | |
| rsync -av --exclude='_index.md' --exclude='_create_glossaries.py' /tmp/generated-resources/glossary/ content/glossary/ | |
| fi | |
| # Check if there are any changes to commit | |
| if git diff --quiet && git diff --cached --quiet; then | |
| echo "ℹ️ No changes to commit" | |
| else | |
| echo "✓ Changes detected, committing..." | |
| # Add all potential files | |
| git add content/curated_resources/ 2>/dev/null || true | |
| git add content/contributors/tenzing.md 2>/dev/null || true | |
| git add scripts/forrt_contribs/contributors_cache.csv 2>/dev/null || true | |
| git add data/ga_data.json 2>/dev/null || true | |
| git add static/data/fred_citation.txt 2>/dev/null || true | |
| git add data/summaries.json 2>/dev/null || true | |
| git add content/contributor-analysis/ 2>/dev/null || true | |
| git add content/publications/citation_chart.webp 2>/dev/null || true | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then | |
| git add content/glossary/ 2>/dev/null || true | |
| fi | |
| git commit -m "Update generated resources and data - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" || echo "Nothing to commit" | |
| # Push to build-resources with retry logic | |
| MAX_RETRIES=3 | |
| RETRY_COUNT=0 | |
| while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do | |
| # Using force-with-lease to be safer, but effectively force pushing since we are overwriting | |
| if git push -u origin build-resources --force-with-lease; then | |
| echo "✅ Successfully pushed to build-resources branch" | |
| break | |
| else | |
| RETRY_COUNT=$((RETRY_COUNT + 1)) | |
| if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then | |
| echo "⚠️ Push failed, retrying ($RETRY_COUNT/$MAX_RETRIES)..." | |
| sleep 2 | |
| git pull --rebase || true | |
| else | |
| echo "❌ Push failed after $MAX_RETRIES attempts" | |
| exit 1 | |
| fi | |
| fi | |
| done | |
| fi | |
| popd | |
| # Clean up worktree | |
| git worktree remove "$WORKTREE_DIR" --force || true | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.FORRT_PAT }} | |
| #================ | |
| # Trigger Deploy | |
| #================ | |
| #======================================== | |
| # Trigger the deploy workflow to publish updated data | |
| #======================================== | |
| - name: Trigger deployment | |
| if: github.event_name != 'pull_request' && inputs.skip_deploy != true | |
| run: | | |
| echo "🚀 Triggering deployment workflow..." | |
| curl -s -X POST \ | |
| -H "Authorization: Bearer ${{ secrets.FORRT_PAT }}" \ | |
| -H "Accept: application/vnd.github+json" \ | |
| "https://api.github.com/repos/${{ github.repository }}/dispatches" \ | |
| -d '{"event_type": "data-update"}' | |
| echo "✅ Deployment triggered successfully" | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| #==================== | |
| # Failure Reporting | |
| #==================== | |
| #======================================== | |
| # Check all step outcomes and create issues for any failures | |
| #======================================== | |
| - name: Report workflow step failures | |
| if: always() # Always run this step to catch any failures | |
| continue-on-error: true # Don't fail the workflow if issue creation fails | |
| run: | | |
| echo "🔍 Checking workflow step outcomes..." | |
| # Create workflow run URL | |
| WORKFLOW_RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # Track if any failures occurred | |
| FAILURES_FOUND=false | |
| # Check each critical step outcome | |
| # Note: We check steps that are critical for data processing | |
| # For conditional steps, we check both the condition AND the outcome | |
| # to avoid reporting failures for skipped steps | |
| # Contributor Analysis (only if it was supposed to run) | |
| if [ "${{ steps.monthly-run.outputs.is_monthly }}" == "true" ] && [ "${{ steps.contributor-analysis.outcome }}" == "failure" ]; then | |
| echo "❌ Contributor Analysis failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Contributor Analysis" "The contributor analysis step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Contributor Analysis" | |
| fi | |
| # Tenzing Script | |
| if [ "${{ steps.tenzing-script.outcome }}" == "failure" ]; then | |
| echo "❌ Tenzing Script failed" | |
| FAILURES_FOUND=true | |
| # Don't create a duplicate issue since tenzing has its own issue creation | |
| echo "ℹ️ Tenzing has its own issue creation mechanism" | |
| fi | |
| # Curated Resources | |
| if [ "${{ steps.curated-resources.outcome }}" == "failure" ]; then | |
| echo "❌ Curated Resources script failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Curated Resources Processing" "The curated resources processing step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Curated Resources" | |
| fi | |
| # Tenzing Output Validation | |
| if [ "${{ steps.tenzing-output.outcome }}" == "failure" ]; then | |
| echo "❌ Tenzing output validation failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Tenzing Output Validation" "The tenzing.md file validation failed. The file may not have been created or moved correctly." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Tenzing Output" | |
| fi | |
| # Curated Resources Validation | |
| if [ "${{ steps.validate-resources.outcome }}" == "failure" ]; then | |
| echo "❌ Curated resources validation failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Curated Resources Validation" "The curated resources validation failed. Some files may be invalid or missing." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Resources Validation" | |
| fi | |
| # Glossary Generation (only if it was supposed to run) | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" == "true" ] && [ "${{ steps.glossary-generation.outcome }}" == "failure" ]; then | |
| echo "❌ Glossary generation failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Glossary Generation" "The glossary generation step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Glossary Generation" | |
| fi | |
| # Google Analytics Data | |
| if [ "${{ steps.ga-data.outcome }}" == "failure" ]; then | |
| echo "❌ GA Data download failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Google Analytics Data Download" "The Google Analytics data download step failed. This may be due to authentication issues or API problems." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for GA Data" | |
| fi | |
| # FReD Citation | |
| if [ "${{ steps.fred-citation.outcome }}" == "failure" ]; then | |
| echo "❌ FReD citation update failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "FReD Citation Update" "The FReD citation update step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for FReD Citation" | |
| fi | |
| # Google Scholar | |
| if [ "${{ steps.google-scholar.outcome }}" == "failure" ]; then | |
| echo "❌ Google Scholar script failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Google Scholar Citation Tracking" "The Google Scholar citation tracking step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Google Scholar" | |
| fi | |
| if [ "$FAILURES_FOUND" == "false" ]; then | |
| echo "✅ All critical workflow steps completed successfully!" | |
| else | |
| echo "⚠️ Some workflow steps failed - issues have been created" | |
| fi | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} |