---
name: Data Processing

# FORRT Data Processing Workflow
#
# Purpose: Automated data fetching and processing for FORRT website content
#
# Triggers:
#   - Daily at midnight UTC (scheduled); individual steps self-gate to
#     weekly (Sundays) or monthly (1st of month) cadences
#   - Manual trigger via GitHub Actions UI (workflow_dispatch)
#
# Data Sources Processed:
#   1. Curated Resources (Python script)
#   2. Tenzing contributor data (Python script)
#   3. Google Analytics data (Python script)
#   4. Contributor analysis (R script) - Monthly only
#
# Outputs:
#   - Updated JSON data files in data/ directory
#   - Static copies in static/data/ for client-side access
#   - Automated PRs for contributor analysis (monthly)
#
# The processed data is used throughout the Hugo website for dynamic content.
on:
  schedule:
    # Daily at midnight UTC; weekly/monthly steps gate themselves on the date.
    - cron: '0 0 * * *'
  workflow_dispatch:
    inputs:
      skip_deploy:
        description: 'Skip triggering deploy after processing'
        required: false
        type: boolean
        default: false
      regenerate_glossary:
        description: 'Regenerate glossary files (only use when glossary sources are stable)'
        required: false
        type: boolean
        default: false
      regenerate_ga:
        description: 'Regenerate Google Analytics data'
        required: false
        type: boolean
        default: false
jobs:
  process-data:
    name: Process Data
    runs-on: ubuntu-22.04
    # contents: write is needed to push branches; pull-requests: write for automated PRs.
    permissions:
      contents: write
      pull-requests: write
    env:
      PYTHON_VERSION: "3.11"  # quoted so it stays a string, not a YAML float
| steps: | |
| #================ | |
| # Repository Setup | |
| #================ | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| # Checkout the repository code to the runner environment | |
| #====================== | |
| # Workflow Configuration | |
| #====================== | |
| # Check if this is a monthly run (1st of month or manual trigger) | |
| #========================================================== | |
| - name: Check if monthly run | |
| id: monthly-run | |
| run: | | |
| CURRENT_DAY=$(date +%d) | |
| if [ "$CURRENT_DAY" != "01" ] && [ "${{ github.event_name }}" != "workflow_dispatch" ]; then | |
| echo "is_monthly=false" >> $GITHUB_OUTPUT | |
| echo "ℹ️ Skipping contributor analysis (not 1st of month and not manual trigger)" | |
| else | |
| echo "is_monthly=true" >> $GITHUB_OUTPUT | |
| echo "🔄 Monthly run detected - will run contributor analysis" | |
| fi | |
| #================= | |
| # Environment Setup | |
| #================= | |
| #======================================== | |
| # Configure Git with identity for commits | |
| #======================================== | |
| - name: Configure Git | |
| run: | | |
| git config --global user.email "mudaherarich@gmail.com" | |
| git config --global user.name "richarddushime" | |
| # Configure Git with the identity that will be used for commits for the monthly run | |
| #======================================== | |
| # Install Python 3.11 for running scripts | |
| #======================================== | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ env.PYTHON_VERSION }} | |
| cache: 'pip' | |
| #======================= | |
| # Tenzing Data Processing | |
| #======================= | |
| #======================================== | |
| # Install Python packages for data processing scripts | |
| #======================================== | |
| - name: Install Python dependencies | |
| run: python3 -m pip install -r ./requirements.txt | |
| #======================================== | |
| # Process contributor data using Tenzing script | |
| # Must run before Contributor Analysis, which reads contributors_cache.csv | |
| #======================================== | |
| - name: Run Tenzing script | |
| id: tenzing-script | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 scripts/forrt_contribs/tenzing.py | |
| env: | |
| GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }} | |
| #======================================== | |
| # Check for Tenzing failures and create issue if needed | |
| #======================================== | |
| - name: Check Tenzing failures and create issue | |
| if: always() # Run even if previous step failed | |
| continue-on-error: true # Don't fail the workflow if issue creation fails | |
| run: python3 scripts/forrt_contribs/create_failure_issue.py | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| #============================== | |
| # Contributor Analysis (Monthly) | |
| #============================== | |
| #======================================== | |
| # Setup r2u for fast R package installation | |
| #======================================== | |
| - name: Setup r2u | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| uses: eddelbuettel/github-actions/r2u-setup@master | |
| #======================================== | |
| # Install Pandoc for rendering R Markdown documents | |
| #======================================== | |
| - name: Setup Pandoc | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| uses: r-lib/actions/setup-pandoc@v2 | |
| #======================================== | |
| # Install R packages for contributor analysis and visualization | |
| #======================================== | |
| - name: Install R dependencies | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| run: Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "dplyr", "tidyr", "readr", "googlesheets4", "stringr", "here", "sysfonts", "showtext", "treemapify", "igraph", "visNetwork"))' | |
| #======================================== | |
| # Generate contributor analysis reports and network visualizations | |
| # Reads from contributors_cache.csv generated by Tenzing script above | |
| #======================================== | |
| - name: Run Contributor Analysis | |
| id: contributor-analysis | |
| if: steps.monthly-run.outputs.is_monthly == 'true' | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "🚀 Running Contributor Analysis..." | |
| # Clean old files from content/contributor-analysis and partials | |
| rm -rf content/contributor-analysis/*.png content/contributor-analysis/*.html content/contributor-analysis/htmlwidgets_libs | |
| rm -f static/partials/network-graph.html | |
| # Run index.Rmd to generate contributor analysis content and plots | |
| echo "📊 Rendering contributor analysis..." | |
| Rscript -e "rmarkdown::render('content/contributor-analysis/index.Rmd')" | |
| # Run network-graph.Rmd to generate interactive network visualization | |
| echo "🕸️ Rendering network visualization..." | |
| Rscript -e "rmarkdown::render('content/contributor-analysis/network-graph.Rmd')" | |
| # Move generated HTML file to static/partials (served via iframe) | |
| echo "📁 Moving network graph to static/partials..." | |
| mv content/contributor-analysis/network-graph.html static/partials/ | |
| #======================================== | |
| # Setup Node.js for bibliography processing | |
| #======================================== | |
| - name: Setup Node.js | |
| uses: actions/setup-node@v4 | |
| with: | |
| node-version: '18' | |
| cache: 'npm' | |
| cache-dependency-path: bibtex_to_apa/package-lock.json | |
| #======================================== | |
| # Install Node.js dependencies for bibliography processing | |
| #======================================== | |
| - name: Install Node.js dependencies | |
| run: | | |
| cd bibtex_to_apa | |
| npm install | |
| #======================================== | |
| # Process contributor data using Tenzing script | |
| #======================================== | |
| - name: Run Tenzing script | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 scripts/forrt_contribs/tenzing.py | |
| env: | |
| GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }} | |
| #======================================== | |
| # Process and organize curated resources data | |
| #======================================== | |
| - name: Run Curated Resources script | |
| id: curated-resources | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 content/resources/resource.py | |
| # Execute the curated resources script that processes and organizes resource data | |
| #======================================== | |
| # Move Tenzing output to content directory and validate | |
| #======================================== | |
| - name: Move and validate Tenzing output | |
| id: tenzing-output | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| mv scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md | |
| if [ ! -f content/contributors/tenzing.md ]; then | |
| echo "tenzing.md not found" | |
| exit 1 | |
| fi | |
| #======================================== | |
| # Validate that curated resources files available under content/curated_resources | |
| #======================================== | |
| - name: Validate curated resources | |
| id: validate-resources | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| for file in content/curated_resources/*; do | |
| if [ ! -f "$file" ]; then | |
| echo "Non-markdown file found: $file" | |
| exit 1 | |
| fi | |
| done | |
| #======================================== | |
| # Generate APA lookup from bibliography | |
| #======================================== | |
| - name: Generate APA lookup | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| cd bibtex_to_apa | |
| node bibtex_to_apa.js -o '../content/glossary/apa_lookup.json' | |
| #======================================== | |
| # Process and generate glossary files | |
| #======================================== | |
| - name: Run Glossary Generation script | |
| id: glossary-generation | |
| if: github.event.inputs.regenerate_glossary == 'true' | |
| continue-on-error: true # Continue even if this step fails | |
| run: python3 content/glossary/_create_glossaries.py | |
| # Execute the glossary script that generates glossary markdown files | |
| - name: Check for missing references | |
| if: always() | |
| run: | | |
| if [ -f "content/glossary/missing_references.txt" ]; then | |
| echo "Missing references found:" | |
| cat content/glossary/missing_references.txt | |
| # Optionally fail the workflow or create an issue | |
| else | |
| echo "All references resolved successfully" | |
| fi | |
| #======================================================= | |
| # Create a pull request for glossary updates when regenerated | |
| #======================================================= | |
| - name: Create PR for glossary updates | |
| if: github.event_name == 'workflow_dispatch' && github.event.inputs.regenerate_glossary == 'true' | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "=== Creating PR for glossary updates ===" | |
| if [ -z "$(git status --porcelain -- content/glossary)" ]; then | |
| echo "ℹ️ No glossary changes detected" | |
| exit 0 | |
| fi | |
| BRANCH_NAME="glossary-update-$(date +%Y%m%d-%H%M%S)" | |
| git checkout -b "$BRANCH_NAME" | |
| git add content/glossary/ | |
| git commit -m "Update glossary entries - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" | |
| git push -u origin "$BRANCH_NAME" | |
| gh pr create \ | |
| --title "📘 Glossary update - $(date '+%Y-%m-%d')" \ | |
| --body "Automated glossary update generated on $(date -u +'%Y-%m-%d %H:%M:%S UTC'). Files changed: content/glossary/" \ | |
| --base master \ | |
| --head "$BRANCH_NAME" | |
| echo "✅ PR created for glossary updates" | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.FORRT_PAT }} | |
| GH_TOKEN: ${{ secrets.FORRT_PAT }} | |
| #======================================== | |
| # Download Google Analytics data and validate | |
| #======================================== | |
| - name: Download GA Data | |
| id: ga-data | |
| continue-on-error: true # Continue even if this step fails | |
| if: | | |
| github.event_name == 'schedule' || | |
| github.event.inputs.regenerate_ga == 'true' | |
| env: | |
| GA_API_CREDENTIALS: ${{ secrets.GA_API_CREDENTIALS }} | |
| GA_PROPERTY_ID: ${{ secrets.GA_PROPERTY_ID }} | |
| run: | | |
| if [ -z "$GA_API_CREDENTIALS" ] || [ -z "$GA_PROPERTY_ID" ]; then | |
| echo "❌ GA credentials not set" | |
| exit 1 | |
| fi | |
| rm -f data/ga_data.json | |
| rm -rf data/ga_data/ | |
| python scripts/download_ga_data.py | |
| if [ -f "data/ga_data.json" ]; then | |
| echo "✅ GA data file created successfully" | |
| echo "File size: $(wc -c < data/ga_data.json) bytes" | |
| # Quick validation of data structure | |
| python3 -c "import json; data = json.load(open('data/ga_data.json')); print('✅ GA data:', len(data.get('regions', [])), 'countries,', len(data.get('top_pages', [])), 'pages')" | |
| else | |
| echo "❌ GA data file was not created" | |
| exit 1 | |
| fi | |
| #======================================== | |
| # Update FReD citation (Weekly or Manual) | |
| #======================================== | |
| - name: Update FReD Citation | |
| id: fred-citation | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "=== Updating FReD citation ===" | |
| # Check if it's a Sunday (day 0) OR manually triggered | |
| CURRENT_DAY_OF_WEEK=$(date +%u) | |
| if [ "$CURRENT_DAY_OF_WEEK" != "7" ] && [ "${{ github.event_name }}" != "workflow_dispatch" ]; then | |
| echo "ℹ️ Skipping FReD citation update (not Sunday and not manual trigger)" | |
| exit 0 | |
| fi | |
| echo "🔄 Running FReD citation update..." | |
| python3 scripts/update_fred_citation.py | |
| if [ -f "static/data/fred_citation.txt" ]; then | |
| echo "✅ FReD citation updated successfully" | |
| echo "Citation length: $(wc -c < static/data/fred_citation.txt) bytes" | |
| else | |
| echo "❌ FReD citation file was not created" | |
| exit 1 | |
| fi | |
| #======================================================= | |
| # Create a pull request for GA data updates on monthly runs | |
| #======================================================= | |
| - name: Create PR for GA data update | |
| if: github.event_name != 'pull_request' | |
| continue-on-error: true # Continue even if this step fails | |
| run: | | |
| echo "=== Creating PR for GA data update ===" | |
| # Check if it's the first day of the month OR manually triggered | |
| CURRENT_DAY=$(date +%d) | |
| if [ "$CURRENT_DAY" != "01" ] && [ "${{ github.event_name }}" != "workflow_dispatch" ]; then | |
| echo "ℹ️ Skipping PR creation (not 1st of month and not manual trigger)" | |
| exit 0 | |
| fi | |
| BRANCH_NAME="ga-data-update-$(date +%Y%m%d-%H%M%S)" | |
| git fetch origin master | |
| git checkout master | |
| # Delete local branch if it exists | |
| git branch -D "$BRANCH_NAME" 2>/dev/null || true | |
| git checkout -b "$BRANCH_NAME" | |
| # Verify we're on the correct branch | |
| CURRENT_BRANCH=$(git branch --show-current) | |
| if [ "$CURRENT_BRANCH" != "$BRANCH_NAME" ]; then | |
| echo "❌ Failed to create branch $BRANCH_NAME, currently on $CURRENT_BRANCH" | |
| exit 1 | |
| fi | |
| echo "✅ Created and switched to branch: $BRANCH_NAME" | |
| # Add and commit the GA data file | |
| echo "Adding GA data file..." | |
| git add data/ga_data.json | |
| git commit -m "Update GA data - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" | |
| if ! git push origin "$BRANCH_NAME" --force-with-lease; then | |
| git push origin "$BRANCH_NAME" | |
| fi | |
| gh pr create \ | |
| --title "📊 Monthly GA Data Update - $(date '+%B %Y')" \ | |
| --body "Automated monthly Google Analytics data update. Generated on $(date -u +'%Y-%m-%d %H:%M:%S UTC'). Files changed: data/ga_data.json" \ | |
| --base master \ | |
| --head "$BRANCH_NAME" \ | |
| --label "ga-data,monthly-update" | |
| echo "✅ PR created for GA data update" | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.FORRT_PAT }} | |
| GH_TOKEN: ${{ secrets.FORRT_PAT }} | |
| #======================= | |
| # Google Scholar Citations | |
| #======================================== | |
| # Execute Google Scholar citation tracking script | |
| #======================================== | |
| - name: Run Google Scholar script | |
| id: google-scholar | |
| continue-on-error: true | |
| run: python3 scripts/gs-cite/google_scholar.py | |
| env: | |
| SERPAPI: ${{ secrets.SERPAPI }} | |
| #============== | |
| # Artifact Upload | |
| #============== | |
| #======================================== | |
| # Upload all processed data files as artifact | |
| #======================================== | |
| - name: Upload data artifact | |
| id: upload-artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: data-artifact | |
| path: | | |
| content/contributors/tenzing.md | |
| scripts/forrt_contribs/contributors_cache.csv | |
| content/curated_resources/ | |
| content/glossary/ | |
| data/ | |
| static/data/ | |
| static/partials/ | |
| content/contributor-analysis/ | |
| content/publications/citation_chart.webp | |
| retention-days: 7 | |
| #======================================== | |
| # Commit generated files to build-resources branch (via worktree) | |
| #======================================== | |
| - name: Commit to build-resources branch | |
| if: github.event_name != 'pull_request' | |
| continue-on-error: true | |
| run: | | |
| echo "📝 Committing generated files to build-resources branch via worktree..." | |
| set -e | |
| WORKTREE_DIR="/tmp/build-resources-worktree" | |
| # Store generated files in temp location | |
| mkdir -p /tmp/generated-resources | |
| cp -r content/curated_resources /tmp/generated-resources/ | |
| cp content/contributors/tenzing.md /tmp/generated-resources/ || true | |
| cp scripts/forrt_contribs/contributors_cache.csv /tmp/generated-resources/ || true | |
| cp data/ga_data.json /tmp/generated-resources/ga_data.json || true | |
| # Copy FReD citation if it exists | |
| if [ -f static/data/fred_citation.txt ]; then | |
| mkdir -p /tmp/generated-resources/static/data | |
| cp static/data/fred_citation.txt /tmp/generated-resources/static/data/fred_citation.txt | |
| fi | |
| # Copy additional generated files | |
| mkdir -p /tmp/generated-resources/contributor-analysis | |
| cp -r content/contributor-analysis/* /tmp/generated-resources/contributor-analysis/ || true | |
| mkdir -p /tmp/generated-resources/publications | |
| cp content/publications/citation_chart.webp /tmp/generated-resources/publications/ || true | |
| if [ -f data/summaries.json ]; then | |
| cp data/summaries.json /tmp/generated-resources/summaries.json | |
| fi | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then | |
| cp -r content/glossary /tmp/generated-resources/ | |
| fi | |
| # Prepare worktree for build-resources | |
| git fetch origin | |
| rm -rf "$WORKTREE_DIR" | |
| if git ls-remote --exit-code origin build-resources >/dev/null 2>&1; then | |
| echo "✓ build-resources branch exists, creating/updating worktree" | |
| git worktree add -B build-resources "$WORKTREE_DIR" origin/build-resources | |
| else | |
| echo "✓ build-resources does not exist, creating from master" | |
| git worktree add -b build-resources "$WORKTREE_DIR" origin/master | |
| fi | |
| # Apply updates inside the worktree | |
| pushd "$WORKTREE_DIR" | |
| # Ensure target directories exist | |
| mkdir -p content/curated_resources content/contributors data content/contributor-analysis content/publications static/data | |
| # Remove old generated resource files (but keep _index.md) | |
| find content/curated_resources -type f ! -name '_index.md' -delete 2>/dev/null || true | |
| # We also want to clean up old contributor analysis files to avoid stale data | |
| rm -rf content/contributor-analysis/* 2>/dev/null || true | |
| # Copy newly generated files | |
| cp -r /tmp/generated-resources/curated_resources/* content/curated_resources/ || true | |
| if [ -f /tmp/generated-resources/tenzing.md ]; then | |
| cp /tmp/generated-resources/tenzing.md content/contributors/ | |
| fi | |
| if [ -f /tmp/generated-resources/ga_data.json ]; then | |
| cp /tmp/generated-resources/ga_data.json data/ga_data.json | |
| fi | |
| if [ -f /tmp/generated-resources/static/data/fred_citation.txt ]; then | |
| cp /tmp/generated-resources/static/data/fred_citation.txt static/data/fred_citation.txt | |
| fi | |
| if [ -f /tmp/generated-resources/summaries.json ]; then | |
| cp /tmp/generated-resources/summaries.json data/summaries.json | |
| fi | |
| if [ -f /tmp/generated-resources/contributors_cache.csv ]; then | |
| mkdir -p scripts/forrt_contribs | |
| cp /tmp/generated-resources/contributors_cache.csv scripts/forrt_contribs/ | |
| fi | |
| # Copy contributor analysis and citation chart | |
| cp -r /tmp/generated-resources/contributor-analysis/* content/contributor-analysis/ || true | |
| if [ -f /tmp/generated-resources/publications/citation_chart.webp ]; then | |
| cp /tmp/generated-resources/publications/citation_chart.webp content/publications/ | |
| fi | |
| # Copy glossary files only if regenerated (preserving directory structure) | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ] && [ -d /tmp/generated-resources/glossary ]; then | |
| echo "✓ Updating glossary files in build-resources worktree" | |
| mkdir -p content/glossary | |
| find content/glossary -type f ! -name '_index.md' ! -name '_create_glossaries.py' -delete 2>/dev/null || true | |
| rsync -av --exclude='_index.md' --exclude='_create_glossaries.py' /tmp/generated-resources/glossary/ content/glossary/ | |
| fi | |
| # Check if there are any changes to commit | |
| if git diff --quiet && git diff --cached --quiet; then | |
| echo "ℹ️ No changes to commit" | |
| else | |
| echo "✓ Changes detected, committing..." | |
| # Add all potential files | |
| git add content/curated_resources/ 2>/dev/null || true | |
| git add content/contributors/tenzing.md 2>/dev/null || true | |
| git add scripts/forrt_contribs/contributors_cache.csv 2>/dev/null || true | |
| git add data/ga_data.json 2>/dev/null || true | |
| git add static/data/fred_citation.txt 2>/dev/null || true | |
| git add data/summaries.json 2>/dev/null || true | |
| git add content/contributor-analysis/ 2>/dev/null || true | |
| git add content/publications/citation_chart.webp 2>/dev/null || true | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then | |
| git add content/glossary/ 2>/dev/null || true | |
| fi | |
| git commit -m "Update generated resources and data - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" || echo "Nothing to commit" | |
| # Push to build-resources with retry logic | |
| MAX_RETRIES=3 | |
| RETRY_COUNT=0 | |
| while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do | |
| # Using force-with-lease to be safer, but effectively force pushing since we are overwriting | |
| if git push -u origin build-resources --force-with-lease; then | |
| echo "✅ Successfully pushed to build-resources branch" | |
| break | |
| else | |
| RETRY_COUNT=$((RETRY_COUNT + 1)) | |
| if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then | |
| echo "⚠️ Push failed, retrying ($RETRY_COUNT/$MAX_RETRIES)..." | |
| sleep 2 | |
| git pull --rebase || true | |
| else | |
| echo "❌ Push failed after $MAX_RETRIES attempts" | |
| exit 1 | |
| fi | |
| fi | |
| done | |
| fi | |
| popd | |
| # Clean up worktree | |
| git worktree remove "$WORKTREE_DIR" --force || true | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.FORRT_PAT }} | |
| #================ | |
| # Trigger Deploy | |
| #================ | |
| #======================================== | |
| # Trigger the deploy workflow to publish updated data | |
| #======================================== | |
| - name: Trigger deployment | |
| if: github.event_name != 'pull_request' && inputs.skip_deploy != true | |
| run: | | |
| echo "🚀 Triggering deployment workflow..." | |
| curl -s -X POST \ | |
| -H "Authorization: Bearer ${{ secrets.FORRT_PAT }}" \ | |
| -H "Accept: application/vnd.github+json" \ | |
| "https://api.github.com/repos/${{ github.repository }}/dispatches" \ | |
| -d '{"event_type": "data-update"}' | |
| echo "✅ Deployment triggered successfully" | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| #==================== | |
| # Failure Reporting | |
| #==================== | |
| #======================================== | |
| # Check all step outcomes and create issues for any failures | |
| #======================================== | |
| - name: Report workflow step failures | |
| if: always() # Always run this step to catch any failures | |
| continue-on-error: true # Don't fail the workflow if issue creation fails | |
| run: | | |
| echo "🔍 Checking workflow step outcomes..." | |
| # Create workflow run URL | |
| WORKFLOW_RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # Track if any failures occurred | |
| FAILURES_FOUND=false | |
| # Check each critical step outcome | |
| # Note: We check steps that are critical for data processing | |
| # For conditional steps, we check both the condition AND the outcome | |
| # to avoid reporting failures for skipped steps | |
| # Contributor Analysis (only if it was supposed to run) | |
| if [ "${{ steps.monthly-run.outputs.is_monthly }}" == "true" ] && [ "${{ steps.contributor-analysis.outcome }}" == "failure" ]; then | |
| echo "❌ Contributor Analysis failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Contributor Analysis" "The contributor analysis step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Contributor Analysis" | |
| fi | |
| # Tenzing Script | |
| if [ "${{ steps.tenzing-script.outcome }}" == "failure" ]; then | |
| echo "❌ Tenzing Script failed" | |
| FAILURES_FOUND=true | |
| # Don't create a duplicate issue since tenzing has its own issue creation | |
| echo "ℹ️ Tenzing has its own issue creation mechanism" | |
| fi | |
| # Curated Resources | |
| if [ "${{ steps.curated-resources.outcome }}" == "failure" ]; then | |
| echo "❌ Curated Resources script failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Curated Resources Processing" "The curated resources processing step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Curated Resources" | |
| fi | |
| # Tenzing Output Validation | |
| if [ "${{ steps.tenzing-output.outcome }}" == "failure" ]; then | |
| echo "❌ Tenzing output validation failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Tenzing Output Validation" "The tenzing.md file validation failed. The file may not have been created or moved correctly." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Tenzing Output" | |
| fi | |
| # Curated Resources Validation | |
| if [ "${{ steps.validate-resources.outcome }}" == "failure" ]; then | |
| echo "❌ Curated resources validation failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Curated Resources Validation" "The curated resources validation failed. Some files may be invalid or missing." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Resources Validation" | |
| fi | |
| # Glossary Generation (only if it was supposed to run) | |
| if [ "${{ github.event.inputs.regenerate_glossary }}" == "true" ] && [ "${{ steps.glossary-generation.outcome }}" == "failure" ]; then | |
| echo "❌ Glossary generation failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Glossary Generation" "The glossary generation step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Glossary Generation" | |
| fi | |
| # Google Analytics Data | |
| if [ "${{ steps.ga-data.outcome }}" == "failure" ]; then | |
| echo "❌ GA Data download failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Google Analytics Data Download" "The Google Analytics data download step failed. This may be due to authentication issues or API problems." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for GA Data" | |
| fi | |
| # FReD Citation | |
| if [ "${{ steps.fred-citation.outcome }}" == "failure" ]; then | |
| echo "❌ FReD citation update failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "FReD Citation Update" "The FReD citation update step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for FReD Citation" | |
| fi | |
| # Google Scholar | |
| if [ "${{ steps.google-scholar.outcome }}" == "failure" ]; then | |
| echo "❌ Google Scholar script failed" | |
| FAILURES_FOUND=true | |
| python3 scripts/create_workflow_failure_issue.py "Google Scholar Citation Tracking" "The Google Scholar citation tracking step failed during execution." "$WORKFLOW_RUN_URL" || echo "Failed to create issue for Google Scholar" | |
| fi | |
| if [ "$FAILURES_FOUND" == "false" ]; then | |
| echo "✅ All critical workflow steps completed successfully!" | |
| else | |
| echo "⚠️ Some workflow steps failed - issues have been created" | |
| fi | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} |