#!/bin/bash
# Daily Digest to Podcast Converter
# Converts blog-backup daily digest posts to audio podcast format.
#
# Requires: curl, jq. OPENAI_API_KEY must be set in the environment or in
# $WORKSPACE_DIR/.env.openai (KEY=VALUE lines).

# Exit on the first unhandled error, and make a pipeline fail when ANY stage
# fails — plain `set -e` only sees the status of the last stage.
set -eo pipefail
# Configuration — readonly: nothing below should reassign these.
readonly WORKSPACE_DIR="/Users/mattbruce/.openclaw/workspace"
readonly PODCAST_DIR="$WORKSPACE_DIR/podcast"
readonly AUDIO_DIR="$PODCAST_DIR/audio"
readonly RSS_FILE="$PODCAST_DIR/rss.xml"
readonly BLOG_BACKUP_URL="https://blog-backup-two.vercel.app"

# ANSI color codes for log output (NC resets to the terminal default).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color
# log MESSAGE — print a timestamped, green-tagged info message to stdout.
# %b interprets the backslash escapes in the color codes, like `echo -e`.
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%b\n' "${GREEN}[${stamp}]${NC} $1"
}
# error MESSAGE — print a red [ERROR]-tagged message to stderr.
# %b interprets the backslash escapes in the color codes, like `echo -e`.
error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
}
# warn MESSAGE — print a yellow [WARN]-tagged message to stdout.
# %b interprets the backslash escapes in the color codes, like `echo -e`.
warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
# Ensure the output directory tree exists.
mkdir -p "$AUDIO_DIR"

# Resolve the OpenAI API key: prefer the environment, then fall back to
# $WORKSPACE_DIR/.env.openai (KEY=VALUE lines).
if [ -z "$OPENAI_API_KEY" ]; then
    if [ -f "$WORKSPACE_DIR/.env.openai" ]; then
        # `set -a` auto-exports every variable assigned while sourcing.
        # Unlike the old `export $(cat file | xargs)`, this survives quoted
        # values, values containing spaces, and comment lines.
        set -a
        # shellcheck disable=SC1091
        . "$WORKSPACE_DIR/.env.openai"
        set +a
    fi
fi

if [ -z "$OPENAI_API_KEY" ]; then
    error "OPENAI_API_KEY not set. Please set it or create $WORKSPACE_DIR/.env.openai"
    exit 1
fi
# Fetch the most recent daily digest row from Supabase.
# Outputs: a JSON array (possibly "[]") on stdout — and ONLY the JSON:
#          the progress message is sent to stderr, because callers capture
#          this function's stdout with $( ) and feed it to jq. (The original
#          logged to stdout, which corrupted the captured JSON.)
# Returns: 0 on success, 1 if the request fails.
fetch_latest_digest() {
    log "Fetching latest daily digest from blog-backup..." >&2

    # Supabase anon key. The same key is sent as both the `apikey` header and
    # the bearer token — the original sent two slightly different JWTs
    # (differing iat claims), which looks like a copy/paste typo.
    local anon_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFuYXRjaHJqbHBlaGlpand0cmVoIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzE2NDA0MzYsImV4cCI6MjA4NzIxNjQzNn0.47XOMrQBzcQEh71phQflPoO4v79Jk3rft7BC72KHDvA"

    # Newest row only: order by created_at descending, limit 1.
    local response
    if ! response=$(curl -s "https://qnatchrjlpehiijwtreh.supabase.co/rest/v1/blog_messages?select=id,date,content,tags&order=created_at.desc&limit=1" \
        -H "apikey: $anon_key" \
        -H "Authorization: Bearer $anon_key"); then
        error "Failed to reach Supabase"
        return 1
    fi

    echo "$response"
}
# Convert text to an MP3 via the OpenAI TTS API.
# Globals:   OPENAI_API_KEY (read), log()/error() helpers
# Arguments: $1 - text to speak; $2 - output .mp3 path
# Returns:   0 on success, 1 if the request fails or produces no audio.
convert_to_speech() {
    local text="$1"
    local output_file="$2"

    log "Converting text to speech..."

    # Build the request body with jq so arbitrary text (quotes, newlines,
    # unicode) is JSON-encoded correctly instead of string-interpolated.
    local payload
    payload=$(jq -n --arg input "$text" \
        '{model: "tts-1", input: $input, voice: "alloy", response_format: "mp3"}')

    # --fail: on an HTTP error OpenAI returns a JSON error document; without
    # it curl would save that JSON as the "mp3" and the -s size check below
    # would wrongly pass.
    if ! curl -s --fail -X POST "https://api.openai.com/v1/audio/speech" \
        -H "Authorization: Bearer $OPENAI_API_KEY" \
        -H "Content-Type: application/json" \
        -d "$payload" \
        --output "$output_file"; then
        error "TTS request failed"
        rm -f -- "$output_file"   # don't leave a partial/empty file behind
        return 1
    fi

    if [ -s "$output_file" ]; then
        log "Audio file created: $output_file"
        return 0
    else
        error "Failed to create audio file"
        return 1
    fi
}
# Generate the podcast RSS feed: channel header plus up to the 20 newest
# episodes found in $AUDIO_DIR.
# Globals: AUDIO_DIR (read), RSS_FILE (overwritten), log() helper
generate_rss() {
    log "Generating RSS feed..."

    local podcast_title="OpenClaw Daily Digest"
    local podcast_description="Daily tech news and insights for developers"
    local podcast_link="https://mission-control-rho-pink.vercel.app/podcast"
    local podcast_image="https://mission-control-rho-pink.vercel.app/podcast-cover.jpg"

    # Channel header (unquoted EOF so the $podcast_* variables expand).
    cat > "$RSS_FILE" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
<channel>
<title>$podcast_title</title>
<link>$podcast_link</link>
<language>en-us</language>
<copyright>© 2026 OpenClaw</copyright>
<itunes:author>OpenClaw</itunes:author>
<description>$podcast_description</description>
<itunes:image href="$podcast_image"/>
<itunes:category text="Technology"/>
<itunes:explicit>no</itunes:explicit>

EOF

    # Add episodes, most recent first. Reading `ls -t` line-by-line avoids
    # the word-splitting of `for f in $(ls …)` on paths with spaces.
    local mp3_file filename episode_date episode_title file_size pub_date
    while IFS= read -r mp3_file; do
        [ -f "$mp3_file" ] || continue

        filename=$(basename "$mp3_file")
        # Date embedded in the filename (daily-digest-YYYY-MM-DD.mp3);
        # fall back to today when absent.
        episode_date=$(echo "$filename" | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}' || date '+%Y-%m-%d')
        # BSD `date -j -f` (macOS); the || fallback covers GNU date.
        episode_title="Daily Digest - $(date -j -f '%Y-%m-%d' "$episode_date" '+%B %d, %Y' 2>/dev/null || echo "$episode_date")"
        # stat flags differ between BSD (-f%z) and GNU (-c%s).
        file_size=$(stat -f%z "$mp3_file" 2>/dev/null || stat -c%s "$mp3_file" 2>/dev/null || echo "0")
        pub_date=$(date -j -f '%Y-%m-%d' "$episode_date" '+%a, %d %b %Y 07:00:00 CST' 2>/dev/null || date '+%a, %d %b %Y 07:00:00 CST')

        cat >> "$RSS_FILE" << EOF
<item>
<title>$episode_title</title>
<enclosure url="$podcast_link/audio/$filename" length="$file_size" type="audio/mpeg"/>
<pubDate>$pub_date</pubDate>
<guid isPermaLink="false">$filename</guid>
<itunes:duration>5:00</itunes:duration>
<description>Daily tech digest for $episode_date</description>
</item>

EOF
    done < <(ls -t "$AUDIO_DIR"/*.mp3 2>/dev/null | head -20)

    cat >> "$RSS_FILE" << EOF
</channel>
</rss>
EOF

    log "RSS feed generated: $RSS_FILE"
}
# Strip markdown syntax that would read badly as speech.
# Arguments: $1 - raw markdown text
# Outputs:   cleaned plain text on stdout
clean_text_for_tts() {
    local text="$1"

    # One sed pass instead of eight echo|sed round-trips; sed is line-based,
    # so each expression is applied per line in order, which is equivalent to
    # the sequential pipeline it replaces. Order:
    #   1. [text](url)  -> text
    #   2. leading # headers removed
    #   3. **bold** / *italic* / __ / _ markers removed
    #      (NB: also strips underscores inside words, e.g. snake_case —
    #      same trade-off as before)
    #   4. single-line ```code``` fences removed (multi-line fences are NOT
    #      caught, since sed works line by line)
    #   5. `inline code` unwrapped
    #   6. --- horizontal rules removed
    #   7. leading/trailing whitespace trimmed
    printf '%s\n' "$text" | sed -E \
        -e 's/\[([^]]+)\]\([^)]+\)/\1/g' \
        -e 's/^#+ //g' \
        -e 's/\*\*//g; s/\*//g; s/__//g; s/_//g' \
        -e 's/```[^`]*```//g' \
        -e 's/`([^`]+)`/\1/g' \
        -e 's/^---$//g' \
        -e 's/^[[:space:]]*//g; s/[[:space:]]*$//g'
}
# Main entry point: fetch the latest digest, synthesize audio for it (unless
# already done), and regenerate the RSS feed.
main() {
    log "Starting Daily Digest to Podcast conversion..."

    # Declaration split from assignment so a fetch failure isn't masked by
    # `local`'s own (always-zero) exit status.
    local digest_json
    digest_json=$(fetch_latest_digest) || digest_json=""

    if [ -z "$digest_json" ] || [ "$digest_json" = "[]" ]; then
        error "No digest found"
        exit 1
    fi

    # Parse digest fields (jq prints the literal string "null" when a field
    # is missing — guard the date, which names the output file).
    local digest_date digest_content
    digest_date=$(echo "$digest_json" | jq -r '.[0].date')
    digest_content=$(echo "$digest_json" | jq -r '.[0].content')

    if [ -z "$digest_date" ] || [ "$digest_date" = "null" ]; then
        error "Digest is missing a date"
        exit 1
    fi

    log "Found digest for date: $digest_date"

    # Skip if this date was already converted.
    local output_file="$AUDIO_DIR/daily-digest-$digest_date.mp3"
    if [ -f "$output_file" ]; then
        warn "Audio already exists for $digest_date, skipping conversion"
        exit 0
    fi

    log "Cleaning text for TTS..."
    local clean_text
    clean_text=$(clean_text_for_tts "$digest_content")

    # Spoken intro with a human-readable date (BSD `date -j -f`; the ||
    # fallback keeps the ISO date on GNU systems).
    local full_text
    full_text="OpenClaw Daily Digest for $(date -j -f '%Y-%m-%d' "$digest_date" '+%B %d, %Y' 2>/dev/null || echo "$digest_date"). $clean_text"

    convert_to_speech "$full_text" "$output_file"

    generate_rss

    log "✅ Podcast generation complete!"
    log "Audio file: $output_file"
    log "RSS feed: $RSS_FILE"
}
# Entry point — forward all CLI arguments to main().
main "$@"