#!/bin/bash

# CONFIGURATION
# Edit these values before running; they are read by the download loop below.
CSV_FILE="contenus_seidn_files.csv"          # CSV to read (relative paths resolve against the current directory)
DEST_DIR="/var/www/files/95/2026/02/files"   # Folder the downloaded files are written to
PROXY="http://proxya.ac-guadeloupe.fr:8080"  # Passed to curl -x; set to "" if no proxy
PROXY_USER=""                                # Passed to curl --proxy-user (only when PROXY is set); leave empty if no auth

# Create destination folder if needed. Abort when that fails: every download
# would otherwise fail one by one because curl cannot create the output file.
if ! mkdir -p -- "$DEST_DIR"; then
    printf 'Error: cannot create destination folder: %s\n' "$DEST_DIR" >&2
    exit 1
fi

# Print $1 cleaned up for use as a URL: carriage returns, newlines and double
# quotes are removed, surrounding whitespace is trimmed and every remaining
# space is encoded as %20.
# Done with parameter expansion only: piping through xargs (as a "trim")
# interprets quotes and backslashes, so a URL containing an apostrophe made
# xargs fail and the URL was lost.
normalize_url() {
    local url=$1
    url=${url//$'\r'/}
    url=${url//$'\n'/}
    url=${url//\"/}
    # Strip leading, then trailing, whitespace.
    url="${url#"${url%%[![:space:]]*}"}"
    url="${url%"${url##*[![:space:]]}"}"
    printf '%s' "${url// /%20}"
}

# Print the file name to save URL $1 under: the last path component, without
# any "?query" or "#fragment" part and ignoring trailing slashes.
# Returns 1 (printing nothing) when no usable name remains ("", "." or "..").
# NOTE(review): percent-escapes are kept as-is in the name (e.g. "a%20b.pdf"),
# and two URLs with the same last component overwrite each other.
url_filename() {
    local path=$1 name
    path=${path%%\?*}
    path=${path%%\#*}
    while [[ "$path" == */ ]]; do
        path=${path%/}
    done
    name=${path##*/}
    case "$name" in
        '' | . | ..) return 1 ;;
    esac
    printf '%s' "$name"
}

# Download every URL listed in the CSV file $1 into "$DEST_DIR".
# The first line of the CSV is a header and is skipped; every other line may
# hold several URLs separated by semicolons.
# Globals:   DEST_DIR (read), PROXY (read), PROXY_USER (read)
# Arguments: $1 - path to the CSV file
# Outputs:   one status line per URL on stdout; fatal errors on stderr
# Returns:   1 if the CSV file cannot be read, 0 otherwise (individual
#            download failures are reported but do not change the status)
download_all() {
    local csv=$1
    if [[ ! -r "$csv" ]]; then
        printf 'Error: cannot read CSV file: %s\n' "$csv" >&2
        return 1
    fi

    local line field url filename
    local -a fields curl_cmd

    # "|| [[ -n $line ]]" keeps the last line when the file lacks a final newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Split by semicolon
        IFS=';' read -ra fields <<< "$line"

        for field in "${fields[@]}"; do
            url=$(normalize_url "$field")

            # Empty fields (e.g. "a;;b" or a blank line) carry no URL.
            [[ -n "$url" ]] || continue

            # Validate URL format
            if [[ ! "$url" =~ ^https?:// ]]; then
                printf '%s\n' "Invalid URL skipped: $url"
                continue
            fi

            if ! filename=$(url_filename "$url"); then
                printf '%s\n' "No file name in URL, skipped: $url"
                continue
            fi

            # Build the curl command as an array so no argument is re-split.
            curl_cmd=(curl -f -L --silent --show-error --output "$DEST_DIR/$filename")

            # Add proxy if configured
            if [[ -n "${PROXY:-}" ]]; then
                curl_cmd+=(-x "$PROXY")
                if [[ -n "${PROXY_USER:-}" ]]; then
                    curl_cmd+=(--proxy-user "$PROXY_USER")
                fi
            fi

            curl_cmd+=("$url")

            # Execute download; stdin is detached so curl can never consume
            # the CSV lines the surrounding loop is reading.
            if "${curl_cmd[@]}" < /dev/null; then
                printf '%s\n' "✅ Downloaded: $filename"
            else
                printf '%s\n' "❌ Failed: $url"
            fi
        done
    done < <(tail -n +2 -- "$csv")

    return 0
}

download_all "$CSV_FILE"