"""Show S3 storage usage.

This script displays storage statistics without making any changes.
Useful for monitoring storage usage on limited plans (e.g., Cloudflare R2 free tier).

Usage:
    python scripts/show_s3_usage.py \\
        --endpoint https://s3.amazonaws.com \\
        --access-key YOUR_KEY \\
        --secret-key YOUR_SECRET \\
        --bucket my-bucket

    python scripts/show_s3_usage.py \\
        --endpoint https://s3.amazonaws.com \\
        --access-key YOUR_KEY \\
        --secret-key YOUR_SECRET \\
        --bucket my-bucket \\
        --detailed

Requirements:
    - boto3 and rich packages
"""

import argparse
import heapq
import json
import os
import sys
from collections import defaultdict

import boto3
from botocore.exceptions import ClientError
from rich.console import Console
from rich.markup import escape
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.table import Table
from rich.tree import Tree

# Module-wide Rich console; the display and error-reporting code below prints through it.
console = Console()


def get_s3_client(endpoint, access_key, secret_key, region="us-east-1"):
    """Build a boto3 S3 client for the given endpoint and credentials.

    The client is configured with the "s3v4" signature version and
    path-style bucket addressing.

    Args:
        endpoint: S3 endpoint URL
        access_key: S3 access key
        secret_key: S3 secret key
        region: S3 region (default: us-east-1)

    Returns:
        boto3 S3 client
    """
    client_config = boto3.session.Config(
        signature_version="s3v4",
        s3={"addressing_style": "path"},
    )
    connection_kwargs = {
        "endpoint_url": endpoint,
        "aws_access_key_id": access_key,
        "aws_secret_access_key": secret_key,
        "region_name": region,
        "config": client_config,
    }
    return boto3.client("s3", **connection_kwargs)


def format_size(bytes_size):
    """Format a byte count as a human-readable size string.

    The value is divided by 1024 until it drops below 1024 (units B, KB,
    MB, GB, TB), and anything larger is reported in PB. Output always has
    two decimal places, e.g. ``"1.50 KB"``.

    Args:
        bytes_size: Size in bytes (int or float). Negative values are
            scaled by their magnitude and keep their sign.

    Returns:
        str: The formatted size, e.g. ``"0.00 B"`` or ``"-2.00 KB"``.
    """
    # Scale on the magnitude: comparing a negative number against 1024
    # would always succeed on the first unit and never scale it.
    sign = "-" if bytes_size < 0 else ""
    magnitude = abs(bytes_size)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if magnitude < 1024.0:
            return f"{sign}{magnitude:.2f} {unit}"
        magnitude /= 1024.0
    return f"{sign}{magnitude:.2f} PB"


def analyze_storage(s3_client, bucket):
    """Walk every object in the bucket and tally usage per category.

    Objects are grouped by key prefix: "lfs/" -> "lfs", "hf-model-" ->
    "models", "hf-dataset-" -> "datasets", "hf-space-" -> "spaces", and
    everything else -> "other". A running "total" entry is kept as well.

    Args:
        s3_client: S3 client providing the "list_objects_v2" paginator.
        bucket: Name of the bucket to scan.

    Returns:
        dict: Category name -> {"count", "size", "objects"}, where
        "objects" lists {"key", "size"} for each object in the category
        (the "total" entry only accumulates count and size). Returns None
        after printing an error when the bucket does not exist.

    Raises:
        ClientError: For any S3 error other than "NoSuchBucket".
    """
    # (key prefix, category) pairs checked in order; first match wins.
    category_rules = (
        ("lfs/", "lfs"),
        ("hf-model-", "models"),
        ("hf-dataset-", "datasets"),
        ("hf-space-", "spaces"),
    )
    usage = defaultdict(lambda: {"count": 0, "size": 0, "objects": []})

    spinner = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    )
    with spinner:
        spinner.add_task("Analyzing storage...", total=None)
        lister = s3_client.get_paginator("list_objects_v2")

        try:
            for page in lister.paginate(Bucket=bucket):
                # Pages without a "Contents" entry contribute nothing.
                for entry in page.get("Contents", ()):
                    object_key = entry["Key"]
                    object_size = entry["Size"]

                    category = next(
                        (
                            name
                            for key_prefix, name in category_rules
                            if object_key.startswith(key_prefix)
                        ),
                        "other",
                    )

                    category_stats = usage[category]
                    category_stats["count"] += 1
                    category_stats["size"] += object_size
                    category_stats["objects"].append(
                        {"key": object_key, "size": object_size}
                    )

                    overall = usage["total"]
                    overall["count"] += 1
                    overall["size"] += object_size
        except ClientError as err:
            if err.response["Error"]["Code"] != "NoSuchBucket":
                raise
            console.print(f"[red]Error: Bucket '{bucket}' does not exist[/red]")
            return None

    return usage


def display_summary(bucket, stats, detailed=False):
    """Print the storage summary table and, optionally, a per-category tree.

    Args:
        bucket: Bucket name shown in the table title.
        stats: Mapping of category name ("lfs", "models", "datasets",
            "spaces", "other", "total") to a dict with "count", "size" and
            "objects" (a list of {"key", "size"} dicts).
        detailed: When true, also print a tree listing the ten largest
            objects of each non-empty category.
    """
    top_n = 10

    # escape() keeps brackets in the bucket name from being parsed as markup.
    table = Table(
        title=f"S3 Storage Usage: {escape(bucket)}",
        show_header=True,
        header_style="bold cyan",
    )
    table.add_column("Category", style="yellow")
    table.add_column("Objects", justify="right", style="magenta")
    table.add_column("Total Size", justify="right", style="green")
    table.add_column("Percentage", justify="right", style="cyan")

    total = stats["total"]
    total_size = total["size"]

    # Categories are shown in this fixed order (not sorted by size).
    categories = [
        ("lfs", "LFS Files (>5MB)"),
        ("models", "Model Repositories"),
        ("datasets", "Dataset Repositories"),
        ("spaces", "Space Repositories"),
        ("other", "Other"),
    ]

    # Only categories that actually contain objects are displayed. The
    # membership test avoids creating empty entries in a defaultdict.
    populated = [
        (label, stats[prefix])
        for prefix, label in categories
        if prefix in stats and stats[prefix]["count"] > 0
    ]

    for label, entry in populated:
        size = entry["size"]
        percentage = (size / total_size * 100) if total_size > 0 else 0
        table.add_row(
            label,
            f"{entry['count']:,}",
            format_size(size),
            f"{percentage:.1f}%",
        )

    # Add total row
    table.add_row(
        "[bold]TOTAL[/bold]",
        f"[bold]{total['count']:,}[/bold]",
        f"[bold]{format_size(total_size)}[/bold]",
        "[bold]100.0%[/bold]",
        style="bold blue",
    )

    console.print(table)

    if not (detailed and total["count"] > 0):
        return

    # Detailed view
    console.print()
    tree = Tree("[bold cyan]Storage Breakdown[/bold cyan]")

    for label, entry in populated:
        branch = tree.add(f"[yellow]{label}[/yellow] - {format_size(entry['size'])}")

        # nlargest picks the top objects without sorting the whole list.
        objects = entry["objects"]
        for obj in heapq.nlargest(top_n, objects, key=lambda item: item["size"]):
            # Object keys are arbitrary strings; escape them so brackets in
            # a key are shown literally instead of being parsed as markup.
            branch.add(
                f"{escape(obj['key'])} - [green]{format_size(obj['size'])}[/green]"
            )

        remaining = len(objects) - top_n
        if remaining > 0:
            branch.add(f"[dim]... and {remaining} more[/dim]")

    console.print(tree)


def display_quota_warning(total_size, quota_gb=10):
    """Print how much of the storage quota is in use.

    The quota is interpreted in decimal gigabytes (1000**3 bytes). At 90%
    or more a red warning plus a cleanup hint is printed, at 75% or more a
    yellow notice, and otherwise a green status line.

    Args:
        total_size: Bytes currently stored.
        quota_gb: Quota in GB (default: 10). A non-positive quota is
            reported as 0% used.
    """
    quota_bytes = quota_gb * 1000**3
    if quota_bytes > 0:
        percentage = total_size / quota_bytes * 100
    else:
        percentage = 0

    if percentage >= 90:
        messages = [
            f"[bold red]⚠ WARNING: Using {percentage:.1f}% of {quota_gb}GB quota![/bold red]",
            "[red]Consider running: python scripts/clear_s3_storage.py --prefix lfs/[/red]",
        ]
    elif percentage >= 75:
        messages = [
            f"[bold yellow]⚠ Approaching quota limit: {percentage:.1f}% of {quota_gb}GB used[/bold yellow]",
        ]
    else:
        messages = [
            f"[green]✓ Storage usage: {percentage:.1f}% of {quota_gb}GB quota[/green]",
        ]

    # Blank separator line first, then the status message(s).
    console.print()
    for message in messages:
        console.print(message)


def main():
    """Parse CLI arguments, scan the bucket, and print the usage report.

    Connection options fall back to the S3_ENDPOINT, S3_ACCESS_KEY,
    S3_SECRET_KEY, S3_BUCKET and S3_REGION environment variables. With
    ``--json`` a JSON document is written to stdout (also for an empty
    bucket); otherwise a Rich table, an optional detailed tree, and a
    quota status line are printed.

    Exits with status 1 when a required option is missing, the client
    cannot be created, the bucket does not exist, or listing the bucket
    fails.
    """
    parser = argparse.ArgumentParser(description="Show S3 storage usage")

    # S3 connection arguments
    parser.add_argument(
        "--endpoint",
        default=os.environ.get("S3_ENDPOINT"),
        help="S3 endpoint URL (or set S3_ENDPOINT env var)",
    )
    parser.add_argument(
        "--access-key",
        default=os.environ.get("S3_ACCESS_KEY"),
        help="S3 access key (or set S3_ACCESS_KEY env var)",
    )
    parser.add_argument(
        "--secret-key",
        default=os.environ.get("S3_SECRET_KEY"),
        help="S3 secret key (or set S3_SECRET_KEY env var)",
    )
    parser.add_argument(
        "--bucket",
        default=os.environ.get("S3_BUCKET"),
        help="S3 bucket name (or set S3_BUCKET env var)",
    )
    parser.add_argument(
        "--region",
        default=os.environ.get("S3_REGION", "us-east-1"),
        help="S3 region (default: us-east-1, or set S3_REGION env var)",
    )

    # Display options
    parser.add_argument(
        "--detailed",
        action="store_true",
        help="Show detailed breakdown with top objects",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument(
        "--quota-gb",
        type=int,
        default=10,
        help="Storage quota in GB for warning calculation (default: 10GB for R2 free tier)",
    )

    args = parser.parse_args()

    # Validate required arguments; report the first missing one and stop.
    required = (
        (args.endpoint, "--endpoint", "S3_ENDPOINT"),
        (args.access_key, "--access-key", "S3_ACCESS_KEY"),
        (args.secret_key, "--secret-key", "S3_SECRET_KEY"),
        (args.bucket, "--bucket", "S3_BUCKET"),
    )
    for value, flag, env_var in required:
        if not value:
            console.print(
                f"[red]Error: {flag} is required (or set {env_var} env var)[/red]"
            )
            sys.exit(1)

    bucket = args.bucket

    # Create S3 client
    try:
        s3_client = get_s3_client(
            endpoint=args.endpoint,
            access_key=args.access_key,
            secret_key=args.secret_key,
            region=args.region,
        )
    except Exception as e:
        # markup=False: exception text may contain brackets that would
        # otherwise be parsed as Rich markup.
        console.print(f"Error connecting to S3: {e}", style="red", markup=False)
        sys.exit(1)

    # Analyze storage. Building the client does not contact the server, so
    # network and credential failures first show up while listing objects;
    # report them as a message rather than a traceback.
    try:
        stats = analyze_storage(s3_client, bucket)
    except Exception as e:
        console.print(
            f"Error analyzing bucket '{bucket}': {e}", style="red", markup=False
        )
        sys.exit(1)

    if stats is None:
        sys.exit(1)

    total = stats["total"]

    if args.json:
        # Always emit a JSON document, even for an empty bucket, so that
        # consumers parsing stdout never receive plain text.
        output = {
            "bucket": bucket,
            "total_objects": total["count"],
            "total_size_bytes": total["size"],
            "total_size_human": format_size(total["size"]),
            "categories": {
                prefix: {
                    "count": stats[prefix]["count"],
                    "size_bytes": stats[prefix]["size"],
                    "size_human": format_size(stats[prefix]["size"]),
                }
                for prefix in ("lfs", "models", "datasets", "spaces", "other")
                if prefix in stats and stats[prefix]["count"] > 0
            },
        }
        print(json.dumps(output, indent=2))
        return

    if total["count"] == 0:
        console.print("[yellow]Bucket is empty.[/yellow]")
        return

    # Rich table output
    console.print()
    display_summary(bucket, stats, detailed=args.detailed)
    display_quota_warning(total["size"], quota_gb=args.quota_gb)
    console.print()


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()