use correct size unit in every script

2026-03-08 23:02:09 -05:00 · 2025-10-22 02:43:31 +08:00
parent 8198d9e26c
commit 105d7ff6c2
10 changed files with 42 additions and 50 deletions
--- a/scripts/generate_test_files.py
+++ b/scripts/generate_test_files.py
@@ -53,7 +53,7 @@ def generate_structured_content(file_number, size_bytes):
    header = f"""# Test File {file_number:04d}

 This is a generated test file for upload testing.
-File size: {size_bytes / (1024*1024):.2f} MB
+File size: {size_bytes / (1000*1000):.2f} MB

 ## Content Section

@@ -86,13 +86,13 @@ def main():
    output_dir = Path("large_file_test")
    num_files = 1000
    file_size_mb = 1.5
-    file_size_bytes = int(file_size_mb * 1024 * 1024)
+    file_size_bytes = int(file_size_mb * 1000 * 1000)

    # Create output directory
    output_dir.mkdir(exist_ok=True)
    print(f"Creating {num_files} files of {file_size_mb}MB each...")
    print(f"Output directory: {output_dir.absolute()}")
-    print(f"Total size: {num_files * file_size_mb / 1024:.2f} GB")
+    print(f"Total size: {num_files * file_size_mb / 1000:.2f} GB")
    print()

    # Generate files
@@ -119,7 +119,7 @@ def main():

    # Calculate actual size
    total_size = sum(f.stat().st_size for f in output_dir.glob("*.txt"))
-    print(f"✓ Total size: {total_size / (1024**3):.2f} GB")
+    print(f"✓ Total size: {total_size / (1000**3):.2f} GB")
    print()
    print("You can now test with:")
    print("  - Upload via web UI")
--- a/scripts/generate_test_repo.py
+++ b/scripts/generate_test_repo.py
@@ -255,10 +255,9 @@ def generate_test_repo(base_path: str = "test_folder"):
    lfs_size = sum(size for _, size in lfs_files)
    regular_size = sum(size for _, size in regular_files)

-    print(f"\n  Total size: {total_size / 1024 / 1024:.2f} MB")
-    print(f"  LFS size: {lfs_size / 1024 / 1024:.2f} MB")
-    print(f"  Regular size: {regular_size / 1024:.2f} KB")
-
+    print(f"\n  Total size: {total_size / 1000 / 1000:.2f} MB")
+    print(f"  LFS size: {lfs_size / 1000 / 1000:.2f} MB")
+    print(f"  Regular size: {regular_size / 1000:.2f} KB")
    print(f"\n  Directory structure:")
    print(f"    - Root: 2 files")
    print(f"    - config/: 2 files")
--- a/scripts/show_s3_usage.py
+++ b/scripts/show_s3_usage.py
@@ -200,7 +200,7 @@ def display_summary(bucket, stats, detailed=False):

 def display_quota_warning(total_size, quota_gb=10):
    """Display warning if approaching quota limit."""
-    quota_bytes = quota_gb * 1024**3
+    quota_bytes = quota_gb * 1000**3
    percentage = (total_size / quota_bytes * 100) if quota_bytes > 0 else 0

    console.print()
--- a/scripts/test_migration_009.py
+++ b/scripts/test_migration_009.py
@@ -126,8 +126,8 @@ def step2_populate_mock_data():
        is_org=True,
        email=None,
        password_hash=None,
-        private_quota_bytes=10 * 1024 * 1024 * 1024,  # 10GB
-        public_quota_bytes=50 * 1024 * 1024 * 1024,  # 50GB
+        private_quota_bytes=10 * 1000 * 1000 * 1000,  # 10GB
+        public_quota_bytes=50 * 1000 * 1000 * 1000,  # 50GB
    )
    print(f"  Created organization: {org.username}")

@@ -140,8 +140,8 @@ def step2_populate_mock_data():
        password_hash="dummy_hash",
        email_verified=True,
        is_active=True,
-        private_quota_bytes=5 * 1024 * 1024 * 1024,  # 5GB
-        public_quota_bytes=20 * 1024 * 1024 * 1024,  # 20GB
+        private_quota_bytes=5 * 1000 * 1000 * 1000,  # 5GB
+        public_quota_bytes=20 * 1000 * 1000 * 1000,  # 20GB
    )
    print(f"  Created user: {user.username}")

@@ -154,7 +154,7 @@ def step2_populate_mock_data():
        private=False,
        owner=org,
        quota_bytes=None,  # Inherit from org
-        used_bytes=1024 * 1024 * 100,  # 100MB
+        used_bytes=100 * 1000 * 1000,  # 100MB
    )
    print(f"  Created repository: {repo1.full_id}")

@@ -165,8 +165,8 @@ def step2_populate_mock_data():
        full_id="test-user/test-dataset",
        private=True,
        owner=user,
-        quota_bytes=2 * 1024 * 1024 * 1024,  # Custom 2GB quota
-        used_bytes=500 * 1024 * 1024,  # 500MB used
+        quota_bytes=2 * 1000 * 1000 * 1000,  # Custom 2GB quota
+        used_bytes=500 * 1000 * 1000,  # 500MB used
    )
    print(f"  Created repository: {repo2.full_id}")

@@ -346,11 +346,11 @@ def step4_verify_migration():
        print("  ✗ quota_bytes should still be NULL!")
        return False

-    if repo1.used_bytes != 1024 * 1024 * 100:
+    if repo1.used_bytes != 100 * 1000 * 1000:
-        print(f"  ✗ used_bytes changed! Expected 104857600, got {repo1.used_bytes}")
+        print(f"  ✗ used_bytes changed! Expected 100000000, got {repo1.used_bytes}")
        return False

-    if repo2.quota_bytes != 2 * 1024 * 1024 * 1024:
+    if repo2.quota_bytes != 2 * 1000 * 1000 * 1000:
-        print(f"  ✗ quota_bytes changed! Expected 2147483648, got {repo2.quota_bytes}")
+        print(f"  ✗ quota_bytes changed! Expected 2000000000, got {repo2.quota_bytes}")
        return False

@@ -390,7 +390,7 @@ def step5_test_new_functionality():
    keep_versions = get_effective_lfs_keep_versions(repo)
    suffix_rules = get_effective_lfs_suffix_rules(repo)

-    print(f"    Effective threshold: {threshold / (1024*1024):.1f} MB")
+    print(f"    Effective threshold: {threshold / (1000*1000):.1f} MB")
    print(f"    Effective keep_versions: {keep_versions}")
    print(f"    Suffix rules: {suffix_rules}")

@@ -411,9 +411,8 @@ def step5_test_new_functionality():
        return False

    # Test 2: should_use_lfs with defaults
-    test_small = should_use_lfs(repo, "config.json", 1024)  # 1KB
-    test_large = should_use_lfs(repo, "model.bin", 10 * 1024 * 1024)  # 10MB
-
+    test_small = should_use_lfs(repo, "config.json", 1000)  # 1KB
+    test_large = should_use_lfs(repo, "model.bin", 10 * 1000 * 1000)  # 10MB
    print(f"    config.json (1KB) uses LFS: {test_small}")
    print(f"    model.bin (10MB) uses LFS: {test_large}")

@@ -425,14 +424,13 @@ def step5_test_new_functionality():

    # Test 3: Custom threshold
    print("\n  Test 2: Custom threshold (1MB)")
-    repo.lfs_threshold_bytes = 1024 * 1024
+    repo.lfs_threshold_bytes = 1000 * 1000
    repo.save()

    threshold = get_effective_lfs_threshold(repo)
-    test_500kb = should_use_lfs(repo, "file.bin", 500 * 1024)
-    test_2mb = should_use_lfs(repo, "file.bin", 2 * 1024 * 1024)
-
-    print(f"    Effective threshold: {threshold / (1024*1024):.1f} MB")
+    test_500kb = should_use_lfs(repo, "file.bin", 500 * 1000)
+    test_2mb = should_use_lfs(repo, "file.bin", 2 * 1000 * 1000)
+    print(f"    Effective threshold: {threshold / (1000*1000):.1f} MB")
    print(f"    file.bin (500KB) uses LFS: {test_500kb}")
    print(f"    file.bin (2MB) uses LFS: {test_2mb}")

--- a/src/kohakuhub/api/avatar.py
+++ b/src/kohakuhub/api/avatar.py
@@ -25,7 +25,7 @@ router = APIRouter()

 # Avatar configuration
 AVATAR_SIZE = 1024  # Output size (1024x1024)
-AVATAR_MAX_INPUT_SIZE = 10 * 1024 * 1024  # 10MB max input
+AVATAR_MAX_INPUT_SIZE = 10 * 1000 * 1000  # 10MB max input
 AVATAR_JPEG_QUALITY = 95
 ALLOWED_MIME_TYPES = ["image/jpeg", "image/jpg", "image/png", "image/webp", "image/gif"]

@@ -166,7 +166,7 @@ async def upload_user_avatar(
    if len(content) > AVATAR_MAX_INPUT_SIZE:
        raise HTTPException(
            400,
-            detail=f"Image too large. Maximum: {AVATAR_MAX_INPUT_SIZE // 1024 // 1024}MB",
+            detail=f"Image too large. Maximum: {AVATAR_MAX_INPUT_SIZE // 1000 // 1000}MB",
        )

    # Process image (resize, crop, convert to JPEG)
@@ -314,7 +314,7 @@ async def upload_org_avatar(
    if len(content) > AVATAR_MAX_INPUT_SIZE:
        raise HTTPException(
            400,
-            detail=f"Image too large. Maximum: {AVATAR_MAX_INPUT_SIZE // 1024 // 1024}MB",
+            detail=f"Image too large. Maximum: {AVATAR_MAX_INPUT_SIZE // 1000 // 1000}MB",
        )

    # Process image
--- a/src/kohakuhub/api/commit/routers/history.py
+++ b/src/kohakuhub/api/commit/routers/history.py
@@ -361,7 +361,7 @@ async def get_commit_diff(

            # For non-LFS files, fetch actual diff (let frontend decide if renderable)
            # Skip diff for very large files (>1MB) to avoid memory issues
-            max_diff_size = 1024 * 1024  # 1MB
+            max_diff_size = 1000 * 1000  # 1MB

            if not is_lfs and diff_entry.get("type") in ["changed", "added", "removed"]:
                # Check size constraints
--- a/src/kohakuhub/config.py
+++ b/src/kohakuhub/config.py
@@ -194,7 +194,7 @@ class Config(BaseModel):
            )

        # LFS threshold validation
-        if self.app.lfs_threshold_bytes < 1024 * 1024:  # Less than 1MB
+        if self.app.lfs_threshold_bytes < 1000 * 1000:  # Less than 1MB
            warnings.append(
                f"LFS threshold is very low ({self.app.lfs_threshold_bytes} bytes). "
-                f"Consider setting to at least 5MB (5242880 bytes)."
+                f"Consider setting to at least 5MB (5000000 bytes)."
--- a/tests/base.py
+++ b/tests/base.py
@@ -249,7 +249,7 @@ class BaseTestCase:
        Returns:
            Path to created file
        """
-        size_bytes = int(size_mb * 1024 * 1024)
+        size_bytes = int(size_mb * 1000 * 1000)
        content = os.urandom(size_bytes)
        return self.create_temp_file(name, content)

--- a/tests/test_file_ops.py
+++ b/tests/test_file_ops.py
@@ -267,7 +267,7 @@ class TestFileOperations:
        # Create file with random content
        import tempfile

-        test_content = os.urandom(1024 * 100)  # 100KB random data
+        test_content = os.urandom(100 * 1000)  # 100KB random data
        original_hash = hashlib.sha256(test_content).hexdigest()

        test_file = (
--- a/tests/test_lfs.py
+++ b/tests/test_lfs.py
@@ -6,6 +6,8 @@ Files >10MB should use LFS, files <=10MB should use regular upload.

 import hashlib
 import os
+import shutil
+import tempfile
 from pathlib import Path

 import pytest
@@ -20,10 +22,8 @@ class TestLFSOperations:
        repo_id, repo_type, hf_client = temp_repo

        # Create 15MB file
-        import tempfile
-
        size_mb = 15
-        test_content = os.urandom(size_mb * 1024 * 1024)
+        test_content = os.urandom(size_mb * 1000 * 1000)
        original_hash = hashlib.sha256(test_content).hexdigest()

        test_file = Path(tempfile.gettempdir()) / f"test_15mb_{os.urandom(4).hex()}.bin"
@@ -63,7 +63,7 @@ class TestLFSOperations:
        import tempfile

        size_mb = 50
-        test_content = os.urandom(size_mb * 1024 * 1024)
+        test_content = os.urandom(size_mb * 1000 * 1000)
        original_hash = hashlib.sha256(test_content).hexdigest()

        test_file = Path(tempfile.gettempdir()) / f"test_50mb_{os.urandom(4).hex()}.bin"
@@ -101,7 +101,7 @@ class TestLFSOperations:
        import tempfile

        size_mb = 5
-        test_content = os.urandom(size_mb * 1024 * 1024)
+        test_content = os.urandom(size_mb * 1000 * 1000)

        test_file = Path(tempfile.gettempdir()) / f"test_5mb_{os.urandom(4).hex()}.bin"
        test_file.write_bytes(test_content)
@@ -128,10 +128,8 @@ class TestLFSOperations:
        repo_id, repo_type, hf_client = temp_repo

        # Create 12MB file
-        import tempfile
-
        size_mb = 12
-        test_content = os.urandom(size_mb * 1024 * 1024)
+        test_content = os.urandom(size_mb * 1000 * 1000)
        test_file = (
            Path(tempfile.gettempdir()) / f"test_dedup_{os.urandom(4).hex()}.bin"
        )
@@ -188,7 +186,7 @@ class TestLFSOperations:

        # Prepare LFS batch request
        fake_oid = "a" * 64  # SHA256 hex
-        fake_size = 15 * 1024 * 1024  # 15MB
+        fake_size = 15 * 1000 * 1000  # 15MB

        batch_request = {
            "operation": "upload",
@@ -233,7 +231,7 @@ class TestLFSOperations:
        (temp_dir / "small2.txt").write_bytes(b"Small file 2" * 100)

        # Large file (LFS)
-        (temp_dir / "large.bin").write_bytes(os.urandom(12 * 1024 * 1024))  # 12MB
+        (temp_dir / "large.bin").write_bytes(os.urandom(12 * 1000 * 1000))  # 12MB

        # Upload folder
        hf_client.upload_folder(
@@ -254,10 +252,9 @@ class TestLFSOperations:
        downloaded = hf_client.download_file(
            repo_id=repo_id, filename="mixed/large.bin", repo_type=repo_type
        )
-        assert Path(downloaded).stat().st_size == 12 * 1024 * 1024
+        assert Path(downloaded).stat().st_size == 12 * 1000 * 1000

        # Cleanup
-        import shutil

        shutil.rmtree(temp_dir)

@@ -270,7 +267,7 @@ class TestLFSOperations:
        # Upload LFS file
        import tempfile

-        test_content = os.urandom(15 * 1024 * 1024)  # 15MB
+        test_content = os.urandom(15 * 1000 * 1000)  # 15MB
        test_file = (
            Path(tempfile.gettempdir()) / f"test_lfs_meta_{os.urandom(4).hex()}.bin"
        )
@@ -318,9 +315,7 @@ class TestLFSOperations:
        repo_id, repo_type, hf_client = temp_repo

        # Create exactly 10MB file
-        import tempfile
-
-        size_bytes = 10 * 1024 * 1024
+        size_bytes = 10 * 1000 * 1000
        test_content = os.urandom(size_bytes)

        test_file = (