This commit is contained in:
Timothy Jaeryang Baek
2026-03-17 17:58:01 -05:00
parent fcf7208352
commit de3317e26b
220 changed files with 17200 additions and 22836 deletions

View File

@@ -13,9 +13,9 @@ from unittest.mock import MagicMock
def mock_upload_dir(monkeypatch, tmp_path):
    """Redirect ``provider.UPLOAD_DIR`` to a new ``uploads`` folder under *tmp_path*.

    The folder is created on disk first; the patched attribute receives the
    folder's string form. The folder object itself is returned so callers can
    build expected file paths from it.
    """
    uploads = tmp_path / 'uploads'
    uploads.mkdir()
    # monkeypatch restores the original attribute when the requesting test ends.
    monkeypatch.setattr(provider, 'UPLOAD_DIR', str(uploads))
    return uploads
@@ -29,16 +29,16 @@ def test_imports():
def test_get_storage_provider():
    """Each known backend key must yield an instance of its provider class.

    A key that is not recognised is expected to raise ``RuntimeError``.
    """
    expected_by_key = (
        ('local', provider.LocalStorageProvider),
        ('s3', provider.S3StorageProvider),
        ('gcs', provider.GCSStorageProvider),
        ('azure', provider.AzureStorageProvider),
    )
    for key, expected_cls in expected_by_key:
        instance = provider.get_storage_provider(key)
        assert isinstance(instance, expected_cls)

    with pytest.raises(RuntimeError):
        provider.get_storage_provider('invalid')
def test_class_instantiation():
@@ -58,10 +58,10 @@ def test_class_instantiation():
class TestLocalStorageProvider:
Storage = provider.LocalStorageProvider()
file_content = b"test content"
file_content = b'test content'
file_bytesio = io.BytesIO(file_content)
filename = "test.txt"
filename_extra = "test_exyta.txt"
filename = 'test.txt'
filename_extra = 'test_exyta.txt'
file_bytesio_empty = io.BytesIO()
def test_upload_file(self, monkeypatch, tmp_path):
@@ -99,14 +99,13 @@ class TestLocalStorageProvider:
@mock_aws
class TestS3StorageProvider:
def __init__(self):
    """Create the S3 provider under test, a boto3 S3 resource, and sample data.

    NOTE(review): pytest does not collect test classes that define
    ``__init__`` -- confirm the tests in this class are actually being run.
    """
    storage = provider.S3StorageProvider()
    storage.bucket_name = 'my-bucket'
    self.Storage = storage
    self.s3_client = boto3.resource('s3', region_name='us-east-1')

    # Shared sample payload and the object names the tests upload it under.
    self.file_content = b'test content'
    self.filename, self.filename_extra = 'test.txt', 'test_exyta.txt'
    self.file_bytesio_empty = io.BytesIO()
    super().__init__()
@@ -116,25 +115,21 @@ class TestS3StorageProvider:
with pytest.raises(Exception):
self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
self.s3_client.create_bucket(Bucket=self.Storage.bucket_name)
contents, s3_file_path = self.Storage.upload_file(
io.BytesIO(self.file_content), self.filename
)
contents, s3_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
object = self.s3_client.Object(self.Storage.bucket_name, self.filename)
assert self.file_content == object.get()["Body"].read()
assert self.file_content == object.get()['Body'].read()
# local checks
assert (upload_dir / self.filename).exists()
assert (upload_dir / self.filename).read_bytes() == self.file_content
assert contents == self.file_content
assert s3_file_path == "s3://" + self.Storage.bucket_name + "/" + self.filename
assert s3_file_path == 's3://' + self.Storage.bucket_name + '/' + self.filename
with pytest.raises(ValueError):
self.Storage.upload_file(self.file_bytesio_empty, self.filename)
def test_get_file(self, monkeypatch, tmp_path):
upload_dir = mock_upload_dir(monkeypatch, tmp_path)
self.s3_client.create_bucket(Bucket=self.Storage.bucket_name)
contents, s3_file_path = self.Storage.upload_file(
io.BytesIO(self.file_content), self.filename
)
contents, s3_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
file_path = self.Storage.get_file(s3_file_path)
assert file_path == str(upload_dir / self.filename)
assert (upload_dir / self.filename).exists()
@@ -142,17 +137,15 @@ class TestS3StorageProvider:
def test_delete_file(self, monkeypatch, tmp_path):
    """Upload one file, delete it, then verify it is gone locally and from the bucket."""
    upload_dir = mock_upload_dir(monkeypatch, tmp_path)
    local_copy = upload_dir / self.filename
    self.s3_client.create_bucket(Bucket=self.Storage.bucket_name)

    _, s3_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
    assert local_copy.exists()

    self.Storage.delete_file(s3_file_path)
    assert not local_copy.exists()

    # The captured exception is only available once the ``with`` block has exited.
    with pytest.raises(ClientError) as exc:
        self.s3_client.Object(self.Storage.bucket_name, self.filename).load()
    error = exc.value.response['Error']
    assert error['Code'] == '404'
    assert error['Message'] == 'Not Found'
def test_delete_all_files(self, monkeypatch, tmp_path):
upload_dir = mock_upload_dir(monkeypatch, tmp_path)
@@ -160,12 +153,12 @@ class TestS3StorageProvider:
self.s3_client.create_bucket(Bucket=self.Storage.bucket_name)
self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
object = self.s3_client.Object(self.Storage.bucket_name, self.filename)
assert self.file_content == object.get()["Body"].read()
assert self.file_content == object.get()['Body'].read()
assert (upload_dir / self.filename).exists()
assert (upload_dir / self.filename).read_bytes() == self.file_content
self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra)
object = self.s3_client.Object(self.Storage.bucket_name, self.filename_extra)
assert self.file_content == object.get()["Body"].read()
assert self.file_content == object.get()['Body'].read()
assert (upload_dir / self.filename).exists()
assert (upload_dir / self.filename).read_bytes() == self.file_content
@@ -173,15 +166,15 @@ class TestS3StorageProvider:
assert not (upload_dir / self.filename).exists()
with pytest.raises(ClientError) as exc:
self.s3_client.Object(self.Storage.bucket_name, self.filename).load()
error = exc.value.response["Error"]
assert error["Code"] == "404"
assert error["Message"] == "Not Found"
error = exc.value.response['Error']
assert error['Code'] == '404'
assert error['Message'] == 'Not Found'
assert not (upload_dir / self.filename_extra).exists()
with pytest.raises(ClientError) as exc:
self.s3_client.Object(self.Storage.bucket_name, self.filename_extra).load()
error = exc.value.response["Error"]
assert error["Code"] == "404"
assert error["Message"] == "Not Found"
error = exc.value.response['Error']
assert error['Code'] == '404'
assert error['Message'] == 'Not Found'
self.Storage.delete_all_files()
assert not (upload_dir / self.filename).exists()
@@ -190,8 +183,8 @@ class TestS3StorageProvider:
def test_init_without_credentials(self, monkeypatch):
"""Test that S3StorageProvider can initialize without explicit credentials."""
# Temporarily unset the environment variables
monkeypatch.setattr(provider, "S3_ACCESS_KEY_ID", None)
monkeypatch.setattr(provider, "S3_SECRET_ACCESS_KEY", None)
monkeypatch.setattr(provider, 'S3_ACCESS_KEY_ID', None)
monkeypatch.setattr(provider, 'S3_SECRET_ACCESS_KEY', None)
# Should not raise an exception
storage = provider.S3StorageProvider()
@@ -201,19 +194,19 @@ class TestS3StorageProvider:
class TestGCSStorageProvider:
Storage = provider.GCSStorageProvider()
Storage.bucket_name = "my-bucket"
file_content = b"test content"
filename = "test.txt"
filename_extra = "test_exyta.txt"
Storage.bucket_name = 'my-bucket'
file_content = b'test content'
filename = 'test.txt'
filename_extra = 'test_exyta.txt'
file_bytesio_empty = io.BytesIO()
@pytest.fixture(scope="class")
@pytest.fixture(scope='class')
def setup(self):
host, port = "localhost", 9023
host, port = 'localhost', 9023
server = create_server(host, port, in_memory=True)
server.start()
os.environ["STORAGE_EMULATOR_HOST"] = f"http://{host}:{port}"
os.environ['STORAGE_EMULATOR_HOST'] = f'http://{host}:{port}'
gcs_client = storage.Client()
bucket = gcs_client.bucket(self.Storage.bucket_name)
@@ -227,36 +220,30 @@ class TestGCSStorageProvider:
upload_dir = mock_upload_dir(monkeypatch, tmp_path)
# catch error if bucket does not exist
with pytest.raises(Exception):
self.Storage.bucket = monkeypatch(self.Storage, "bucket", None)
self.Storage.bucket = monkeypatch(self.Storage, 'bucket', None)
self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
contents, gcs_file_path = self.Storage.upload_file(
io.BytesIO(self.file_content), self.filename
)
contents, gcs_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
object = self.Storage.bucket.get_blob(self.filename)
assert self.file_content == object.download_as_bytes()
# local checks
assert (upload_dir / self.filename).exists()
assert (upload_dir / self.filename).read_bytes() == self.file_content
assert contents == self.file_content
assert gcs_file_path == "gs://" + self.Storage.bucket_name + "/" + self.filename
assert gcs_file_path == 'gs://' + self.Storage.bucket_name + '/' + self.filename
# test error if file is empty
with pytest.raises(ValueError):
self.Storage.upload_file(self.file_bytesio_empty, self.filename)
def test_get_file(self, monkeypatch, tmp_path, setup):
    """Upload a file, fetch it back by the returned path, and check the local copy."""
    upload_dir = mock_upload_dir(monkeypatch, tmp_path)
    expected_local = upload_dir / self.filename

    _, gcs_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
    fetched_path = self.Storage.get_file(gcs_file_path)

    assert fetched_path == str(expected_local)
    assert expected_local.exists()
def test_delete_file(self, monkeypatch, tmp_path, setup):
upload_dir = mock_upload_dir(monkeypatch, tmp_path)
contents, gcs_file_path = self.Storage.upload_file(
io.BytesIO(self.file_content), self.filename
)
contents, gcs_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
# ensure that local directory has the uploaded file as well
assert (upload_dir / self.filename).exists()
assert self.Storage.bucket.get_blob(self.filename).name == self.filename
@@ -278,10 +265,7 @@ class TestGCSStorageProvider:
object = self.Storage.bucket.get_blob(self.filename_extra)
assert (upload_dir / self.filename_extra).exists()
assert (upload_dir / self.filename_extra).read_bytes() == self.file_content
assert (
self.Storage.bucket.get_blob(self.filename_extra).name
== self.filename_extra
)
assert self.Storage.bucket.get_blob(self.filename_extra).name == self.filename_extra
assert self.file_content == object.download_as_bytes()
self.Storage.delete_all_files()
@@ -295,7 +279,7 @@ class TestAzureStorageProvider:
def __init__(self):
super().__init__()
@pytest.fixture(scope="class")
@pytest.fixture(scope='class')
def setup_storage(self, monkeypatch):
# Create mock Blob Service Client and related clients
mock_blob_service_client = MagicMock()
@@ -303,32 +287,28 @@ class TestAzureStorageProvider:
mock_blob_client = MagicMock()
# Set up return values for the mock
mock_blob_service_client.get_container_client.return_value = (
mock_container_client
)
mock_blob_service_client.get_container_client.return_value = mock_container_client
mock_container_client.get_blob_client.return_value = mock_blob_client
# Monkeypatch the Azure classes to return our mocks
monkeypatch.setattr(
azure.storage.blob,
"BlobServiceClient",
'BlobServiceClient',
lambda *args, **kwargs: mock_blob_service_client,
)
monkeypatch.setattr(
azure.storage.blob,
"ContainerClient",
'ContainerClient',
lambda *args, **kwargs: mock_container_client,
)
monkeypatch.setattr(
azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client
)
monkeypatch.setattr(azure.storage.blob, 'BlobClient', lambda *args, **kwargs: mock_blob_client)
self.Storage = provider.AzureStorageProvider()
self.Storage.endpoint = "https://myaccount.blob.core.windows.net"
self.Storage.container_name = "my-container"
self.file_content = b"test content"
self.filename = "test.txt"
self.filename_extra = "test_extra.txt"
self.Storage.endpoint = 'https://myaccount.blob.core.windows.net'
self.Storage.container_name = 'my-container'
self.file_content = b'test content'
self.filename = 'test.txt'
self.filename_extra = 'test_extra.txt'
self.file_bytesio_empty = io.BytesIO()
# Apply mocks to the Storage instance
@@ -339,18 +319,14 @@ class TestAzureStorageProvider:
upload_dir = mock_upload_dir(monkeypatch, tmp_path)
# Simulate an error when container does not exist
self.Storage.container_client.get_blob_client.side_effect = Exception(
"Container does not exist"
)
self.Storage.container_client.get_blob_client.side_effect = Exception('Container does not exist')
with pytest.raises(Exception):
self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
# Reset side effect and create container
self.Storage.container_client.get_blob_client.side_effect = None
self.Storage.create_container()
contents, azure_file_path = self.Storage.upload_file(
io.BytesIO(self.file_content), self.filename
)
contents, azure_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
# Assertions
self.Storage.container_client.get_blob_client.assert_called_with(self.filename)
@@ -359,8 +335,7 @@ class TestAzureStorageProvider:
)
assert contents == self.file_content
assert (
azure_file_path
== f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"
azure_file_path == f'https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}'
)
assert (upload_dir / self.filename).exists()
assert (upload_dir / self.filename).read_bytes() == self.file_content
@@ -375,11 +350,9 @@ class TestAzureStorageProvider:
# Mock upload behavior
self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
# Mock blob download behavior
self.Storage.container_client.get_blob_client().download_blob().readall.return_value = (
self.file_content
)
self.Storage.container_client.get_blob_client().download_blob().readall.return_value = self.file_content
file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"
file_url = f'https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}'
file_path = self.Storage.get_file(file_url)
assert file_path == str(upload_dir / self.filename)
@@ -395,7 +368,7 @@ class TestAzureStorageProvider:
# Mock deletion
self.Storage.container_client.get_blob_client().delete_blob.return_value = None
file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"
file_url = f'https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}'
self.Storage.delete_file(file_url)
self.Storage.container_client.get_blob_client().delete_blob.assert_called_once()
@@ -411,8 +384,8 @@ class TestAzureStorageProvider:
# Mock listing and deletion behavior
self.Storage.container_client.list_blobs.return_value = [
{"name": self.filename},
{"name": self.filename_extra},
{'name': self.filename},
{'name': self.filename_extra},
]
self.Storage.container_client.get_blob_client().delete_blob.return_value = None
@@ -426,10 +399,8 @@ class TestAzureStorageProvider:
def test_get_file_not_found(self, monkeypatch):
    """``get_file`` must propagate the error raised when the blob download fails."""
    self.Storage.create_container()

    # Make the blob download raise, standing in for a missing blob.
    blob_client = self.Storage.container_client.get_blob_client()
    blob_client.download_blob.side_effect = Exception('Blob not found')

    file_url = f'https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}'
    with pytest.raises(Exception, match='Blob not found'):
        self.Storage.get_file(file_url)