From 5d6be2a5fc4e1e1c993debbf8dd39deb6d60f128 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Tue, 21 Apr 2026 16:06:20 +0800 Subject: [PATCH] test: cover expanded tree metadata flows --- .gitignore | 2 +- .../components/test_repo_viewer_paths.test.js | 252 +++- test/kohaku-hub-ui/utils/test_api.test.js | 60 + .../kohakuhub/api/fallback/test_decorators.py | 105 ++ .../kohakuhub/api/fallback/test_operations.py | 37 +- .../api/repo/routers/test_tree_unit.py | 1222 ++++++++++++++--- 6 files changed, 1447 insertions(+), 231 deletions(-) diff --git a/.gitignore b/.gitignore index 2a5c5c1..a9955f1 100644 --- a/.gitignore +++ b/.gitignore @@ -79,7 +79,7 @@ htmlcov/ .cache nosetests.xml coverage.xml -coverage-ui/ +coverage-ui*/ *.cover *.py.cover .hypothesis/ diff --git a/test/kohaku-hub-ui/components/test_repo_viewer_paths.test.js b/test/kohaku-hub-ui/components/test_repo_viewer_paths.test.js index 260bde7..cb5dc86 100644 --- a/test/kohaku-hub-ui/components/test_repo_viewer_paths.test.js +++ b/test/kohaku-hub-ui/components/test_repo_viewer_paths.test.js @@ -14,7 +14,8 @@ const mocks = vi.hoisted(() => ({ }, repoApi: { getInfo: vi.fn(), - listTree: vi.fn(), + listTreeAll: vi.fn(), + getPathsInfo: vi.fn(), listCommits: vi.fn(), }, likesApi: { @@ -22,9 +23,6 @@ const mocks = vi.hoisted(() => ({ like: vi.fn(), unlike: vi.fn(), }, - axios: { - get: vi.fn(), - }, })); vi.mock("vue-router/auto", () => ({ @@ -36,19 +34,23 @@ vi.mock("@/utils/api", () => ({ likesAPI: mocks.likesApi, })); -vi.mock("axios", () => ({ - get: mocks.axios.get, - default: { - get: mocks.axios.get, - }, -})); - import RepoViewer from "@/components/repo/RepoViewer.vue"; +function deferred() { + let resolve; + let reject; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + describe("RepoViewer path handling", () => { beforeEach(() => { vi.clearAllMocks(); setActivePinia(createPinia()); + vi.spyOn(console, "error").mockImplementation(() => {}); mocks.repoApi.getInfo.mockResolvedValue({ data: { @@ -70,11 +72,7 @@ describe("RepoViewer path handling", () => { ); }); - function mountViewer(props, treeEntries) { - mocks.repoApi.listTree.mockResolvedValue({ - data: treeEntries, - }); - + function mountViewer(props) { return mount(RepoViewer, { props: { repoType: "dataset", @@ -100,28 +98,67 @@ describe("RepoViewer path handling", () => { }); } - it("does not duplicate directory paths when the tree API returns repo-root paths", async () => { - const wrapper = mountViewer( + it("loads repo-root tree entries, merges expanded path info, and links commits", async () => { + mocks.repoApi.listTreeAll.mockResolvedValue([ { - currentPath: "catalog", + type: "directory", + path: "catalog/section-01", + size: 0, + lastModified: "2026-04-21T13:53:12.000000Z", }, - [ + ]); + mocks.repoApi.getPathsInfo.mockResolvedValue({ + data: [ { type: "directory", path: "catalog/section-01", size: 10, - lastModified: "2026-04-21T13:53:12.000000Z", + lastCommit: { + id: "commit-1", + title: "Add section summary", + date: "2026-04-21T13:53:12.000000Z", + }, }, ], - ); + }); + + const wrapper = mountViewer({ currentPath: "catalog" }); await flushPromises(); + await flushPromises(); + + expect(mocks.repoApi.listTreeAll).toHaveBeenCalledWith( + "dataset", + "open-media-lab", + "hierarchy-crawl-fixtures", + "main", + "/catalog", + { recursive: false }, + ); + expect(mocks.repoApi.getPathsInfo).toHaveBeenCalledWith( + "dataset", + "open-media-lab", + "hierarchy-crawl-fixtures", + "main", + 
["catalog/section-01"], + true, + ); const row = wrapper .findAll('[class*="cursor-pointer"]') .find((node) => node.text().includes("section-01")); - expect(row).toBeTruthy(); + expect(wrapper.text()).toContain("Add section summary"); + + const commitLink = wrapper + .findAll('a[data-router-link="true"]') + .find( + (node) => + node.attributes("href") === + "/datasets/open-media-lab/hierarchy-crawl-fixtures/commit/commit-1", + ); + expect(commitLink).toBeTruthy(); + await row.trigger("click"); expect(mocks.router.push).toHaveBeenCalledWith( @@ -129,33 +166,174 @@ describe("RepoViewer path handling", () => { ); }); - it("does not duplicate file paths when the tree API returns repo-root paths", async () => { - const wrapper = mountViewer( + it("keeps repo-root file navigation working when expanded path info fails", async () => { + mocks.repoApi.listTreeAll.mockResolvedValue([ { - name: "table-scan-fixtures", - currentPath: "metadata", + type: "file", + path: "metadata/features.json", + size: 42, + lastModified: "2026-04-21T13:53:39.000000Z", }, - [ - { - type: "file", - path: "metadata/features.json", - size: 42, - lastModified: "2026-04-21T13:53:39.000000Z", - }, - ], - ); + ]); + mocks.repoApi.getPathsInfo.mockRejectedValue(new Error("expand failed")); + const wrapper = mountViewer({ + name: "table-scan-fixtures", + currentPath: "metadata", + }); + + await flushPromises(); await flushPromises(); const row = wrapper .findAll('[class*="cursor-pointer"]') .find((node) => node.text().includes("features.json")); - expect(row).toBeTruthy(); + await row.trigger("click"); expect(mocks.router.push).toHaveBeenCalledWith( "/datasets/open-media-lab/table-scan-fixtures/blob/main/metadata/features.json", ); }); + + it("ignores stale tree responses after the current path changes", async () => { + const firstTree = deferred(); + const secondTree = deferred(); + + mocks.repoApi.listTreeAll.mockImplementation( + (type, namespace, name, branch, path) => { + if (path === "/catalog") { + return firstTree.promise; + } + if (path === "/catalog-next") { + return secondTree.promise; + } + return Promise.resolve([]); + }, + ); + mocks.repoApi.getPathsInfo.mockResolvedValue({ + data: [{ type: "file", path: "catalog-next/new.txt", size: 1 }], + }); + + const wrapper = mountViewer({ currentPath: "catalog" }); + + await flushPromises(); + await wrapper.setProps({ currentPath: "catalog-next" }); + + secondTree.resolve([ + { + type: "file", + path: "catalog-next/new.txt", + size: 1, + lastModified: "2026-04-21T13:53:39.000000Z", + }, + ]); + await flushPromises(); + await flushPromises(); + + firstTree.resolve([ + { + type: "file", + path: "catalog/old.txt", + size: 1, + lastModified: "2026-04-21T13:53:39.000000Z", + }, + ]); + await flushPromises(); + await flushPromises(); + + expect(wrapper.text()).toContain("new.txt"); + expect(wrapper.text()).not.toContain("old.txt"); + expect(mocks.repoApi.getPathsInfo).toHaveBeenCalledTimes(1); + expect(mocks.repoApi.getPathsInfo).toHaveBeenCalledWith( + "dataset", + "open-media-lab", + "hierarchy-crawl-fixtures", + "main", + ["catalog-next/new.txt"], + true, + ); + }); + + it("ignores stale expanded path info responses after a newer request wins", async () => { + const firstPathsInfo = deferred(); + + mocks.repoApi.listTreeAll.mockImplementation( + (type, namespace, name, branch, path) => { + if (path === "/catalog") { + return Promise.resolve([ + { + type: "file", + path: "catalog/old.txt", + size: 1, + lastModified: "2026-04-21T13:53:39.000000Z", + }, + ]); + } + if (path === 
"/catalog-next") { + return Promise.resolve([ + { + type: "file", + path: "catalog-next/new.txt", + size: 1, + lastModified: "2026-04-21T13:53:39.000000Z", + }, + ]); + } + return Promise.resolve([]); + }, + ); + mocks.repoApi.getPathsInfo.mockImplementation( + (type, namespace, name, branch, paths) => { + if (paths[0] === "catalog/old.txt") { + return firstPathsInfo.promise; + } + return Promise.resolve({ + data: [ + { + type: "file", + path: "catalog-next/new.txt", + size: 3, + lastCommit: { + id: "commit-2", + title: "Ship new tree row", + date: "2026-04-21T13:53:39.000000Z", + }, + }, + ], + }); + }, + ); + + const wrapper = mountViewer({ currentPath: "catalog" }); + + await flushPromises(); + await flushPromises(); + + await wrapper.setProps({ currentPath: "catalog-next" }); + await flushPromises(); + await flushPromises(); + + firstPathsInfo.resolve({ + data: [ + { + type: "file", + path: "catalog/old.txt", + size: 99, + lastCommit: { + id: "commit-1", + title: "Old tree row", + date: "2026-04-21T13:53:39.000000Z", + }, + }, + ], + }); + await flushPromises(); + await flushPromises(); + + expect(wrapper.text()).toContain("new.txt"); + expect(wrapper.text()).toContain("Ship new tree row"); + expect(wrapper.text()).not.toContain("Old tree row"); + }); }); diff --git a/test/kohaku-hub-ui/utils/test_api.test.js b/test/kohaku-hub-ui/utils/test_api.test.js index b8ad2d5..0118c3e 100644 --- a/test/kohaku-hub-ui/utils/test_api.test.js +++ b/test/kohaku-hub-ui/utils/test_api.test.js @@ -291,6 +291,66 @@ describe("frontend API client", () => { }); }); + it("follows paginated tree Link headers and submits expanded paths-info forms", async () => { + const { apiClient, repoAPI } = await loadModules(); + + const getSpy = vi + .spyOn(apiClient, "get") + .mockResolvedValueOnce({ + data: [{ path: "docs" }], + headers: { + link: '; rel="next"', + }, + }) + .mockResolvedValueOnce({ + data: [{ path: "docs/guide.md" }], + headers: {}, + }); + const postSpy = vi.spyOn(apiClient, "post").mockResolvedValue({ data: [] }); + + const allEntries = await repoAPI.listTreeAll( + "model", + "alice", + "demo", + "main", + "/docs", + { recursive: false }, + ); + await repoAPI.getPathsInfo( + "model", + "alice", + "demo", + "main", + ["docs", "docs/guide.md"], + true, + ); + + expect(allEntries).toEqual([{ path: "docs" }, { path: "docs/guide.md" }]); + expect(getSpy).toHaveBeenNthCalledWith( + 1, + "/api/models/alice/demo/tree/main/docs", + { params: { recursive: false } }, + ); + expect(getSpy).toHaveBeenNthCalledWith( + 2, + "https://hub.local/api/models/alice/demo/tree/main/docs?cursor=page-2", + ); + + expect(postSpy).toHaveBeenCalledTimes(1); + expect(postSpy.mock.calls[0][0]).toBe( + "/api/models/alice/demo/paths-info/main", + ); + expect(postSpy.mock.calls[0][1]).toBeInstanceOf(URLSearchParams); + expect(postSpy.mock.calls[0][1].toString()).toBe( + "paths=docs&paths=docs%2Fguide.md&expand=true", + ); + expect(postSpy.mock.calls[0][2]).toEqual({ + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + }); + }); + it("builds NDJSON commits for ignored, regular, LFS, and editor flows", async () => { const originalFileReader = globalThis.FileReader; globalThis.FileReader = class { diff --git a/test/kohakuhub/api/fallback/test_decorators.py b/test/kohakuhub/api/fallback/test_decorators.py index bcd91b7..20bd0ad 100644 --- a/test/kohakuhub/api/fallback/test_decorators.py +++ b/test/kohakuhub/api/fallback/test_decorators.py @@ -123,6 +123,111 @@ async def 
test_with_repo_fallback_returns_original_response_on_fallback_miss(mon
     assert result is original
 
 
+@pytest.mark.asyncio
+async def test_with_repo_fallback_forwards_tree_and_paths_info_expand_parameters(monkeypatch):
+    forwarded_tree_calls = []
+    forwarded_paths_info_calls = []
+
+    monkeypatch.setattr(
+        fallback_decorators,
+        "get_merged_external_tokens",
+        lambda user, header_tokens: {"https://hf.local": "token"},
+    )
+
+    async def fake_try_fallback_tree(*args, **kwargs):
+        forwarded_tree_calls.append((args, kwargs))
+        return {"tree": True}
+
+    async def fake_try_fallback_paths_info(*args, **kwargs):
+        forwarded_paths_info_calls.append((args, kwargs))
+        return [{"path": "README.md"}]
+
+    monkeypatch.setattr(fallback_decorators, "try_fallback_tree", fake_try_fallback_tree)
+    monkeypatch.setattr(
+        fallback_decorators,
+        "try_fallback_paths_info",
+        fake_try_fallback_paths_info,
+    )
+
+    @fallback_decorators.with_repo_fallback("tree")
+    async def tree_handler(
+        namespace: str,
+        name: str,
+        revision: str,
+        path: str = "",
+        recursive: bool = False,
+        expand: bool = False,
+        limit: int | None = None,
+        cursor: str | None = None,
+        request=None,
+        user=None,
+    ):
+        raise HTTPException(status_code=404, detail="missing")
+
+    @fallback_decorators.with_repo_fallback("paths_info")
+    async def paths_info_handler(
+        repo_type=None,
+        namespace: str = "",
+        repo_name: str = "",
+        revision: str = "",
+        paths=None,
+        expand: bool = False,
+        request=None,
+        user=None,
+    ):
+        raise HTTPException(status_code=404, detail="missing")
+
+    tree_request = _request("/api/models/owner/demo/tree/main/docs")
+    tree_result = await tree_handler(
+        namespace="owner",
+        name="demo",
+        revision="main",
+        path="docs",
+        recursive=True,
+        expand=True,
+        limit=25,
+        cursor="page-1",
+        request=tree_request,
+        user="owner-user",
+    )
+    assert tree_result == {"tree": True}
+    assert forwarded_tree_calls == [
+        (
+            ("model", "owner", "demo", "main", "docs"),
+            {
+                "recursive": True,
+                "expand": True,
+                "limit": 25,
+                "cursor": "page-1",
+                "user_tokens": {"https://hf.local": "token"},
+            },
+        )
+    ]
+
+    paths_info_request = _request("/api/models/owner/demo/paths-info/main")
+    repo_type = SimpleNamespace(value="model")
+    paths_info_result = await paths_info_handler(
+        repo_type=repo_type,
+        namespace="owner",
+        repo_name="demo",
+        revision="main",
+        paths=["README.md", "docs"],
+        expand=True,
+        request=paths_info_request,
+        user="owner-user",
+    )
+    assert paths_info_result == [{"path": "README.md"}]
+    assert forwarded_paths_info_calls == [
+        (
+            ("model", "owner", "demo", "main", ["README.md", "docs"]),
+            {
+                "expand": True,
+                "user_tokens": {"https://hf.local": "token"},
+            },
+        )
+    ]
+
+
 @pytest.mark.asyncio
 async def test_with_list_aggregation_merges_local_and_external_results(monkeypatch):
     monkeypatch.setattr(
diff --git a/test/kohakuhub/api/fallback/test_operations.py b/test/kohakuhub/api/fallback/test_operations.py
index 05f2c11..cffdd13 100644
--- a/test/kohakuhub/api/fallback/test_operations.py
+++ b/test/kohakuhub/api/fallback/test_operations.py
@@ -268,7 +268,17 @@ async def test_try_fallback_info_tree_and_paths_info_cover_success_paths(monkeyp
         "https://source.local",
         "GET",
         "/api/models/owner/demo/tree/main/folder/file.txt",
-        _json_response(200, [{"path": "folder/file.txt"}]),
+        httpx.Response(
+            200,
+            json=[{"path": "folder/file.txt"}],
+            headers={
+                "content-type": "application/json",
+                "link": '<https://source.local/api/models/owner/demo/tree/main/folder/file.txt?cursor=page-2>; rel="next"',
+            },
+            request=httpx.Request(
+                "GET", "https://source.local/api/models/owner/demo/tree/main/folder/file.txt"
+            ),
+        ),
     )
     FakeFallbackClient.queue(
         "https://source.local",
@@ -278,23 +288,42 @@ async def test_try_fallback_info_tree_and_paths_info_cover_success_paths(monkeyp
         _json_response(200, [{"path": "folder/file.txt", "type": "file"}]),
     )
 
     info = await fallback_ops.try_fallback_info("model", "owner", "demo")
-    tree = await fallback_ops.try_fallback_tree("model", "owner", "demo", "main", "/folder/file.txt")
+    tree = await fallback_ops.try_fallback_tree(
+        "model",
+        "owner",
+        "demo",
+        "main",
+        "/folder/file.txt",
+        recursive=True,
+        expand=True,
+        limit=25,
+        cursor="page-1",
+    )
     paths_info = await fallback_ops.try_fallback_paths_info(
         "model",
         "owner",
         "demo",
         "main",
         ["folder/file.txt"],
+        expand=True,
     )
 
     assert info["_source"] == "Source"
     assert info["_source_url"] == "https://source.local"
-    assert tree == [{"path": "folder/file.txt"}]
+    assert tree.status_code == 200
+    assert tree.body == b'[{"path":"folder/file.txt"}]'
+    assert tree.headers["link"] == '<https://source.local/api/models/owner/demo/tree/main/folder/file.txt?cursor=page-2>; rel="next"'
     assert paths_info == [{"path": "folder/file.txt", "type": "file"}]
     assert cache.set_calls[0][0][:3] == ("model", "owner", "demo")
+    assert FakeFallbackClient.calls[1][3]["params"] == {
+        "recursive": True,
+        "expand": True,
+        "limit": 25,
+        "cursor": "page-1",
+    }
     assert FakeFallbackClient.calls[-1][3]["data"] == {
         "paths": ["folder/file.txt"],
-        "expand": False,
+        "expand": True,
     }
diff --git a/test/kohakuhub/api/repo/routers/test_tree_unit.py b/test/kohakuhub/api/repo/routers/test_tree_unit.py
index 819bb6d..3756fed 100644
--- a/test/kohakuhub/api/repo/routers/test_tree_unit.py
+++ b/test/kohakuhub/api/repo/routers/test_tree_unit.py
@@ -2,10 +2,13 @@
 
 from __future__ import annotations
 
-from datetime import datetime
+import asyncio
+import json
 from types import SimpleNamespace
+from urllib.parse import parse_qs, urlparse
 
 import pytest
+from fastapi.responses import JSONResponse
 
 import kohakuhub.api.repo.routers.tree as tree_api
 
@@ -36,32 +39,175 @@ class _FakeLakeFSClient:
         return result
 
 
+class _FakeLakeFSRestClient:
+    def __init__(self, *, log_responses=None, diff_responses=None):
+        self.log_responses = list(log_responses or [])
+        self.diff_responses = list(diff_responses or [])
+        self.log_calls = []
+        self.diff_calls = []
+
+    async def log_commits(self, **kwargs):
+        self.log_calls.append(kwargs)
+        return self.log_responses.pop(0)
+
+    async def diff_refs(self, **kwargs):
+        self.diff_calls.append(kwargs)
+        return self.diff_responses.pop(0)
+
+
+class _Expression:
+    def __init__(self, label: str):
+        self.label = label
+
+    def __and__(self, other: "_Expression") -> "_Expression":
+        return _Expression(f"({self.label}&{other.label})")
+
+
+class _Field:
+    def __init__(self, label: str):
+        self.label = label
+
+    def __eq__(self, other) -> _Expression:  # noqa: ANN001 - Peewee-style stub
+        return _Expression(f"{self.label}=={other!r}")
+
+    def in_(self, values) -> _Expression:  # noqa: ANN001 - Peewee-style stub
+        return _Expression(f"{self.label}.in_({list(values)!r})")
+
+
+class _FakeQuery(list):
+    def __init__(self, rows):
+        super().__init__(rows)
+        self.where_expression = None
+
+    def where(self, expression):
+        self.where_expression = expression
+        return self
+
+
+def _json_body(response: JSONResponse) -> list[dict]:
+    return json.loads(response.body.decode())
+
+
+def _request(path: str, query=None):
+    return SimpleNamespace(
+        query_params=query or {},
+        url=SimpleNamespace(path=path),
+    )
+
+
+def test_helper_functions_cover_path_formatting_links_and_file_records(monkeypatch):
+    assert tree_api._normalize_repo_path("/nested/path/") == "nested/path"
+    assert tree_api._normalize_repo_path("/") 
== "" + assert tree_api._format_last_modified(None) is None + assert tree_api._format_last_modified(0) is None + assert tree_api._format_commit_date(None) is None + assert tree_api._format_commit_date("2026-04-21T00:00:00.000000Z") == ( + "2026-04-21T00:00:00.000000Z" + ) + + serialized = tree_api._serialize_last_commit( + { + "id": "commit-1", + "message": "Add README", + "creation_date": 1713657600, + } + ) + assert serialized["id"] == "commit-1" + assert serialized["title"] == "Add README" + assert serialized["date"].endswith("Z") + + assert tree_api._build_lfs_payload("sha256", 32) == { + "oid": "sha256", + "size": 32, + "pointerSize": 134, + } + + monkeypatch.setattr(tree_api.cfg.app, "base_url", "https://hub.local/") + next_link = tree_api._build_public_link( + _request( + "/api/models/owner/demo/tree/main/docs", + query={"recursive": "false", "expand": "true"}, + ), + limit=50, + cursor="cursor-2", + ) + parsed = urlparse(next_link) + assert parsed.scheme == "https" + assert parsed.netloc == "hub.local" + assert parsed.path == "/api/models/owner/demo/tree/main/docs" + assert parse_qs(parsed.query) == { + "recursive": ["false"], + "expand": ["true"], + "limit": ["50"], + "cursor": ["cursor-2"], + } + + rows = [ + SimpleNamespace(path_in_repo="README.md", sha256="sha-readme"), + SimpleNamespace(path_in_repo="weights/model.bin", sha256="sha-lfs"), + ] + fake_query = _FakeQuery(rows) + + class _FakeFileModel: + repository = _Field("repository") + path_in_repo = _Field("path_in_repo") + is_deleted = _Field("is_deleted") + + @staticmethod + def select(): + return fake_query + + monkeypatch.setattr(tree_api, "File", _FakeFileModel) + + records = tree_api._build_file_record_map( + SimpleNamespace(id=1), + ["README.md", "weights/model.bin"], + ) + assert records == { + "README.md": rows[0], + "weights/model.bin": rows[1], + } + assert fake_query.where_expression is not None + assert tree_api._build_file_record_map(SimpleNamespace(id=1), []) == {} + + @pytest.mark.asyncio -async def test_fetch_lakefs_objects_and_calculate_folder_stats_cover_pagination(monkeypatch): - fetch_client = _FakeLakeFSClient( +async def test_fetch_page_and_directory_stats_cover_pagination(monkeypatch): + page_client = _FakeLakeFSClient( list_responses=[ { - "results": [{"path": "a.txt", "path_type": "object"}], - "pagination": {"has_more": True, "next_offset": "page-2"}, - }, - { - "results": [{"path": "b.txt", "path_type": "object"}], + "results": [{"path": "docs/a.txt", "path_type": "object"}], "pagination": {"has_more": False}, - }, + } ] ) - monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: fetch_client) + monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: page_client) - results = await tree_api.fetch_lakefs_objects("lake", "main", "docs/", recursive=False) - assert [item["path"] for item in results] == ["a.txt", "b.txt"] - assert fetch_client.list_calls[0]["delimiter"] == "/" - assert fetch_client.list_calls[1]["after"] == "page-2" + page = await tree_api.fetch_lakefs_objects_page( + "lake", + "main", + "docs/", + recursive=False, + amount=25, + ) + assert page["results"][0]["path"] == "docs/a.txt" + assert page_client.list_calls == [ + { + "repository": "lake", + "ref": "main", + "prefix": "docs/", + "delimiter": "/", + "amount": 25, + "after": "", + } + ] - folder_client = _FakeLakeFSClient( + directory_client = _FakeLakeFSClient( list_responses=[ { "results": [ {"path_type": "object", "size_bytes": 4, "mtime": 10}, + {"path_type": "common_prefix", "size_bytes": 999, "mtime": 999}, ], 
"pagination": {"has_more": True, "next_offset": "page-2"}, }, @@ -73,233 +219,781 @@ async def test_fetch_lakefs_objects_and_calculate_folder_stats_cover_pagination( }, ] ) - monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: folder_client) + monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: directory_client) - folder_size, latest_mtime = await tree_api.calculate_folder_stats("lake", "main", "docs/") - assert folder_size == 10 - assert latest_mtime == 20 - - failing_folder_client = _FakeLakeFSClient(list_responses=[RuntimeError("folder failed")]) - monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: failing_folder_client) - failed_size, failed_mtime = await tree_api.calculate_folder_stats("lake", "main", "docs/") - assert failed_size == 0 - assert failed_mtime is None - - -@pytest.mark.asyncio -async def test_convert_file_object_adds_lfs_and_last_modified_metadata(monkeypatch): - monkeypatch.setattr(tree_api, "should_use_lfs", lambda repository, path, size: True) - monkeypatch.setattr( - tree_api, - "get_file", - lambda repository, path: SimpleNamespace(sha256="sha256-lfs"), - ) - - result = await tree_api.convert_file_object( - { - "path": "weights.bin", - "size_bytes": 32, - "checksum": "lakefs-sha", - "mtime": 123, - }, - SimpleNamespace(full_id="owner/demo"), - ) - - assert result["lfs"] == {"oid": "sha256-lfs", "size": 32, "pointerSize": 134} - assert "lastModified" in result - - -@pytest.mark.asyncio -async def test_convert_directory_object_uses_object_mtime_when_folder_stats_have_no_latest_time( - monkeypatch, -): - async def _fake_calculate_folder_stats(*args): - return (12, None) - - monkeypatch.setattr(tree_api, "calculate_folder_stats", _fake_calculate_folder_stats) - - result = await tree_api.convert_directory_object( - {"path": "docs/", "checksum": "tree-sha", "mtime": 123}, + total_size, latest_mtime = await tree_api._calculate_directory_stats( "lake", "main", + "docs", + ) + assert total_size == 10 + assert latest_mtime == 20 + assert directory_client.list_calls[0]["prefix"] == "docs/" + assert directory_client.list_calls[1]["after"] == "page-2" + + +def test_make_tree_item_and_apply_changed_path_cover_file_directory_and_ancestors(monkeypatch): + monkeypatch.setattr(tree_api, "should_use_lfs", lambda repository, path, size: False) + + file_record = SimpleNamespace(sha256="sha256-lfs", lfs=True) + file_item = tree_api._make_tree_item( + { + "path_type": "object", + "path": "weights/model.bin", + "size_bytes": 32, + "checksum": "lakefs-sha", + "mtime": 1713657600, + }, + repository=SimpleNamespace(id=1), + file_records={"weights/model.bin": file_record}, + expand=True, + last_commit={"id": "commit-1", "title": "Track weights"}, + ) + assert file_item == { + "type": "file", + "oid": "sha256-lfs", + "size": 32, + "path": "weights/model.bin", + "lastModified": tree_api._format_last_modified(1713657600), + "lfs": { + "oid": "sha256-lfs", + "size": 32, + "pointerSize": 134, + }, + "lastCommit": {"id": "commit-1", "title": "Track weights"}, + "securityFileStatus": None, + } + + directory_item = tree_api._make_tree_item( + { + "path_type": "common_prefix", + "path": "docs/", + "checksum": "tree-oid", + "mtime": 1713657600, + }, + repository=SimpleNamespace(id=1), + file_records={}, + expand=True, + last_commit={"id": "commit-2", "title": "Docs refresh"}, + ) + assert directory_item == { + "type": "directory", + "oid": "tree-oid", + "size": 0, + "path": "docs", + "lastModified": tree_api._format_last_modified(1713657600), + "lastCommit": {"id": "commit-2", 
"title": "Docs refresh"}, + } + + unresolved_files = {"docs/guide.md"} + unresolved_directories = {"docs", "weights"} + resolved = {} + commit_info = {"id": "commit-3", "title": "Update nested paths"} + + tree_api._apply_changed_path( + "docs/guide.md", + unresolved_files, + unresolved_directories, + resolved, + commit_info, ) - assert result["path"] == "docs" - assert result["size"] == 12 - assert "lastModified" in result + assert unresolved_files == set() + assert unresolved_directories == {"weights"} + assert resolved == { + "docs/guide.md": commit_info, + "docs": commit_info, + } + + direct_directory_targets = {"weights"} + tree_api._apply_changed_path( + "/", + set(), + direct_directory_targets, + resolved, + commit_info, + ) + assert direct_directory_targets == {"weights"} + + tree_api._apply_changed_path( + "weights", + set(), + direct_directory_targets, + resolved, + commit_info, + ) + assert direct_directory_targets == set() + assert resolved["weights"] == commit_info @pytest.mark.asyncio -async def test_list_repo_tree_covers_missing_repo_success_and_error_paths(monkeypatch): - request = SimpleNamespace() - monkeypatch.setattr(tree_api, "get_repository", lambda *args: None) +async def test_resolve_last_commits_for_paths_covers_diff_pagination_and_root_commit(monkeypatch): + rest_client = _FakeLakeFSRestClient( + log_responses=[ + { + "results": [ + { + "id": "commit-2", + "message": "Refresh tree rows", + "creation_date": 1713657600, + "parents": ["commit-1"], + } + ], + "pagination": {"has_more": False}, + } + ], + diff_responses=[ + { + "results": [{"path": "docs/guide.md"}], + "pagination": {"has_more": True, "next_offset": "diff-2"}, + }, + { + "results": [{"path": "weights/model.bin"}], + "pagination": {"has_more": False}, + }, + ], + ) + monkeypatch.setattr(tree_api, "get_lakefs_rest_client", lambda: rest_client) + + resolved = await tree_api.resolve_last_commits_for_paths( + "lake", + "main", + [ + {"path": "docs", "type": "directory"}, + {"path": "weights/model.bin", "type": "file"}, + ], + ) + + assert resolved["docs"]["id"] == "commit-2" + assert resolved["weights/model.bin"]["title"] == "Refresh tree rows" + assert rest_client.log_calls == [ + { + "repository": "lake", + "ref": "main", + "after": None, + "amount": tree_api.TREE_COMMIT_SCAN_PAGE_SIZE, + } + ] + assert rest_client.diff_calls == [ + { + "repository": "lake", + "left_ref": "commit-1", + "right_ref": "commit-2", + "after": None, + "amount": tree_api.TREE_DIFF_PAGE_SIZE, + }, + { + "repository": "lake", + "left_ref": "commit-1", + "right_ref": "commit-2", + "after": "diff-2", + "amount": tree_api.TREE_DIFF_PAGE_SIZE, + }, + ] + + root_client = _FakeLakeFSRestClient( + log_responses=[ + { + "results": [ + { + "id": "root-commit", + "message": "Initial import", + "creation_date": "2026-04-21T00:00:00.000000Z", + "parents": [], + } + ], + "pagination": {"has_more": False}, + } + ] + ) + monkeypatch.setattr(tree_api, "get_lakefs_rest_client", lambda: root_client) + + root_resolved = await tree_api.resolve_last_commits_for_paths( + "lake", + "main", + [ + {"path": "README.md", "type": "file"}, + {"path": "docs", "type": "directory"}, + ], + ) + assert root_resolved == { + "README.md": { + "id": "root-commit", + "title": "Initial import", + "date": "2026-04-21T00:00:00.000000Z", + }, + "docs": { + "id": "root-commit", + "title": "Initial import", + "date": "2026-04-21T00:00:00.000000Z", + }, + } + assert await tree_api.resolve_last_commits_for_paths("lake", "main", []) == {} + + empty_client = 
_FakeLakeFSRestClient( + log_responses=[{"results": [], "pagination": {"has_more": False}}] + ) + monkeypatch.setattr(tree_api, "get_lakefs_rest_client", lambda: empty_client) + assert await tree_api.resolve_last_commits_for_paths( + "lake", + "main", + [{"path": "missing.txt", "type": "file"}], + ) == {} + + paginated_client = _FakeLakeFSRestClient( + log_responses=[ + { + "results": [ + { + "id": "commit-2", + "message": "Unrelated change", + "creation_date": 1713657600, + "parents": ["commit-1"], + } + ], + "pagination": {"has_more": True, "next_offset": "page-2"}, + }, + { + "results": [ + { + "id": "root-commit", + "message": "Initial import", + "creation_date": 1713657610, + "parents": [], + } + ], + "pagination": {"has_more": False}, + }, + ], + diff_responses=[ + { + "results": [{"path": "docs/other.md"}], + "pagination": {"has_more": False}, + } + ], + ) + monkeypatch.setattr(tree_api, "get_lakefs_rest_client", lambda: paginated_client) + + paginated_result = await tree_api.resolve_last_commits_for_paths( + "lake", + "main", + [{"path": "README.md", "type": "file"}], + ) + assert paginated_result["README.md"]["id"] == "root-commit" + assert paginated_client.log_calls[1]["after"] == "page-2" + + +@pytest.mark.asyncio +async def test_process_single_path_covers_file_directory_missing_and_errors(monkeypatch): + class _NotFoundError(Exception): + pass + + client = _FakeLakeFSClient( + stat_map={ + "weights/model.bin": { + "size_bytes": 32, + "checksum": "lakefs-sha", + "mtime": 1713657600, + }, + "docs": _NotFoundError("missing file"), + "ghost": _NotFoundError("missing path"), + "broken-dir": _NotFoundError("broken dir"), + "docs-error": _NotFoundError("directory stats failed"), + "broken": RuntimeError("server error"), + }, + list_map={ + "docs/": {"results": [{"checksum": "tree-oid", "mtime": 1713657610}]}, + "ghost/": {"results": []}, + "broken-dir/": RuntimeError("list failed"), + "docs-error/": { + "results": [{"checksum": "tree-oid-2", "mtime": 1713657615}] + }, + }, + ) + monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: client) monkeypatch.setattr( tree_api, - "hf_repo_not_found", - lambda repo_id, repo_type: {"missing": repo_id, "type": str(repo_type)}, + "should_use_lfs", + lambda repository, path, size: path == "weights/model.bin", + ) + monkeypatch.setattr( + tree_api, + "is_lakefs_not_found_error", + lambda error: isinstance(error, _NotFoundError), ) - missing = await tree_api.list_repo_tree.__wrapped__( - "model", - "owner", - "demo", - request, + async def _fake_directory_stats(*args, **kwargs): + if kwargs["directory_path"] == "docs-error": + raise RuntimeError("stats failed") + return (15, 1713657620) + + monkeypatch.setattr(tree_api, "_calculate_directory_stats", _fake_directory_stats) + semaphore = asyncio.Semaphore(1) + + file_result = await tree_api._process_single_path( + "lake", + "main", + SimpleNamespace(id=1), + "weights/model.bin", + {"weights/model.bin": SimpleNamespace(sha256="sha256-lfs", lfs=True)}, + semaphore, + expand=True, ) - assert missing["missing"] == "owner/demo" + assert file_result == { + "type": "file", + "path": "weights/model.bin", + "size": 32, + "oid": "sha256-lfs", + "lastModified": tree_api._format_last_modified(1713657600), + "lfs": { + "oid": "sha256-lfs", + "size": 32, + "pointerSize": 134, + }, + } - repo = SimpleNamespace(full_id="owner/demo", private=False) - captured = {} + directory_result = await tree_api._process_single_path( + "lake", + "main", + SimpleNamespace(id=1), + "docs", + {}, + semaphore, + expand=True, + ) 
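+    # Directory fallback: the object stat for "docs" raises not-found, so the
+    # helper lists "docs/" for the tree OID and takes size/mtime from the faked
+    # _calculate_directory_stats result.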
+ assert directory_result == { + "type": "directory", + "path": "docs", + "oid": "tree-oid", + "size": 15, + "lastModified": tree_api._format_last_modified(1713657620), + } - async def _fake_fetch(lakefs_repo, revision, prefix, recursive): - captured.update( - { - "lakefs_repo": lakefs_repo, - "revision": revision, - "prefix": prefix, - "recursive": recursive, - } + assert ( + await tree_api._process_single_path( + "lake", + "main", + SimpleNamespace(id=1), + "ghost", + {}, + semaphore, + expand=False, ) - return [ - {"path_type": "object", "path": "folder/file.txt"}, - {"path_type": "common_prefix", "path": "folder/"}, - ] + is None + ) + assert ( + await tree_api._process_single_path( + "lake", + "main", + SimpleNamespace(id=1), + "broken-dir", + {}, + semaphore, + expand=False, + ) + is None + ) + assert await tree_api._process_single_path( + "lake", + "main", + SimpleNamespace(id=1), + "docs-error", + {}, + semaphore, + expand=True, + ) == { + "type": "directory", + "path": "docs-error", + "oid": "tree-oid-2", + "size": 0, + "lastModified": tree_api._format_last_modified(1713657615), + } + assert ( + await tree_api._process_single_path( + "lake", + "main", + SimpleNamespace(id=1), + "broken", + {}, + semaphore, + expand=False, + ) + is None + ) + + +@pytest.mark.asyncio +async def test_list_repo_tree_covers_success_pagination_and_error_paths(monkeypatch): + request = _request( + "/api/models/owner/demo/tree/main/docs", + query={"recursive": "false", "expand": "true", "limit": "200"}, + ) + repo = SimpleNamespace(full_id="owner/demo", private=False) monkeypatch.setattr(tree_api, "get_repository", lambda *args: repo) monkeypatch.setattr(tree_api, "check_repo_read_permission", lambda repo_arg, user: True) monkeypatch.setattr(tree_api, "lakefs_repo_name", lambda repo_type, repo_id: "lake-repo") - monkeypatch.setattr(tree_api, "fetch_lakefs_objects", _fake_fetch) + async def _resolve_revision(client, lakefs_repo, revision): + return ("resolved-main", "branch") - async def _fake_convert_file_object(obj, repository): - return {"type": "file", "path": obj["path"]} + monkeypatch.setattr(tree_api, "resolve_revision", _resolve_revision) + fetch_calls = [] - async def _fake_convert_directory_object(obj, lakefs_repo, revision): - return {"type": "directory", "path": obj["path"]} + async def _fake_fetch(**kwargs): + fetch_calls.append(kwargs) + return { + "results": [ + { + "path_type": "object", + "path": "docs/guide.md", + "size_bytes": 12, + "checksum": "lakefs-guide", + "mtime": 1713657600, + }, + { + "path_type": "common_prefix", + "path": "docs/assets/", + "checksum": "tree-assets", + "mtime": 1713657605, + }, + ], + "pagination": {"has_more": True, "next_offset": "page-2"}, + } - monkeypatch.setattr(tree_api, "convert_file_object", _fake_convert_file_object) - monkeypatch.setattr(tree_api, "convert_directory_object", _fake_convert_directory_object) - - success = await tree_api.list_repo_tree.__wrapped__( - "model", - "owner", - "demo", - request, - path="folder", - ) - assert captured["prefix"] == "folder/" - assert success == [ - {"type": "file", "path": "folder/file.txt"}, - {"type": "directory", "path": "folder/"}, - ] - - error = RuntimeError("missing") - async def _raise_missing(*args, **kwargs): - raise error - - monkeypatch.setattr(tree_api, "fetch_lakefs_objects", _raise_missing) - monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: exc is error) - monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: True) + monkeypatch.setattr(tree_api, 
"fetch_lakefs_objects_page", _fake_fetch) monkeypatch.setattr( tree_api, - "hf_revision_not_found", - lambda repo_id, revision: {"revision": revision, "repo": repo_id}, + "_build_file_record_map", + lambda repository, paths: { + "docs/guide.md": SimpleNamespace(sha256="sha-db", lfs=False) + }, ) - revision_missing = await tree_api.list_repo_tree.__wrapped__( + async def _resolve_last_commits(lakefs_repo, revision, targets): + return { + "docs/guide.md": {"id": "commit-1", "title": "Update guide"}, + "docs/assets": {"id": "commit-2", "title": "Add assets"}, + } + + monkeypatch.setattr(tree_api, "resolve_last_commits_for_paths", _resolve_last_commits) + monkeypatch.setattr(tree_api.cfg.app, "base_url", "https://hub.local") + + response = await tree_api.list_repo_tree.__wrapped__( "model", "owner", "demo", request, - revision="bad-rev", + path="/docs/", + expand=True, + limit=200, + cursor="page-1", ) - assert revision_missing == {"revision": "bad-rev", "repo": "owner/demo"} - monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: False) + assert isinstance(response, JSONResponse) + assert fetch_calls == [ + { + "lakefs_repo": "lake-repo", + "revision": "resolved-main", + "prefix": "docs/", + "recursive": False, + "amount": tree_api.TREE_EXPAND_PAGE_SIZE, + "after": "page-1", + } + ] + assert response.headers["link"] == ( + '; rel="next"' + ) + assert _json_body(response) == [ + { + "type": "file", + "oid": "sha-db", + "size": 12, + "path": "docs/guide.md", + "lastModified": tree_api._format_last_modified(1713657600), + "lastCommit": {"id": "commit-1", "title": "Update guide"}, + "securityFileStatus": None, + }, + { + "type": "directory", + "oid": "tree-assets", + "size": 0, + "path": "docs/assets", + "lastModified": tree_api._format_last_modified(1713657605), + "lastCommit": {"id": "commit-2", "title": "Add assets"}, + }, + ] + + monkeypatch.setattr(tree_api, "get_repository", lambda *args: None) + monkeypatch.setattr( + tree_api, + "hf_repo_not_found", + lambda repo_id, repo_type: {"missing": repo_id, "type": repo_type}, + ) assert ( await tree_api.list_repo_tree.__wrapped__( "model", "owner", "demo", request, - revision="missing-path", + limit=None, ) - == [] - ) + ) == {"missing": "owner/demo", "type": "model"} - generic_error = RuntimeError("server") - async def _raise_generic(*args, **kwargs): + monkeypatch.setattr(tree_api, "get_repository", lambda *args: repo) + async def _raise_resolve_revision(client, lakefs_repo, revision): + raise RuntimeError("bad revision") + + monkeypatch.setattr(tree_api, "resolve_revision", _raise_resolve_revision) + monkeypatch.setattr( + tree_api, + "hf_revision_not_found", + lambda repo_id, revision: {"revision": revision, "repo": repo_id}, + ) + assert ( + await tree_api.list_repo_tree.__wrapped__( + "model", + "owner", + "demo", + request, + revision="bad-rev", + limit=None, + ) + ) == {"revision": "bad-rev", "repo": "owner/demo"} + + error = RuntimeError("missing path") + + async def _raise_missing(**kwargs): + raise error + + monkeypatch.setattr(tree_api, "resolve_revision", _resolve_revision) + monkeypatch.setattr(tree_api, "fetch_lakefs_objects_page", _raise_missing) + monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: exc is error) + monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: False) + monkeypatch.setattr( + tree_api, + "hf_entry_not_found", + lambda repo_id, path, revision: {"entry": path, "repo": repo_id, "revision": revision}, + ) + assert ( + await tree_api.list_repo_tree.__wrapped__( 
+ "model", + "owner", + "demo", + request, + path="/docs", + limit=None, + ) + ) == {"entry": "docs", "repo": "owner/demo", "revision": "main"} + + monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: True) + assert ( + await tree_api.list_repo_tree.__wrapped__( + "model", + "owner", + "demo", + request, + revision="bad-rev", + limit=None, + ) + ) == {"revision": "bad-rev", "repo": "owner/demo"} + + async def _empty_page(**kwargs): + return {"results": [], "pagination": {"has_more": False}} + + monkeypatch.setattr(tree_api, "fetch_lakefs_objects_page", _empty_page) + assert ( + await tree_api.list_repo_tree.__wrapped__( + "model", + "owner", + "demo", + request, + path="/docs", + limit=None, + ) + ) == {"entry": "docs", "repo": "owner/demo", "revision": "main"} + + generic_error = RuntimeError("server error") + + async def _raise_generic(**kwargs): raise generic_error - monkeypatch.setattr(tree_api, "fetch_lakefs_objects", _raise_generic) + monkeypatch.setattr(tree_api, "fetch_lakefs_objects_page", _raise_generic) monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: False) monkeypatch.setattr(tree_api, "hf_server_error", lambda message: {"error": message}) - server_error = await tree_api.list_repo_tree.__wrapped__( + assert "Failed to list objects" in ( + await tree_api.list_repo_tree.__wrapped__( + "model", + "owner", + "demo", + request, + limit=None, + ) + )["error"] + + +@pytest.mark.asyncio +async def test_list_repo_tree_handles_last_commit_lookup_failures(monkeypatch): + request = _request("/api/models/owner/demo/tree/main") + repo = SimpleNamespace(full_id="owner/demo", private=False) + + monkeypatch.setattr(tree_api, "get_repository", lambda *args: repo) + monkeypatch.setattr(tree_api, "check_repo_read_permission", lambda repo_arg, user: True) + monkeypatch.setattr(tree_api, "lakefs_repo_name", lambda repo_type, repo_id: "lake-repo") + async def _resolve_revision(client, lakefs_repo, revision): + return ("resolved-main", "branch") + + monkeypatch.setattr(tree_api, "resolve_revision", _resolve_revision) + + async def _fetch_single_page(**kwargs): + return { + "results": [ + { + "path_type": "object", + "path": "README.md", + "size_bytes": 5, + "checksum": "sha-readme", + } + ], + "pagination": {"has_more": False}, + } + + monkeypatch.setattr(tree_api, "fetch_lakefs_objects_page", _fetch_single_page) + monkeypatch.setattr(tree_api, "_build_file_record_map", lambda repository, paths: {}) + monkeypatch.setattr(tree_api, "should_use_lfs", lambda repository, path, size: False) + + revision_error = RuntimeError("bad commit history") + async def _raise_revision_error(lakefs_repo, revision, targets): + raise revision_error + + monkeypatch.setattr(tree_api, "resolve_last_commits_for_paths", _raise_revision_error) + monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: exc is revision_error) + monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: True) + monkeypatch.setattr( + tree_api, + "hf_revision_not_found", + lambda repo_id, revision: {"revision": revision, "repo": repo_id}, + ) + assert ( + await tree_api.list_repo_tree.__wrapped__( + "model", + "owner", + "demo", + request, + expand=True, + limit=None, + ) + ) == {"revision": "main", "repo": "owner/demo"} + + generic_error = RuntimeError("commit lookup failed") + async def _raise_generic_commit_error(lakefs_repo, revision, targets): + raise generic_error + + monkeypatch.setattr( + tree_api, + "resolve_last_commits_for_paths", + _raise_generic_commit_error, + ) + 
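+    # With the not-found classifier disabled below, the generic commit-lookup
+    # failure is swallowed and rows come back with lastCommit degraded to None.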
monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: False) + + response = await tree_api.list_repo_tree.__wrapped__( "model", "owner", "demo", request, + expand=True, + limit=None, ) - assert "Failed to list objects" in server_error["error"] + assert _json_body(response) == [ + { + "type": "file", + "oid": "sha-readme", + "size": 5, + "path": "README.md", + "lastCommit": None, + "securityFileStatus": None, + } + ] @pytest.mark.asyncio -async def test_get_paths_info_covers_missing_repo_lfs_directory_and_missing_paths(monkeypatch): - request = SimpleNamespace() +async def test_get_paths_info_covers_limits_success_and_error_paths(monkeypatch): + request = _request("/api/models/owner/demo/paths-info/main") + repo = SimpleNamespace(full_id="owner/demo", private=False) + monkeypatch.setattr(tree_api, "get_repository", lambda *args: None) monkeypatch.setattr( tree_api, "hf_repo_not_found", lambda repo_id, repo_type: {"missing": repo_id}, ) + assert ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=["README.md"], + ) + ) == {"missing": "owner/demo"} - missing = await tree_api.get_paths_info.__wrapped__( - "model", - "owner", - "demo", - "main", - request, - paths=["README.md"], - ) - assert missing == {"missing": "owner/demo"} - - repo = SimpleNamespace(full_id="owner/demo", private=False) - - class _NotFoundError(Exception): - pass - - client = _FakeLakeFSClient( - stat_map={ - "weights.bin": {"size_bytes": 32, "checksum": "lakefs-sha"}, - "folder": _NotFoundError("folder"), - "ghost": _NotFoundError("ghost"), - "broken": _NotFoundError("broken"), - "server-error": RuntimeError("server error"), - }, - list_map={ - "folder/": {"results": [{"checksum": "tree-oid"}]}, - "ghost/": {"results": []}, - "broken/": RuntimeError("broken directory"), - }, - ) monkeypatch.setattr(tree_api, "get_repository", lambda *args: repo) monkeypatch.setattr(tree_api, "check_repo_read_permission", lambda repo_arg, user: True) + monkeypatch.setattr( + tree_api, + "hf_bad_request", + lambda message: {"bad_request": message}, + ) + too_many_paths = ["file.txt"] * (tree_api.PATHS_INFO_MAX_PATHS + 1) + assert "Maximum supported paths" in ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=too_many_paths, + ) + )["bad_request"] + monkeypatch.setattr(tree_api, "lakefs_repo_name", lambda repo_type, repo_id: "lake-repo") - monkeypatch.setattr(tree_api, "get_lakefs_client", lambda: client) + async def _resolve_revision(client, lakefs_repo, revision): + return ("resolved-main", "branch") + + monkeypatch.setattr(tree_api, "resolve_revision", _resolve_revision) monkeypatch.setattr( tree_api, - "should_use_lfs", - lambda repo_arg, path, size: path == "weights.bin", - ) - monkeypatch.setattr( - tree_api, - "get_file", - lambda repo_arg, path: ( - SimpleNamespace(sha256="sha256-lfs") if path == "weights.bin" else None - ), - ) - monkeypatch.setattr( - tree_api, - "is_lakefs_not_found_error", - lambda exc: isinstance(exc, _NotFoundError), + "_build_file_record_map", + lambda repository, paths: {"README.md": SimpleNamespace(sha256="sha-readme", lfs=False)}, ) + processed_paths = [] + + async def _fake_process_path(**kwargs): + processed_paths.append(kwargs["clean_path"]) + if kwargs["clean_path"] == "README.md": + return { + "type": "file", + "path": "README.md", + "size": 5, + "oid": "sha-readme", + } + if kwargs["clean_path"] == "docs": + return { + "type": "directory", + "path": "docs", + "oid": 
"tree-docs", + "size": 0, + } + return None + + monkeypatch.setattr(tree_api, "_process_single_path", _fake_process_path) + + async def _resolve_last_commits(lakefs_repo, revision, targets): + return { + "README.md": {"id": "commit-1", "title": "Update README"}, + "docs": {"id": "commit-2", "title": "Refresh docs"}, + } + + monkeypatch.setattr(tree_api, "resolve_last_commits_for_paths", _resolve_last_commits) results = await tree_api.get_paths_info.__wrapped__( "model", @@ -307,24 +1001,174 @@ async def test_get_paths_info_covers_missing_repo_lfs_directory_and_missing_path "demo", "main", request, - paths=["weights.bin", "folder", "ghost", "broken", "server-error"], + paths=["/README.md/", "docs", "", None], + expand=True, ) - + assert processed_paths == ["README.md", "docs"] assert results == [ { "type": "file", - "path": "weights.bin", - "size": 32, - "oid": "sha256-lfs", - "lfs": {"oid": "sha256-lfs", "size": 32, "pointerSize": 134}, - "last_commit": None, - "security": None, + "path": "README.md", + "size": 5, + "oid": "sha-readme", + "lastCommit": {"id": "commit-1", "title": "Update README"}, + "securityFileStatus": None, }, { "type": "directory", - "path": "folder", - "oid": "tree-oid", - "tree_id": "tree-oid", - "last_commit": None, + "path": "docs", + "oid": "tree-docs", + "size": 0, + "lastCommit": {"id": "commit-2", "title": "Refresh docs"}, }, ] + + async def _raise_resolve_revision(client, lakefs_repo, revision): + raise RuntimeError("bad revision") + + monkeypatch.setattr(tree_api, "resolve_revision", _raise_resolve_revision) + monkeypatch.setattr( + tree_api, + "hf_revision_not_found", + lambda repo_id, revision: {"revision": revision, "repo": repo_id}, + ) + assert ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "bad-rev", + request, + paths=["README.md"], + ) + ) == {"revision": "bad-rev", "repo": "owner/demo"} + + error = RuntimeError("missing revision") + monkeypatch.setattr(tree_api, "resolve_revision", _resolve_revision) + + async def _raise_process_revision_error(**kwargs): + raise error + + monkeypatch.setattr(tree_api, "_process_single_path", _raise_process_revision_error) + monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: exc is error) + monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: True) + assert ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=["README.md"], + ) + ) == {"revision": "main", "repo": "owner/demo"} + + monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: False) + assert ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=["README.md"], + ) + == [] + ) + + generic_error = RuntimeError("server error") + async def _raise_process_generic_error(**kwargs): + raise generic_error + + monkeypatch.setattr(tree_api, "_process_single_path", _raise_process_generic_error) + monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: False) + monkeypatch.setattr(tree_api, "hf_server_error", lambda message: {"error": message}) + assert "Failed to fetch paths info" in ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=["README.md"], + ) + )["error"] + + +@pytest.mark.asyncio +async def test_get_paths_info_handles_last_commit_lookup_failures(monkeypatch): + request = _request("/api/models/owner/demo/paths-info/main") + repo = SimpleNamespace(full_id="owner/demo", private=False) + + 
monkeypatch.setattr(tree_api, "get_repository", lambda *args: repo) + monkeypatch.setattr(tree_api, "check_repo_read_permission", lambda repo_arg, user: True) + monkeypatch.setattr(tree_api, "lakefs_repo_name", lambda repo_type, repo_id: "lake-repo") + async def _resolve_revision(client, lakefs_repo, revision): + return ("resolved-main", "branch") + + monkeypatch.setattr(tree_api, "resolve_revision", _resolve_revision) + monkeypatch.setattr(tree_api, "_build_file_record_map", lambda repository, paths: {}) + async def _process_path(**kwargs): + return { + "type": "file", + "path": kwargs["clean_path"], + "size": 5, + "oid": "sha-readme", + } + + monkeypatch.setattr(tree_api, "_process_single_path", _process_path) + + revision_error = RuntimeError("bad commit history") + async def _raise_revision_error(lakefs_repo, revision, targets): + raise revision_error + + monkeypatch.setattr(tree_api, "resolve_last_commits_for_paths", _raise_revision_error) + monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: exc is revision_error) + monkeypatch.setattr(tree_api, "is_lakefs_revision_error", lambda exc: True) + monkeypatch.setattr( + tree_api, + "hf_revision_not_found", + lambda repo_id, revision: {"revision": revision, "repo": repo_id}, + ) + assert ( + await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=["README.md"], + expand=True, + ) + ) == {"revision": "main", "repo": "owner/demo"} + + generic_error = RuntimeError("commit lookup failed") + async def _raise_generic_commit_error(lakefs_repo, revision, targets): + raise generic_error + + monkeypatch.setattr( + tree_api, + "resolve_last_commits_for_paths", + _raise_generic_commit_error, + ) + monkeypatch.setattr(tree_api, "is_lakefs_not_found_error", lambda exc: False) + + results = await tree_api.get_paths_info.__wrapped__( + "model", + "owner", + "demo", + "main", + request, + paths=["README.md"], + expand=True, + ) + assert results == [ + { + "type": "file", + "path": "README.md", + "size": 5, + "oid": "sha-readme", + "lastCommit": None, + "securityFileStatus": None, + } + ]