From 3e9a55d69a3ed07bca8bfd356516e354666c78f4 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Fri, 24 Oct 2025 01:55:44 +0800 Subject: [PATCH] add datasetviewer frontend module --- src/kohaku-hub-ui/package-lock.json | 16 +- src/kohaku-hub-ui/src/components.d.ts | 5 + .../src/components/DatasetViewer/DataGrid.vue | 152 ++++++ .../DatasetViewer/DataGridEnhanced.vue | 291 ++++++++++++ .../DatasetViewer/DatasetViewer.vue | 267 +++++++++++ .../src/components/DatasetViewer/LICENSE | 205 +++++++++ .../src/components/DatasetViewer/README.md | 316 +++++++++++++ .../components/DatasetViewer/TARFileList.vue | 137 ++++++ .../src/components/DatasetViewer/api.js | 145 ++++++ .../src/components/repo/DatasetViewerTab.vue | 433 ++++++++++++++++++ .../src/components/repo/RepoViewer.vue | 55 ++- 11 files changed, 2014 insertions(+), 8 deletions(-) create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/DataGrid.vue create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/DataGridEnhanced.vue create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/DatasetViewer.vue create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/LICENSE create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/README.md create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/TARFileList.vue create mode 100644 src/kohaku-hub-ui/src/components/DatasetViewer/api.js create mode 100644 src/kohaku-hub-ui/src/components/repo/DatasetViewerTab.vue diff --git a/src/kohaku-hub-ui/package-lock.json b/src/kohaku-hub-ui/package-lock.json index 8464e5b..423af02 100644 --- a/src/kohaku-hub-ui/package-lock.json +++ b/src/kohaku-hub-ui/package-lock.json @@ -732,6 +732,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=18" }, @@ -776,6 +777,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": ">=18" } @@ -1711,6 +1713,7 @@ "resolved": 
"https://registry.npmjs.org/@types/lodash-es/-/lodash-es-4.17.12.tgz", "integrity": "sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==", "license": "MIT", + "peer": true, "dependencies": { "@types/lodash": "*" } @@ -2243,6 +2246,7 @@ "resolved": "https://registry.npmjs.org/@vue/compiler-sfc/-/compiler-sfc-3.5.22.tgz", "integrity": "sha512-tbTR1zKGce4Lj+JLzFXDq36K4vcSZbJ1RBu8FxcDv1IGRz//Dh2EBqksyGVypz3kXpshIfWKGOCcqpSbyGWRJQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/parser": "^7.28.4", "@vue/compiler-core": "3.5.22", @@ -2675,6 +2679,7 @@ "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz", "integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@chevrotain/cst-dts-gen": "11.0.3", "@chevrotain/gast": "11.0.3", @@ -2858,6 +2863,7 @@ "resolved": "https://registry.npmjs.org/cytoscape/-/cytoscape-3.33.1.tgz", "integrity": "sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10" } @@ -3258,6 +3264,7 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -4400,13 +4407,15 @@ "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/lodash-es": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz", "integrity": 
"sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/lodash-unified": { "version": "1.0.3", @@ -4827,6 +4836,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -5530,6 +5540,7 @@ "integrity": "sha512-eSiiRJmovt8qDJkGyZuLnbxAOAdie6NCmmd0NkTC0RJI9duiSBTfr8X2mBYJOUFzxQa2USaHmL99J9uMxkjCyw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@oxc-project/runtime": "0.92.0", "fdir": "^6.5.0", @@ -5654,6 +5665,7 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.22.tgz", "integrity": "sha512-toaZjQ3a/G/mYaLSbV+QsQhIdMo9x5rrqIpYRObsJ6T/J+RyCSFwN2LHNVH9v8uIcljDNa3QzPVdv3Y6b9hAJQ==", "license": "MIT", + "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.22", "@vue/compiler-sfc": "3.5.22", diff --git a/src/kohaku-hub-ui/src/components.d.ts b/src/kohaku-hub-ui/src/components.d.ts index d9ebb6c..f275cd7 100644 --- a/src/kohaku-hub-ui/src/components.d.ts +++ b/src/kohaku-hub-ui/src/components.d.ts @@ -11,7 +11,11 @@ declare module 'vue' { AvatarUpload: typeof import('./components/profile/AvatarUpload.vue')['default'] CodeEditor: typeof import('./components/common/CodeEditor.vue')['default'] CodeViewer: typeof import('./components/common/CodeViewer.vue')['default'] + DataGrid: typeof import('./components/DatasetViewer/DataGrid.vue')['default'] + DataGridEnhanced: typeof import('./components/DatasetViewer/DataGridEnhanced.vue')['default'] DatasetInfoCard: typeof import('./components/repo/metadata/DatasetInfoCard.vue')['default'] + DatasetViewer: typeof import('./components/DatasetViewer/DatasetViewer.vue')['default'] + DatasetViewerTab: typeof import('./components/repo/DatasetViewerTab.vue')['default'] DetailedMetadataPanel: typeof import('./components/repo/metadata/DetailedMetadataPanel.vue')['default'] ElAlert: typeof import('element-plus/es')['ElAlert'] ElBreadcrumb: typeof 
import('element-plus/es')['ElBreadcrumb'] @@ -59,6 +63,7 @@ declare module 'vue' { RouterView: typeof import('vue-router')['RouterView'] SidebarRelationshipsCard: typeof import('./components/repo/metadata/SidebarRelationshipsCard.vue')['default'] SocialLinks: typeof import('./components/profile/SocialLinks.vue')['default'] + TARFileList: typeof import('./components/DatasetViewer/TARFileList.vue')['default'] TheFooter: typeof import('./components/layout/TheFooter.vue')['default'] TheHeader: typeof import('./components/layout/TheHeader.vue')['default'] } diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/DataGrid.vue b/src/kohaku-hub-ui/src/components/DatasetViewer/DataGrid.vue new file mode 100644 index 0000000..60807e2 --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/DataGrid.vue @@ -0,0 +1,152 @@ + + + + + diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/DataGridEnhanced.vue b/src/kohaku-hub-ui/src/components/DatasetViewer/DataGridEnhanced.vue new file mode 100644 index 0000000..4416226 --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/DataGridEnhanced.vue @@ -0,0 +1,291 @@ + + + + + diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/DatasetViewer.vue b/src/kohaku-hub-ui/src/components/DatasetViewer/DatasetViewer.vue new file mode 100644 index 0000000..62934f5 --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/DatasetViewer.vue @@ -0,0 +1,267 @@ + + + + + diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/LICENSE b/src/kohaku-hub-ui/src/components/DatasetViewer/LICENSE new file mode 100644 index 0000000..f638f90 --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/LICENSE @@ -0,0 +1,205 @@ + +# Kohaku Software License 1.0 + +**Published by KohakuBlueLeaf** + +## Purpose + +The **Kohaku Software License** aims to provide maximum freedom for users to work with the Software while protecting contributors from liability and ensuring the freedom of end users. 
It incorporates commercial usage restrictions to balance open access with sustainable development. + +## Definitions + +- **Software**: Refers to the source code, compiled binaries, libraries, modules, documentation, configuration files, and any other materials provided under this License. + +- **Source Code**: The preferred form for making modifications to the Software, including all source files, build scripts, configuration files, and documentation necessary to understand, compile, and modify the Software. + +- **Derivative Work**: Any software based on or derived from the original Software, including but not limited to: + - Modified versions of the Software + - Software that incorporates any portion of the Software + - Software that links to, imports, or otherwise depends on the Software in a manner that creates a combined work + + For a Derivative Work to qualify under this license, it must include the complete Source Code necessary to build, use, and modify the Derivative Work. + +- **Modify**: To alter, adapt, translate, or otherwise change the Software, or to create Derivative Works. + +- **Service Provider**: An entity that uses the Software to offer services to **End Users**, thereby making the **End Users** the recipients of the service. + +- **End User**: Any individual or entity that uses the Software directly or uses services provided by a **Service Provider** that utilizes the Software. + +- **Non-Commercial Purpose**: Uses that do not involve direct or indirect monetary compensation arising from the use of the Software, including personal use, academic research, experimentation, testing, or non-commercial organizational use. 
+ +- **Commercial Usage**: Any use of the Software where: + - The Software is used to provide services or products to customers, clients, or users (internal or external) for monetary compensation, or + - The Software is incorporated into commercial products or services, or + - The Software is used as part of internal company systems that help internal teams execute their business operations in a for-profit organization, or + - The organization using the Software generates revenue from activities directly or indirectly involving the Software + +- **Total Revenue**: + - For Service Providers: The total revenue generated from services utilizing the Software + - For product vendors: The total revenue from products incorporating the Software + - For internal business systems: The total revenue of the organization using the Software for business operations + +## License Grant + +### 1. General Permissions + +Subject to compliance with this License, KohakuBlueLeaf grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free, and limited license to access, use, modify, create Derivative Works, and distribute the Software for **Non-Commercial Purposes** and **Commercial Usage** under certain conditions. + +### 2. Categories of Use + +#### a. Direct Users + +Individuals or entities that use the Software directly for their personal, academic, or non-commercial purposes without operating in a commercial capacity. + +#### b. Service Providers and Commercial Entities + +Entities that use the Software to offer services or products to **End Users**, or that use the Software for internal business operations in a for-profit organization. + +### 3. 
Source Code Availability + +When using or distributing the Software or any Derivative Works, you must: + +- Make the complete Source Code available to recipients +- Ensure the Source Code is in a form that allows recipients to build, modify, and use the Software +- Include all necessary build scripts, configuration files, and dependencies information + +### 4. Derivative Works + +Any Derivative Works created must be published under the **Kohaku Software License**. The minimal requirement includes: + +- Complete Source Code of the Derivative Work +- Build and installation instructions +- Clear indication of what has been modified from the original Software + +**Additional Requirements for Combined Works:** + +- If the Derivative Work combines multiple software components or libraries, all such components that form a combined work must be published under this License or a compatible license. +- You must provide clear documentation on how the components interact and how to build the combined work. +- **Note**: You are not obligated to release proprietary business logic or workflows that use the Software through standard APIs or interfaces without creating Derivative Works. + +## Restrictions + +### 1. 
Commercial Usage + +- **Definition**: **Commercial Usage** is defined as any use where: + - The Software is used to provide services or products to customers, clients, or users (internal or external) for monetary compensation + - The Software is incorporated into commercial products or services + - The Software is used as part of internal company systems that help internal teams execute their business operations in a for-profit organization + - The organization using the Software generates revenue from activities directly or indirectly involving the Software + +- **Conditions for Requiring a Commercial License**: Commercial Usage is prohibited **if either** of the following conditions are met: + - **Total Revenue** attributable to or associated with the Software exceeds $25,000 USD per year, OR + - **Usage Duration** exceeds 3 months + +- **Revenue Threshold and Usage Duration**: + - **Trial Period**: Entities are allowed to engage in **Commercial Usage** without a commercial license for a trial period of **up to 3 months**, provided their **Total Revenue** remains below or equal to $25,000 USD per year. + - **Revenue Limit**: Entities with **Total Revenue** attributable to or associated with the Software below or equal to $25,000 USD per year are permitted to continue **Commercial Usage** without a commercial license, provided the **Usage Duration** does not exceed 3 months. + - **Exceeding Either Threshold**: If an entity's **Total Revenue** exceeds $25,000 USD per year OR the **Commercial Usage** period exceeds 3 months, the entity must request a commercial license from the author. + +- **Requesting a Commercial License**: Entities that need to engage in **Commercial Usage** exceeding both thresholds must contact the author at kohaku@kblueleaf.net to request a commercial license. The author may grant such licenses at their sole discretion, potentially subject to fees, royalties, or revenue-sharing agreements. + +### 2. 
Prohibited Uses + +You may not use the Software for: + +- Military purposes or weapons development +- Surveillance systems or mass monitoring +- Biometric identification or tracking systems +- Any activity that infringes on third-party rights +- Any use violating applicable laws, including privacy and security regulations +- Generating or distributing malware, exploits, or other malicious software + +You may not: + +- Alter or remove copyright and proprietary notices +- Circumvent or remove any security or usage restrictions +- Impose additional terms that conflict with this License +- Distribute the Software to prohibited individuals, entities, or countries as defined by applicable export laws + +### 3. Distribution Requirements + +When distributing the Software or any Derivative Works, you must: + +- Include a copy of this License with the distribution +- Include the complete Source Code or provide clear instructions on how to obtain it +- **Attribution Notice**: Prominently display the following notice: + + ``` + This Software is licensed under the Kohaku Software License by KohakuBlueLeaf. + Copyright 2025 KohakuBlueLeaf. + + IN NO EVENT SHALL KohakuBlueLeaf BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER + LIABILITY ARISING FROM THE USE OF THIS SOFTWARE. + ``` + +- **For Derivative Works**: + - Include a statement clearly indicating that you have modified the original Software + - Document the nature of modifications made + - Ensure all Source Code is available under this License + +- **No Misrepresentation**: Do not misrepresent or imply that Derivative Works are official versions or have been endorsed by the original author unless authorized in writing. 
+ +- **Service Provider Requirements**: + - **Service Providers** must provide **End Users** with clear notice that the service utilizes Software licensed under the Kohaku Software License + - Include a reference to the original Software and this License in service documentation, terms of service, or user interface (e.g., "About" page, footer) + +## No Harm and No Liability + +### 1. No Harm + +You agree that no contributor's conduct in creating the Software has caused you harm. To the extent permitted by law, you waive the right to pursue any legal claims against contributors related to the creation of the Software. + +### 2. No Liability + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +## Patent Grant + +Each contributor grants you a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable patent license to make, use, offer to sell, sell, import, and otherwise transfer the Software, where such license applies only to those patent claims licensable by such contributor that are necessarily infringed by their contribution(s) alone or by combination of their contribution(s) with the Software. + +## Interpretation of Ambiguous Terms + +In the event of any ambiguity or uncertainty in the interpretation of the terms of this License, the Licensee has the right to interpret the ambiguous descriptions in a manner that aligns with the intended purpose of this License, which is to promote open access while protecting sustainable development through commercial licensing. 
+ +## Acceptance and Compliance + +By using, modifying, or distributing the Software, you agree to comply with all terms of this License. Non-compliance may result in the automatic termination of your rights under this License. + +## Termination + +Your rights under this License terminate automatically upon any breach of its terms. Upon termination, you must: + +- Cease all use, modification, and distribution of the Software and Derivative Works +- Destroy all copies of the Software in your possession or control +- If you are a Service Provider, cease providing services that utilize the Software + +Sections regarding No Liability, Indemnification, and General Provisions survive termination. + +## Indemnification + +You agree to indemnify, defend, and hold harmless KohakuBlueLeaf and its affiliates, contributors, and licensors from and against any claims, damages, losses, liabilities, costs, and expenses (including reasonable attorneys' fees) arising from: + +- Your use of the Software +- Your violation of this License +- Your violation of any rights of another party +- Your distribution of the Software or Derivative Works + +## General Provisions + +- **Governing Law**: This License is governed by the laws of Taiwan, without regard to conflict of law principles. + +- **Severability**: If any provision of this License is held to be unenforceable or invalid, that provision shall be modified to the minimum extent necessary to make it enforceable, and the remaining provisions shall remain in full force and effect. + +- **Entire Agreement**: This License constitutes the entire agreement between you and KohakuBlueLeaf regarding the Software and supersedes all prior agreements and understandings. + +- **No Waiver**: The failure of KohakuBlueLeaf to enforce any provision of this License shall not constitute a waiver of that provision or any other provision. 
+ +- **Assignment**: You may not assign or transfer your rights or obligations under this License without prior written consent from KohakuBlueLeaf. + +## Revisions + +KohakuBlueLeaf may publish revised versions of the Kohaku Software License from time to time. Each version will be given a distinguishing version number. You may choose to use the Software under the terms of the version of the License under which you originally received the Software, or under the terms of any subsequent version published by KohakuBlueLeaf. + +## Contact + +For commercial licensing inquiries, please contact: kohaku@kblueleaf.net diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/README.md b/src/kohaku-hub-ui/src/components/DatasetViewer/README.md new file mode 100644 index 0000000..45b1eaa --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/README.md @@ -0,0 +1,316 @@ +# Dataset Viewer - Frontend Components + +Vue 3 components for previewing dataset files. + +## Components + +### DatasetViewer.vue + +Main container component that handles loading and displaying dataset previews. + +**Props:** +- `fileUrl` (String, required): S3 presigned URL or HTTP(S) URL +- `fileName` (String, required): File name (for format detection) +- `maxRows` (Number, default: 1000): Maximum rows to display + +**Events:** +- `@error`: Emitted when an error occurs + +**Example:** +```vue + + + +``` + +### DataGrid.vue + +Tabular data display with sorting. + +**Props:** +- `columns` (Array, required): Column names +- `rows` (Array, required): Row data (2D array) +- `truncated` (Boolean): Whether data is truncated + +**Features:** +- Click column headers to sort +- Ascending/descending toggle +- Max height: 600px (scrollable) +- Max cell width: 300px (ellipsis) + +### TARFileList.vue + +File browser for TAR archives. 
+ +**Props:** +- `files` (Array, required): File list from backend + +**Events:** +- `@select`: Emitted when user selects a file + +**Features:** +- Grouped by directory +- Search/filter +- Shows file sizes +- Only previewable files are clickable + +## API Client (api.js) + +```javascript +import { + previewFile, + listTARFiles, + extractTARFile, + getRateLimitStats, + detectFormat, + formatBytes +} from '@/components/DatasetViewer/api' +``` + +### previewFile(url, options) + +Preview a dataset file. + +**Arguments:** +- `url` (String): File URL +- `options` (Object): + - `format` (String): File format (auto-detect if omitted) + - `maxRows` (Number): Max rows to return + - `delimiter` (String): CSV delimiter + +**Returns:** `Promise<Object>` +```javascript +{ + columns: ['col1', 'col2'], + rows: [['val1', 'val2']], + total_rows: 1, + truncated: false, + file_size: 1024, + format: 'csv' +} +``` + +### listTARFiles(url) + +List files in TAR archive. + +**Returns:** `Promise<Object>` +```javascript +{ + files: [ + { name: 'train.csv', size: 10240, offset: 512 } + ], + total_size: 20480 +} +``` + +### extractTARFile(url, fileName) + +Extract file from TAR archive. + +**Returns:** `Promise<Blob>` + +### getRateLimitStats() + +Get rate limit statistics. + +**Returns:** `Promise<Object>` +```javascript +{ + requests_used: 10, + requests_limit: 60, + concurrent_requests: 1, + concurrent_limit: 3, + bytes_processed: 1048576, + window_seconds: 60 +} +``` + +### detectFormat(filename) + +Detect file format from filename. + +**Returns:** String | null + +Supported formats: `csv`, `tsv`, `jsonl` (also `.ndjson`), `parquet`, `tar` (also `.tar.gz`, `.tgz`, `.tar.bz2`). Plain `json` is deliberately not supported because it requires loading the entire file; use `jsonl` instead. Any other extension returns `null`. + +### formatBytes(bytes) + +Format bytes to human-readable string. + +**Returns:** String (e.g., "1.5 MB") + +## Usage Example + +### Basic Preview + +```vue + + + +``` + +### Integrated into Repo Viewer + +```vue + + + + +``` + +## Styling + +All components support dark mode out of the box: +```vue +
+``` + +Customize with CSS: +```css +.dataset-viewer { + border-radius: 12px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +.data-grid-container { + max-height: 800px; /* Increase max height */ +} +``` + +## Error Handling + +```vue + + + +``` + +## Performance Tips + +1. **Limit `maxRows`**: The default of 1000 is a good balance +2. **Lazy load**: Only render viewer when file is selected +3. **Cancel requests**: Use AbortController for navigation +4. **Cache URLs**: Reuse presigned URLs (valid for 1 hour) + +## Browser Compatibility + +- Chrome: ✅ Full support +- Firefox: ✅ Full support +- Safari: ✅ Full support +- Edge: ✅ Full support + +Requires a modern browser with fetch() and async/await support. + +## License + +Kohaku Software License 1.0 - see the `LICENSE` file in this directory. Commercial use is subject to the conditions described there. + +--- + +**Questions?** Check the backend README or open an issue! diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/TARFileList.vue b/src/kohaku-hub-ui/src/components/DatasetViewer/TARFileList.vue new file mode 100644 index 0000000..c7e52a4 --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/TARFileList.vue @@ -0,0 +1,137 @@ + + + + + diff --git a/src/kohaku-hub-ui/src/components/DatasetViewer/api.js b/src/kohaku-hub-ui/src/components/DatasetViewer/api.js new file mode 100644 index 0000000..fa125cd --- /dev/null +++ b/src/kohaku-hub-ui/src/components/DatasetViewer/api.js @@ -0,0 +1,145 @@ +/** + * Dataset Viewer API Client + * + * Minimal API client for dataset preview backend. + * No authentication required - relies on S3 presigned URLs.
/**
 * Thin axios wrappers around the `/api/dataset-viewer` endpoints, plus two
 * pure helpers (`detectFormat`, `formatBytes`) that perform no I/O.
 */

import axios from "axios";

const API_BASE = "/api/dataset-viewer";

/**
 * File-name suffixes recognised by `detectFormat`, mapped to the format
 * identifier sent to the backend. Plain `.json` is intentionally absent.
 */
const FORMAT_SUFFIXES = [
  [".csv", "csv"],
  [".tsv", "tsv"],
  [".jsonl", "jsonl"],
  [".ndjson", "jsonl"],
  [".parquet", "parquet"],
  [".tar", "tar"],
  [".tar.gz", "tar"],
  [".tgz", "tar"],
  [".tar.bz2", "tar"],
];

/** Unit labels used by `formatBytes`, in ascending powers of 1024. */
const BYTE_UNITS = ["Bytes", "KB", "MB", "GB", "TB"];

/**
 * Preview a dataset file.
 *
 * POSTs to `${API_BASE}/preview` and resolves with the response body.
 *
 * @param {string} url - S3 presigned URL or any HTTP(S) URL
 * @param {Object} [options] - Preview options
 * @param {string} [options.format] - File format (csv, tsv, jsonl, parquet, tar);
 *   sent as-is, so it is `undefined` when omitted
 * @param {number} [options.maxRows=1000] - Maximum rows to return (sent as `max_rows`)
 * @param {string} [options.delimiter=","] - CSV delimiter
 * @returns {Promise<Object>} Preview data (the response body)
 */
export async function previewFile(url, options = {}) {
  const { format, maxRows = 1000, delimiter = "," } = options;

  const { data } = await axios.post(`${API_BASE}/preview`, {
    url,
    format,
    max_rows: maxRows,
    delimiter,
  });

  return data;
}

/**
 * List files in a TAR archive.
 *
 * POSTs to `${API_BASE}/tar/list` and resolves with the response body.
 *
 * @param {string} url - TAR file URL
 * @returns {Promise<Object>} File listing (the response body)
 */
export async function listTARFiles(url) {
  const { data } = await axios.post(`${API_BASE}/tar/list`, { url });
  return data;
}

/**
 * Extract a single file from a TAR archive.
 *
 * POSTs to `${API_BASE}/tar/extract`; the response is requested as a blob so
 * binary members are not decoded as text.
 *
 * @param {string} url - TAR file URL
 * @param {string} fileName - Name of the member to extract (sent as `file_name`)
 * @returns {Promise<Blob>} File content
 */
export async function extractTARFile(url, fileName) {
  const payload = { url, file_name: fileName };
  const { data } = await axios.post(`${API_BASE}/tar/extract`, payload, {
    responseType: "blob",
  });

  return data;
}

/**
 * Execute a SQL query on a dataset.
 *
 * POSTs to `${API_BASE}/sql` and resolves with the response body.
 *
 * @param {string} url - Dataset file URL
 * @param {string} query - SQL query to execute
 * @param {Object} [options] - Query options
 * @param {string} [options.format] - File format
 * @param {number} [options.maxRows=10000] - Max rows to return (sent as `max_rows`)
 * @returns {Promise<Object>} Query results (the response body)
 */
export async function executeSQLQuery(url, query, options = {}) {
  const { format, maxRows = 10000 } = options;

  const { data } = await axios.post(`${API_BASE}/sql`, {
    url,
    query, // Sent in the request body, never in the URL.
    format,
    max_rows: maxRows,
  });

  return data;
}

/**
 * Get rate limit statistics.
 *
 * GETs `${API_BASE}/rate-limit` and resolves with the response body.
 *
 * @returns {Promise<Object>} Rate limit stats (the response body)
 */
export async function getRateLimitStats() {
  const { data } = await axios.get(`${API_BASE}/rate-limit`);
  return data;
}

/**
 * Detect file format from a file name (case-insensitive suffix match).
 *
 * Note: JSON format is NOT supported (requires loading the entire file).
 * Use JSONL instead for streaming support.
 *
 * @param {string} filename - File name
 * @returns {string|null} Format (csv, tsv, jsonl, parquet, tar), or null when
 *   the suffix is not recognised or `filename` is not a string
 */
export function detectFormat(filename) {
  // Callers may pass null/undefined before a file is selected; report
  // "unknown format" rather than throwing.
  if (typeof filename !== "string") return null;

  const lower = filename.toLowerCase();
  const match = FORMAT_SUFFIXES.find(([suffix]) => lower.endsWith(suffix));

  return match ? match[1] : null;
}

/**
 * Format a byte count as a human-readable string using 1024-based units.
 *
 * @param {number} bytes - Byte count
 * @returns {string} Formatted string (e.g., "1.5 MB"); "0 Bytes" for 0, and
 *   "Unknown" for missing, non-numeric, non-finite or negative input
 */
export function formatBytes(bytes) {
  if (bytes === 0) return "0 Bytes";
  if (!bytes) return "Unknown";

  const value = Number(bytes);
  if (!Number.isFinite(value) || value < 0) return "Unknown";

  const k = 1024;
  const rawIndex = Math.floor(Math.log(value) / Math.log(k));
  // Clamp so sub-byte values stay in "Bytes" and values beyond the largest
  // unit are expressed in that unit instead of indexing off the table.
  const index = Math.min(Math.max(rawIndex, 0), BYTE_UNITS.length - 1);
  const scaled = Math.round((value / Math.pow(k, index)) * 100) / 100;

  return `${scaled} ${BYTE_UNITS[index]}`;
}