parse duckdb values properly (#216)

This commit is contained in:
carl
2025-10-11 17:42:01 -04:00
committed by GitHub
parent 80878a1142
commit 74fe7d4379
7 changed files with 190 additions and 22 deletions

View File

@@ -1,4 +1,4 @@
FROM lukemathwalker/cargo-chef:latest-rust-1 AS chef
FROM lukemathwalker/cargo-chef:latest-rust-bookworm AS chef
WORKDIR /app
# Cache dependencies

64
dev/duckdb_init.sql Normal file
View File

@@ -0,0 +1,64 @@
-- Test fixture (generated by codex): one table with a column for each data type
-- under test, populated with a single row of sample values.
-- Enum type used by enum_col below.
CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
-- Each column name states the type it exercises.
CREATE TABLE duckdb_all_types (
id INTEGER,
bool_col BOOLEAN,
-- signed integers
tinyint_col TINYINT,
smallint_col SMALLINT,
int_col INTEGER,
bigint_col BIGINT,
hugeint_col HUGEINT,
-- unsigned integers
utinyint_col UTINYINT,
usmallint_col USMALLINT,
uint_col UINTEGER,
ubigint_col UBIGINT,
-- floating point and fixed point
float_col FLOAT,
double_col DOUBLE,
decimal_col DECIMAL(18, 4),
-- temporal types
timestamp_col TIMESTAMP,
timestamp_ms_col TIMESTAMP_MS,
timestamp_ns_col TIMESTAMP_NS,
date_col DATE,
time_col TIME,
interval_col INTERVAL,
-- text and binary
text_col VARCHAR,
blob_col BLOB,
-- nested and composite types
list_col INTEGER[],
array_col INTEGER[3],
enum_col mood,
struct_col STRUCT(a INTEGER, b VARCHAR),
map_col MAP(VARCHAR, INTEGER),
union_col UNION(num INT, txt VARCHAR)
);
-- Values are positional; the trailing comment on each line names the target column.
INSERT INTO duckdb_all_types VALUES (
1, -- id
TRUE, -- bool_col
-5, -- tinyint_col
-30000, -- smallint_col
123456, -- int_col
1234567890123, -- bigint_col
hugeint '123456789012345678901234567890', -- hugeint_col
250, -- utinyint_col
65000, -- usmallint_col
4294967295, -- uint_col
18446744073709551615, -- ubigint_col
3.14, -- float_col
2.718281828, -- double_col
12345.6789, -- decimal_col
TIMESTAMP '2024-05-01 12:34:56', -- timestamp_col
TIMESTAMP '2024-05-01 12:34:56.789', -- timestamp_ms_col
-- NOTE(review): the literal is typed TIMESTAMP; confirm the sub-microsecond
-- digits survive the conversion to TIMESTAMP_NS.
TIMESTAMP '2024-05-01 12:34:56.789123456', -- timestamp_ns_col
DATE '2024-05-01', -- date_col
TIME '12:34:56.789123', -- time_col
INTERVAL '1 year 2 months 3 days 4 hours 5 minutes 6 seconds', -- interval_col
'Hello, world!', -- text_col
-- NOTE(review): DuckDB blob literals escape one byte per \xNN, so this is
-- presumably byte 0xDE followed by the ASCII text 'ADBEEF' rather than the four
-- bytes DE AD BE EF; confirm which was intended.
BLOB '\xDEADBEEF', -- blob_col
[1, 2, 3], -- list_col
[4, 5, 6], -- array_col
'happy', -- enum_col
{'a': 42, 'b': 'nested'}, -- struct_col
map { 'alpha': 10, 'beta': 20 }, -- map_col
union_value(num := 999)::UNION(num INT, txt VARCHAR) -- union_col
);

Binary file not shown.

View File

@@ -92,6 +92,7 @@ pub fn extract_driver_from_url(url: &str) -> Result<Driver> {
{
return Ok(Driver::DuckDb);
}
#[allow(unreachable_code)] // because of cfg above
Err(eyre::Report::msg("DuckDb is not supported on this architecture"))
} else if url.ends_with(".sqlite") || url.ends_with(".sqlite3") {
Ok(Driver::Sqlite)

View File

@@ -1,7 +1,6 @@
use color_eyre::eyre::Result;
use crossterm::event::{KeyEvent, MouseEvent, MouseEventKind};
use ratatui::prelude::*;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::UnboundedSender;
use tui_textarea::{Input, Key, TextArea};
@@ -16,18 +15,6 @@ use crate::{
vim::{Mode, Transition, Vim},
};
/// A row/column pair stored as unsigned 32-bit values.
///
/// NOTE(review): whether the indices are zero- or one-based is not visible
/// in this part of the file; confirm against the code that constructs it.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct CursorPosition {
/// Row component of the position.
pub row: u32,
/// Column component of the position.
pub col: u32,
}
/// A span described by a `start` and an `end` [`CursorPosition`].
///
/// NOTE(review): whether `end` is inclusive is not visible in this part of
/// the file; confirm against the code that consumes it.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct Selection {
/// Position where the span begins.
pub start: CursorPosition,
/// Position where the span ends.
pub end: CursorPosition,
}
/// Builds the source text of the case-insensitive keyword-matching regex.
///
/// The pattern has three capture groups:
/// 1. start of input, or a run of characters that are not ASCII letters,
///    digits, quotes, backticks, `.` or `_`;
/// 2. any one of the words returned by `get_keywords`, joined with `|`;
/// 3. end of input, or another run of the same delimiter characters.
///
/// The keywords are inserted verbatim, without regex escaping.
fn keyword_regex() -> String {
    let alternation = get_keywords().join("|");
    format!("(?i)(^|[^a-zA-Z0-9\'\"`._]+)({})($|[^a-zA-Z0-9\'\"`._]+)", alternation)
}

View File

@@ -113,7 +113,7 @@ impl<'a> ScrollTable<'a> {
self.y_offset = self.y_offset.saturating_sub(std::cmp::max(
1,
self.pg_height.saturating_div(2).saturating_sub(
u16::from(self.pg_height % 2 == 0), // always round down
u16::from(self.pg_height.is_multiple_of(2)), // always round down
) as usize,
));
self
@@ -123,7 +123,7 @@ impl<'a> ScrollTable<'a> {
let new_y_offset = self.y_offset.saturating_add(std::cmp::max(
1,
self.pg_height.saturating_div(2).saturating_sub(
u16::from(self.pg_height % 2 == 0), // always rounds down
u16::from(self.pg_height.is_multiple_of(2)), // always rounds down
) as usize,
));
self.y_offset = std::cmp::min(self.max_y_offset, new_y_offset);

View File

@@ -1,11 +1,16 @@
use std::{
fmt::Write,
io::{self, Write as _},
string::String,
};
use async_trait::async_trait;
use chrono::{DateTime, Duration as ChronoDuration, NaiveDate, NaiveTime};
use color_eyre::eyre::{self, Result};
use duckdb::{Config, Connection};
use duckdb::{
Config, Connection,
types::{OrderedMap, TimeUnit, Value as DuckValue},
};
use crate::cli::{Cli, Driver};
@@ -151,11 +156,10 @@ fn fetch_rows(mut rows: duckdb::Rows<'_>) -> Result<Rows> {
}
let mut r: Vec<String> = Vec::new();
for i in 0..headers.len() {
let value = row.get::<_, Option<String>>(i);
if let Ok(Some(value)) = value {
r.push(value);
} else {
r.push(String::new());
let value = row.get::<usize, DuckValue>(i);
match value {
Ok(value) => r.push(duck_value_to_string(&value)),
Err(_) => r.push("_ERROR_".to_string()),
}
}
results.push(r);
@@ -163,6 +167,118 @@ fn fetch_rows(mut rows: duckdb::Rows<'_>) -> Result<Rows> {
Ok(Rows { headers, rows: results, rows_affected: None })
}
/// Renders one DuckDB value as the text placed in a result cell.
///
/// * SQL `NULL` becomes the literal string `NULL`.
/// * Booleans, integers, floats and decimals use their `Display` output.
/// * Text and enum values are returned unchanged.
/// * Temporal values, blobs and nested containers are delegated to the
///   dedicated `format_*` / `bytes_to_string` helpers.
/// * A union is rendered as the member it currently holds.
fn duck_value_to_string(value: &DuckValue) -> String {
    match value {
        DuckValue::Null => String::from("NULL"),

        // Values that already are strings.
        DuckValue::Text(s) | DuckValue::Enum(s) => s.clone(),

        // Plain scalars: `Display` is the desired rendering.
        DuckValue::Boolean(b) => b.to_string(),
        DuckValue::TinyInt(n) => n.to_string(),
        DuckValue::SmallInt(n) => n.to_string(),
        DuckValue::Int(n) => n.to_string(),
        DuckValue::BigInt(n) => n.to_string(),
        DuckValue::HugeInt(n) => n.to_string(),
        DuckValue::UTinyInt(n) => n.to_string(),
        DuckValue::USmallInt(n) => n.to_string(),
        DuckValue::UInt(n) => n.to_string(),
        DuckValue::UBigInt(n) => n.to_string(),
        DuckValue::Float(x) => x.to_string(),
        DuckValue::Double(x) => x.to_string(),
        DuckValue::Decimal(d) => d.to_string(),

        // Temporal values arrive as raw counters and need decoding.
        DuckValue::Timestamp(unit, raw) => format_timestamp(*unit, *raw),
        DuckValue::Date32(days) => format_date(*days),
        DuckValue::Time64(unit, raw) => format_time(*unit, *raw),
        DuckValue::Interval { months, days, nanos } => format_interval(*months, *days, *nanos),

        // Binary data.
        DuckValue::Blob(bytes) => bytes_to_string(bytes),

        // Containers recurse back into this function for their elements.
        DuckValue::List(items) | DuckValue::Array(items) => format_list(items),
        DuckValue::Struct(fields) => format_struct(fields),
        DuckValue::Map(entries) => format_map(entries),
        DuckValue::Union(member) => duck_value_to_string(member),
    }
}
/// Converts a raw timestamp counter expressed in `unit` into display text.
///
/// The counter is split into whole seconds and a non-negative nanosecond
/// remainder using Euclidean division, so values before the epoch keep a
/// sub-second part in `0..1_000_000_000`. The pair is then handed to
/// `format_timestamp_from_parts`.
fn format_timestamp(unit: TimeUnit, raw: i64) -> String {
    // (ticks per second, nanoseconds per tick) for each supported resolution.
    let (ticks_per_sec, nanos_per_tick): (i64, i64) = match unit {
        TimeUnit::Second => (1, 1_000_000_000),
        TimeUnit::Millisecond => (1_000, 1_000_000),
        TimeUnit::Microsecond => (1_000_000, 1_000),
        TimeUnit::Nanosecond => (1_000_000_000, 1),
    };

    let whole_secs = raw.div_euclid(ticks_per_sec);
    // The remainder is below `ticks_per_sec`, so the product stays under
    // one billion and always fits in a u32.
    let subsec_nanos = (raw.rem_euclid(ticks_per_sec) * nanos_per_tick) as u32;

    format_timestamp_from_parts(whole_secs, subsec_nanos)
}
/// Formats seconds-since-epoch plus a nanosecond fraction as a date-time.
///
/// When chrono cannot represent the instant, the raw value is returned as
/// `<secs>.<nanos>` with the nanoseconds zero-padded to nine digits.
fn format_timestamp_from_parts(secs: i64, nanos: u32) -> String {
    match DateTime::from_timestamp(secs, nanos) {
        Some(instant) => instant.to_string(),
        None => format!("{secs}.{nanos:09}"),
    }
}
/// Formats a day count relative to 1970-01-01 as a calendar date.
///
/// When the resulting date is outside the range chrono can represent, the
/// raw day count is returned as text instead.
fn format_date(days_since_epoch: i32) -> String {
    let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
    let offset = ChronoDuration::days(i64::from(days_since_epoch));

    match unix_epoch.checked_add_signed(offset) {
        Some(date) => date.to_string(),
        None => days_since_epoch.to_string(),
    }
}
/// Formats a raw time-of-day counter expressed in `unit` as a clock time.
///
/// The counter is widened to nanoseconds in 128-bit arithmetic (so the
/// scaling cannot overflow) and wrapped into a single day, which maps
/// negative or over-long values onto `00:00:00..24:00:00`. If chrono still
/// rejects the result, the raw counter is returned as text.
fn format_time(unit: TimeUnit, raw: i64) -> String {
    const NANOS_PER_SEC: i128 = 1_000_000_000;
    const NANOS_PER_DAY: i128 = 86_400 * NANOS_PER_SEC;

    let nanos_per_tick: i128 = match unit {
        TimeUnit::Second => NANOS_PER_SEC,
        TimeUnit::Millisecond => 1_000_000,
        TimeUnit::Microsecond => 1_000,
        TimeUnit::Nanosecond => 1,
    };

    // Euclidean remainder is always in `0..NANOS_PER_DAY`.
    let since_midnight = (i128::from(raw) * nanos_per_tick).rem_euclid(NANOS_PER_DAY);
    let secs = (since_midnight / NANOS_PER_SEC) as u32;
    let nanos = (since_midnight % NANOS_PER_SEC) as u32;

    match NaiveTime::from_num_seconds_from_midnight_opt(secs, nanos) {
        Some(time) => time.to_string(),
        None => raw.to_string(),
    }
}
/// Renders the three interval components as `months=…, days=…, nanos=…`.
///
/// The components are shown exactly as received; no normalisation into
/// years, hours or similar units is performed.
fn format_interval(months: i32, days: i32, nanos: i64) -> String {
    let mut rendered = String::new();
    // Writing into a `String` cannot fail, so the result is ignored.
    let _ = write!(rendered, "months={months}, days={days}, nanos={nanos}");
    rendered
}
fn format_list(values: &[DuckValue]) -> String {
let formatted: Vec<String> = values.iter().map(duck_value_to_string).collect();
format!("[{}]", formatted.join(", "))
}
fn format_struct(map: &OrderedMap<String, DuckValue>) -> String {
let formatted: Vec<String> =
map.iter().map(|(key, value)| format!("{key}: {}", duck_value_to_string(value))).collect();
format!("{{{}}}", formatted.join(", "))
}
/// Renders a map value as `{key: value, ...}` in the map's iteration order.
///
/// Both keys and values are formatted with `duck_value_to_string`.
fn format_map(map: &OrderedMap<DuckValue, DuckValue>) -> String {
    let mut entries: Vec<String> = Vec::new();
    for (key, value) in map.iter() {
        let rendered_key = duck_value_to_string(key);
        let rendered_value = duck_value_to_string(value);
        entries.push(format!("{}: {}", rendered_key, rendered_value));
    }
    format!("{{{}}}", entries.join(", "))
}
/// Renders binary data for display.
///
/// Bytes that form valid UTF-8 are returned as that text (an empty slice
/// yields an empty string). Anything else is shown as `0x` followed by two
/// upper-case hexadecimal digits per byte.
fn bytes_to_string(bytes: &[u8]) -> String {
    if let Ok(text) = std::str::from_utf8(bytes) {
        return text.to_owned();
    }

    bytes.iter().fold(String::from("0x"), |mut hex, byte| {
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = write!(hex, "{byte:02X}");
        hex
    })
}
impl DuckDbDriver {
pub fn new() -> Self {
DuckDbDriver { connection: None, task: None }