Skip to content

Commit

Permalink
feat: bring vortex-tui into tree
Browse files Browse the repository at this point in the history
As we change/break format makes it easier to keep this tool up to date. Also helpful for debugging/exploring format
  • Loading branch information
a10y committed Jan 13, 2025
1 parent 5a58bfa commit 55e23f9
Show file tree
Hide file tree
Showing 12 changed files with 640 additions and 6 deletions.
1 change: 1 addition & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ rustflags = [

[alias]
xtask = "run -p xtask --"
vx = "run -p vortex-cli --"
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"vortex",
"vortex-array",
"vortex-buffer",
"vortex-cli",
"vortex-datafusion",
"vortex-datetime-dtype",
"vortex-dtype",
Expand Down
29 changes: 29 additions & 0 deletions vortex-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[package]
name = "vortex-cli"
description = "a small but mighty tool for working with Vortex files"
version.workspace = true
homepage.workspace = true
repository.workspace = true
authors.workspace = true
license.workspace = true
keywords.workspace = true
include.workspace = true
edition.workspace = true
rust-version.workspace = true
readme.workspace = true
categories.workspace = true

[dependencies]
bytes = { workspace = true }
clap = { version = "4", features = ["derive"] }
crossterm = "0.28"
ratatui = "0.29"
tokio = { workspace = true, features = ["rt-multi-thread"] }
vortex = { workspace = true, features = ["tokio"] }

[lints]
workspace = true

[[bin]]
name = "vx"
path = "src/main.rs"
12 changes: 12 additions & 0 deletions vortex-cli/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# `vx` Vortex CLI

A small, helpful CLI tool for exploring and analyzing Vortex files.

* `browse`: Browse the structure of your Vortex file with a rich TUI
* `tree`: print the file contents as JSON


TODO:

- [ ] `cat` to print a Vortex file as JSON to stdout
- [ ] `compress` to ingest JSON/CSV/other formats that are Arrow-compatible
203 changes: 203 additions & 0 deletions vortex-cli/src/browse/app.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
use std::ops::Range;
use std::os::unix::fs::FileExt;
use std::path::Path;
use std::sync::Arc;

use bytes::{Bytes, BytesMut};
use ratatui::widgets::ListState;
use vortex::dtype::{DType, Field, Nullability, StructDType};
use vortex::error::VortexResult;
use vortex::file::{
read_initial_bytes, InitialRead, CHUNKED_LAYOUT_ID, COLUMNAR_LAYOUT_ID, FLAT_LAYOUT_ID,
};
use vortex::flatbuffers::footer;
use vortex::io::{TokioFile, VortexReadAt};
use vortex::stats::stats_from_bitset_bytes;
// Add a shared Tokio Runtime for use in the app.

/// The top-level tab currently displayed by the browser.
///
/// `Layout` is the default tab.
#[derive(Default, Copy, Clone, Eq, PartialEq)]
pub enum Tab {
/// The layout tree browser.
#[default]
Layout,
/// The encoding tree viewer
Encodings,
// TODO(aduffy): SQL query page powered by DF
// Query,
}

/// The kind of a layout node, decoded from the `u16` encoding id stored on the
/// flatbuffer layout (see the `From<u16>` impl).
///
/// NOTE(review): despite the name, the variants correspond to the layout ids
/// (`FLAT_LAYOUT_ID`, `CHUNKED_LAYOUT_ID`, `COLUMNAR_LAYOUT_ID`), not array encodings.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Encoding {
// Id equals `FLAT_LAYOUT_ID`.
Flat,
// Id equals `CHUNKED_LAYOUT_ID`.
Chunked,
// Id equals `COLUMNAR_LAYOUT_ID`.
Columnar,
// Any id that matches none of the known layout ids.
Unknown,
}

impl From<u16> for Encoding {
    /// Map a raw layout id onto the matching [`Encoding`] variant.
    ///
    /// Ids that match none of the known layout ids become [`Encoding::Unknown`].
    fn from(value: u16) -> Self {
        // The layout ids are not usable as match patterns here, so compare through guards.
        match value {
            id if id == FLAT_LAYOUT_ID.0 => Self::Flat,
            id if id == CHUNKED_LAYOUT_ID.0 => Self::Chunked,
            id if id == COLUMNAR_LAYOUT_ID.0 => Self::Columnar,
            _ => Self::Unknown,
        }
    }
}

/// A pointer into the `Layout` hierarchy that can be advanced.
///
/// The pointer wraps an InitialRead.
pub struct LayoutCursor {
// Child indices to follow, starting from the root layout, to reach the current node.
// An empty path means the cursor is at the root.
path: Vec<usize>,
// The initial read of the file; `fb_layout()` on it yields the root layout.
initial_read: InitialRead,
// The DType of the node the cursor currently points at.
dtype: DType,
// The layout kind of the node the cursor currently points at.
encoding: Encoding,
}

impl LayoutCursor {
    /// Create a cursor positioned at the root layout of `initial_read`.
    pub fn new(initial_read: InitialRead) -> Self {
        let dtype = initial_read.dtype();
        let encoding = Encoding::from(initial_read.fb_layout().encoding());

        Self {
            path: Vec::new(),
            initial_read,
            dtype,
            encoding,
        }
    }

    /// Create a cursor positioned at the node reached by following `path` (a list of child
    /// indices) down from the root layout.
    ///
    /// The DType and encoding of the target node are derived while walking the path.
    ///
    /// # Panics
    ///
    /// Panics if the path steps through a Flat layout (which has no children), through a
    /// layout with an unknown encoding, through a node without children, or if a Columnar
    /// node's DType is not a struct containing the requested field.
    pub fn new_with_path(initial_read: InitialRead, path: Vec<usize>) -> Self {
        let mut layout = initial_read.fb_layout();
        let mut dtype = initial_read.dtype();
        let mut encoding = Encoding::from(layout.encoding());

        // Descend one level per path component, tracking the DType as we go.
        for &component in &path {
            // The child's DType is determined by the DType and encoding of its parent.
            let child_dtype = match encoding {
                Encoding::Chunked => match layout.metadata() {
                    // When a Chunked layout has a metadata field set, its first child has a
                    // STRUCT DType with one field for each of the statistics.
                    Some(metadata) if component == 0 => {
                        let stats = stats_from_bitset_bytes(metadata.bytes());

                        let names: Vec<Arc<str>> = stats
                            .iter()
                            .map(|stat| Arc::from(stat.to_string().as_str()))
                            .collect();
                        let dtypes: Vec<DType> =
                            stats.iter().map(|stat| stat.dtype(&dtype)).collect();

                        DType::Struct(
                            StructDType::new(names.into(), dtypes),
                            Nullability::NonNullable,
                        )
                    }
                    // Every other child of a Chunked layout shares the parent's DType.
                    _ => dtype.clone(),
                },
                Encoding::Columnar => {
                    let struct_dtype = dtype.as_struct().expect("struct dtype");
                    let field = struct_dtype
                        .field_info(&Field::Index(component))
                        .expect("struct dtype component access");
                    field.dtype.value().expect("dtype value")
                }
                Encoding::Unknown => todo!("unknown DType"),
                // Flat layouts have no children
                Encoding::Flat => unreachable!(),
            };
            dtype = child_dtype;

            layout = layout.children().expect("children").get(component);
            encoding = Encoding::from(layout.encoding());
        }

        Self {
            path,
            initial_read,
            dtype,
            encoding,
        }
    }

    /// Create a new LayoutCursor indexing into the n-th child of the layout at the current
    /// cursor position.
    pub fn child(&self, n: usize) -> Self {
        let child_path: Vec<usize> = self
            .path
            .iter()
            .copied()
            .chain(std::iter::once(n))
            .collect();

        Self::new_with_path(self.initial_read.clone(), child_path)
    }

    /// Create a new LayoutCursor pointing at the parent of the current node.
    ///
    /// At the root (empty path) this yields another cursor at the root.
    pub fn parent(&self) -> Self {
        // Dropping the last component moves one level up; an empty path stays empty.
        let parent_len = self.path.len().saturating_sub(1);
        let parent_path = self.path[..parent_len].to_vec();

        Self::new_with_path(self.initial_read.clone(), parent_path)
    }

    /// The DType of the node at the current cursor position.
    pub fn dtype(&self) -> &DType {
        &self.dtype
    }

    /// The layout kind of the node at the current cursor position.
    pub fn encoding(&self) -> Encoding {
        self.encoding
    }

    /// Resolve the flatbuffer layout of the node at the current cursor position by walking
    /// the stored path down from the root layout.
    ///
    /// # Panics
    ///
    /// Panics if a node along the path has no children.
    pub fn layout_fb(&self) -> footer::Layout {
        self.path
            .iter()
            .fold(self.initial_read.fb_layout(), |node, &component| {
                node.children().expect("children").get(component)
            })
    }
}

/// State saved across all Tabs.
///
/// Holding them all allows us to switch between tabs without resetting view state.
pub struct AppState {
/// Handle to the open file; also used for synchronous byte-range reads.
pub reader: TokioFile,
/// Current position in the layout tree.
pub cursor: LayoutCursor,
/// The tab that is currently displayed.
pub current_tab: Tab,

/// List state for the Layouts view
pub layouts_list_state: ListState,
}

impl AppState {
    /// Read the given byte range from the underlying file.
    ///
    /// We're cheating by doing the reads synchronously since we're given byte offsets.
    ///
    /// # Panics
    ///
    /// Panics if the range length does not fit in `usize` or if the read fails.
    /// Assumes `range.start <= range.end`.
    pub fn read_bytes_sync(&self, range: Range<u64>) -> Bytes {
        let len: usize = (range.end - range.start).try_into().expect("zeroed");
        let mut buf = BytesMut::zeroed(len);

        let outcome = self.reader.read_exact_at(&mut buf, range.start);
        outcome.expect("read_exact_at sync");

        buf.freeze()
    }
}

/// Build the application state for the file at `path`.
///
/// Opens the file, performs the initial read of its trailing bytes, and positions the layout
/// cursor at the root layout with the first list entry selected.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, its size cannot be determined, or the
/// initial read fails.
pub async fn create_file_app(path: impl AsRef<Path>) -> VortexResult<AppState> {
    let reader = TokioFile::open(path)?;
    let file_size = reader.size().await?;
    let cursor = LayoutCursor::new(read_initial_bytes(&reader, file_size).await?);
    let layouts_list_state = ListState::default().with_selected(Some(0));

    Ok(AppState {
        reader,
        cursor,
        current_tab: Tab::default(),
        layouts_list_state,
    })
}
83 changes: 83 additions & 0 deletions vortex-cli/src/browse/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use std::path::Path;

use app::{create_file_app, AppState, Tab};
use crossterm::event;
use crossterm::event::{Event, KeyCode, KeyEventKind};
use ratatui::widgets::ListState;
use ratatui::DefaultTerminal;
use ui::render_app;
use vortex::error::VortexResult;

use crate::TOKIO_RUNTIME;

mod app;
mod ui;

/// Run the TUI event loop until the user presses `q`.
///
/// Each iteration redraws the app and then blocks on the next terminal event. Only key-press
/// events are handled:
///
/// * `q` quits.
/// * `Tab` toggles between the Layout and Encodings tabs.
/// * `Up` / `Down` move the selection in the Layouts list.
/// * `Enter` descends into the selected child layout, if there is one.
/// * `Left` ascends to the parent layout.
///
/// Returns a `VortexResult` so that drawing and event-reading failures propagate to the
/// caller instead of panicking.
fn run(mut terminal: DefaultTerminal, mut app: AppState) -> VortexResult<()> {
    loop {
        terminal.draw(|frame| render_app(&mut app, frame))?;

        // Ignore everything except key presses (e.g. key releases, resizes, mouse events).
        let key = match event::read()? {
            Event::Key(key) if key.kind == KeyEventKind::Press => key,
            _ => continue,
        };

        // Navigation keys only apply while the Layouts tab is showing.
        let on_layout_tab = app.current_tab == Tab::Layout;

        match key.code {
            KeyCode::Char('q') => return Ok(()),
            KeyCode::Tab => {
                // toggle between tabs
                app.current_tab = match app.current_tab {
                    Tab::Layout => Tab::Encodings,
                    Tab::Encodings => Tab::Layout,
                };
            }
            KeyCode::Up if on_layout_tab => app.layouts_list_state.scroll_up_by(1),
            KeyCode::Down if on_layout_tab => app.layouts_list_state.scroll_down_by(1),
            KeyCode::Enter if on_layout_tab => {
                let selected = app.layouts_list_state.selected().unwrap_or_default();
                let child_count = app
                    .cursor
                    .layout_fb()
                    .children()
                    .map_or(0, |children| children.len());

                // Only descend when the selection names an existing child. Descending from a
                // node without children (e.g. a Flat layout) would panic inside the cursor.
                if selected < child_count {
                    app.cursor = app.cursor.child(selected);

                    // Reset the list scroll state.
                    app.layouts_list_state = ListState::default().with_selected(Some(0));
                }
            }
            KeyCode::Left if on_layout_tab => {
                // Ascend back up to the Parent node
                app.cursor = app.cursor.parent();
                // Reset the list scroll state.
                app.layouts_list_state = ListState::default().with_selected(Some(0));
            }
            // Most events not handled
            _ => {}
        }
    }
}

// TODO: add tui_logger and have a logs tab so we can see the log output from
// doing Vortex things.

/// Open `file` and run the interactive browser TUI on it until the user quits.
///
/// The file is opened and its initial bytes are read on the shared Tokio runtime before the
/// terminal is initialized, so a failure to open the file never touches the terminal state.
///
/// # Errors
///
/// Returns an error if the file cannot be opened/read, or if clearing the terminal, drawing,
/// or reading events fails.
pub fn exec_tui(file: impl AsRef<Path>) -> VortexResult<()> {
    let app = TOKIO_RUNTIME.block_on(create_file_app(file))?;

    let mut terminal = ratatui::init();

    // Capture the outcome instead of returning early with `?`: the terminal must be restored
    // on the error path as well, otherwise the user's shell is left in the TUI's terminal mode.
    let result = match terminal.clear() {
        Ok(()) => run(terminal, app),
        Err(err) => Err(err.into()),
    };

    ratatui::restore();
    result
}
Loading

0 comments on commit 55e23f9

Please sign in to comment.