-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
As we change/break the format, this makes it easier to keep the tool up to date. It is also helpful for debugging/exploring the format.
- Loading branch information
Showing
12 changed files
with
640 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,4 @@ rustflags = [ | |
|
||
[alias] | ||
xtask = "run -p xtask --" | ||
vx = "run -p vortex-cli --" |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[package] | ||
name = "vortex-cli" | ||
description = "a small but mighty tool for working with Vortex files" | ||
version.workspace = true | ||
homepage.workspace = true | ||
repository.workspace = true | ||
authors.workspace = true | ||
license.workspace = true | ||
keywords.workspace = true | ||
include.workspace = true | ||
edition.workspace = true | ||
rust-version.workspace = true | ||
readme.workspace = true | ||
categories.workspace = true | ||
|
||
[dependencies] | ||
bytes = { workspace = true } | ||
clap = { version = "4", features = ["derive"] } | ||
crossterm = "0.28" | ||
ratatui = "0.29" | ||
tokio = { workspace = true, features = ["rt-multi-thread"] } | ||
vortex = { workspace = true, features = ["tokio"] } | ||
|
||
[lints] | ||
workspace = true | ||
|
||
[[bin]] | ||
name = "vx" | ||
path = "src/main.rs" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# `vx` Vortex CLI | ||
|
||
A small, helpful CLI tool for exploring and analyzing Vortex files. | ||
|
||
* `browse`: Browse the structure of your Vortex file with a rich TUI | ||
* `tree`: print the file contents as JSON | ||
|
||
|
||
TODO: | ||
|
||
- [ ] `cat` to print a Vortex file as JSON to stdout | ||
- [ ] `compress` to ingest JSON/CSV/other formats that are Arrow-compatible |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
use std::ops::Range; | ||
use std::os::unix::fs::FileExt; | ||
use std::path::Path; | ||
use std::sync::Arc; | ||
|
||
use bytes::{Bytes, BytesMut}; | ||
use ratatui::widgets::ListState; | ||
use vortex::dtype::{DType, Field, Nullability, StructDType}; | ||
use vortex::error::VortexResult; | ||
use vortex::file::{ | ||
read_initial_bytes, InitialRead, CHUNKED_LAYOUT_ID, COLUMNAR_LAYOUT_ID, FLAT_LAYOUT_ID, | ||
}; | ||
use vortex::flatbuffers::footer; | ||
use vortex::io::{TokioFile, VortexReadAt}; | ||
use vortex::stats::stats_from_bitset_bytes; | ||
// Add a shared Tokio Runtime for use in the app. | ||
|
||
/// The top-level views the browser can display.
///
/// `Layout` is the default tab shown when the application starts.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub enum Tab {
    /// The layout tree browser.
    #[default]
    Layout,
    /// The encoding tree viewer.
    Encodings,
    // TODO(aduffy): SQL query page powered by DF
    // Query,
}
|
||
/// The kind of layout node the cursor is pointing at, as decoded from the
/// numeric encoding id stored in the layout flatbuffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// Id matched `FLAT_LAYOUT_ID`.
    Flat,
    /// Id matched `CHUNKED_LAYOUT_ID`.
    Chunked,
    /// Id matched `COLUMNAR_LAYOUT_ID`.
    Columnar,
    /// Id matched none of the known layout ids.
    Unknown,
}
|
||
impl From<u16> for Encoding { | ||
fn from(value: u16) -> Self { | ||
if value == FLAT_LAYOUT_ID.0 { | ||
Encoding::Flat | ||
} else if value == CHUNKED_LAYOUT_ID.0 { | ||
Encoding::Chunked | ||
} else if value == COLUMNAR_LAYOUT_ID.0 { | ||
Encoding::Columnar | ||
} else { | ||
Encoding::Unknown | ||
} | ||
} | ||
} | ||
|
||
/// A pointer into the `Layout` hierarchy that can be advanced. | ||
/// | ||
/// The pointer wraps an InitialRead. | ||
pub struct LayoutCursor { | ||
path: Vec<usize>, | ||
initial_read: InitialRead, | ||
dtype: DType, | ||
encoding: Encoding, | ||
} | ||
|
||
impl LayoutCursor { | ||
pub fn new(initial_read: InitialRead) -> Self { | ||
Self { | ||
dtype: initial_read.dtype(), | ||
encoding: initial_read.fb_layout().encoding().into(), | ||
initial_read, | ||
path: Vec::new(), | ||
} | ||
} | ||
|
||
pub fn new_with_path(initial_read: InitialRead, path: Vec<usize>) -> Self { | ||
let mut layout = initial_read.fb_layout(); | ||
let mut dtype = initial_read.dtype(); | ||
let mut encoding = Encoding::from(layout.encoding()); | ||
|
||
// Traverse the layout tree at each element of the path. | ||
for component in path.iter().copied() { | ||
// Find the DType of the child based on the DType of the current node. | ||
dtype = match encoding { | ||
Encoding::Chunked => { | ||
if component == 0 && layout.metadata().is_some() { | ||
let stats = stats_from_bitset_bytes( | ||
layout.metadata().expect("extracting stats").bytes(), | ||
); | ||
|
||
// When Chunked layout has a metadata field set, it will have a DType with | ||
// STRUCT type and one field for each of the statistics. | ||
let struct_dtype = StructDType::new( | ||
stats | ||
.iter() | ||
.map(|stat| Arc::from(stat.to_string().as_str())) | ||
.collect::<Vec<Arc<str>>>() | ||
.into(), | ||
stats | ||
.iter() | ||
.map(|stat| stat.dtype(&dtype)) | ||
.collect::<Vec<DType>>(), | ||
); | ||
DType::Struct(struct_dtype, Nullability::NonNullable) | ||
} else { | ||
// If there is no metadata, all children | ||
dtype.clone() | ||
} | ||
} | ||
Encoding::Columnar => dtype | ||
.as_struct() | ||
.expect("struct dtype") | ||
.field_info(&Field::Index(component)) | ||
.expect("struct dtype component access") | ||
.dtype | ||
.value() | ||
.expect("dtype value"), | ||
Encoding::Unknown => todo!("unknown DType"), | ||
// Flat layouts have no children | ||
Encoding::Flat => unreachable!(), | ||
}; | ||
|
||
layout = layout.children().expect("children").get(component); | ||
encoding = layout.encoding().into(); | ||
} | ||
|
||
Self { | ||
path, | ||
initial_read, | ||
dtype, | ||
encoding, | ||
} | ||
} | ||
|
||
/// Create a new LayoutCursor indexing into the n-th child of the layout at the current | ||
/// cursor position. | ||
pub fn child(&self, n: usize) -> Self { | ||
let mut path = self.path.clone(); | ||
path.push(n); | ||
|
||
Self::new_with_path(self.initial_read.clone(), path) | ||
} | ||
|
||
pub fn parent(&self) -> Self { | ||
let mut path = self.path.clone(); | ||
path.pop(); | ||
|
||
Self::new_with_path(self.initial_read.clone(), path) | ||
} | ||
|
||
pub fn dtype(&self) -> &DType { | ||
&self.dtype | ||
} | ||
|
||
pub fn encoding(&self) -> Encoding { | ||
self.encoding | ||
} | ||
|
||
pub fn layout_fb(&self) -> footer::Layout { | ||
let mut layout = self.initial_read.fb_layout(); | ||
for component in self.path.iter().copied() { | ||
layout = layout.children().expect("children").get(component); | ||
} | ||
|
||
layout | ||
} | ||
} | ||
|
||
/// State saved across all Tabs. | ||
/// | ||
/// Holding them all allows us to switch between tabs without resetting view state. | ||
pub struct AppState { | ||
pub reader: TokioFile, | ||
pub cursor: LayoutCursor, | ||
pub current_tab: Tab, | ||
|
||
/// List state for the Layouts view | ||
pub layouts_list_state: ListState, | ||
} | ||
|
||
impl AppState { | ||
// Read the given byte range. | ||
// We're cheating by doing the reads synchronously since we're given byte offsets. | ||
pub fn read_bytes_sync(&self, range: Range<u64>) -> Bytes { | ||
let mut buf = BytesMut::zeroed((range.end - range.start).try_into().expect("zeroed")); | ||
self.reader | ||
.read_exact_at(&mut buf, range.start) | ||
.expect("read_exact_at sync"); | ||
|
||
buf.freeze() | ||
} | ||
} | ||
|
||
/// Create an app backed from a file path. | ||
pub async fn create_file_app(path: impl AsRef<Path>) -> VortexResult<AppState> { | ||
let reader = TokioFile::open(path)?; | ||
let size = reader.size().await?; | ||
let initial_read = read_initial_bytes(&reader, size).await?; | ||
|
||
let cursor = LayoutCursor::new(initial_read); | ||
|
||
Ok(AppState { | ||
reader, | ||
cursor, | ||
current_tab: Tab::default(), | ||
layouts_list_state: ListState::default().with_selected(Some(0)), | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
use std::path::Path; | ||
|
||
use app::{create_file_app, AppState, Tab}; | ||
use crossterm::event; | ||
use crossterm::event::{Event, KeyCode, KeyEventKind}; | ||
use ratatui::widgets::ListState; | ||
use ratatui::DefaultTerminal; | ||
use ui::render_app; | ||
use vortex::error::VortexResult; | ||
|
||
use crate::TOKIO_RUNTIME; | ||
|
||
mod app; | ||
mod ui; | ||
|
||
// Use the VortexResult and potentially launch a Backtrace. | ||
fn run(mut terminal: DefaultTerminal, mut app: AppState) -> VortexResult<()> { | ||
loop { | ||
terminal.draw(|frame| render_app(&mut app, frame))?; | ||
|
||
if let Event::Key(key) = event::read()? { | ||
if key.kind == KeyEventKind::Press { | ||
match key.code { | ||
KeyCode::Char('q') => break Ok(()), | ||
KeyCode::Tab => { | ||
// toggle between tabs | ||
app.current_tab = match app.current_tab { | ||
Tab::Layout => Tab::Encodings, | ||
Tab::Encodings => Tab::Layout, | ||
}; | ||
} | ||
KeyCode::Up => { | ||
// We send the key-up to the list state if we're looking at | ||
// the Layouts tab. | ||
if app.current_tab == Tab::Layout { | ||
app.layouts_list_state.scroll_up_by(1); | ||
} | ||
} | ||
KeyCode::Down => { | ||
if app.current_tab == Tab::Layout { | ||
app.layouts_list_state.scroll_down_by(1); | ||
} | ||
} | ||
KeyCode::Enter => { | ||
if app.current_tab == Tab::Layout { | ||
// Descend into the layout subtree for the selected child. | ||
let selected = app.layouts_list_state.selected().unwrap_or_default(); | ||
app.cursor = app.cursor.child(selected); | ||
|
||
// Reset the list scroll state. | ||
app.layouts_list_state = ListState::default().with_selected(Some(0)); | ||
} | ||
} | ||
KeyCode::Left => { | ||
if app.current_tab == Tab::Layout { | ||
// Ascend back up to the Parent node | ||
app.cursor = app.cursor.parent(); | ||
// Reset the list scroll state. | ||
app.layouts_list_state = ListState::default().with_selected(Some(0)); | ||
} | ||
} | ||
// Most events not handled | ||
_ => {} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
// TODO: add tui_logger and have a logs tab so we can see the log output from | ||
// doing Vortex things. | ||
|
||
pub fn exec_tui(file: impl AsRef<Path>) -> VortexResult<()> { | ||
let app = TOKIO_RUNTIME.block_on(create_file_app(file))?; | ||
|
||
let mut terminal = ratatui::init(); | ||
terminal.clear()?; | ||
|
||
run(terminal, app)?; | ||
|
||
ratatui::restore(); | ||
Ok(()) | ||
} |
Oops, something went wrong.