diff --git a/.lock b/.lock new file mode 100644 index 0000000..e69de29 diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/crates.js b/crates.js new file mode 100644 index 0000000..1d2b164 --- /dev/null +++ b/crates.js @@ -0,0 +1 @@ +window.ALL_CRATES = ["lz_str","lz_str_py","lz_str_wasm"]; \ No newline at end of file diff --git a/help.html b/help.html new file mode 100644 index 0000000..e364073 --- /dev/null +++ b/help.html @@ -0,0 +1,2 @@ +
Redirecting to ../../lz_str/fn.compress.html...
+ + + \ No newline at end of file diff --git a/lz_str/compress/fn.compress_internal.html b/lz_str/compress/fn.compress_internal.html new file mode 100644 index 0000000..707d633 --- /dev/null +++ b/lz_str/compress/fn.compress_internal.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.compress_internal.html...
+ + + \ No newline at end of file diff --git a/lz_str/compress/fn.compress_to_base64.html b/lz_str/compress/fn.compress_to_base64.html new file mode 100644 index 0000000..6ce870b --- /dev/null +++ b/lz_str/compress/fn.compress_to_base64.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.compress_to_base64.html...
+ + + \ No newline at end of file diff --git a/lz_str/compress/fn.compress_to_encoded_uri_component.html b/lz_str/compress/fn.compress_to_encoded_uri_component.html new file mode 100644 index 0000000..cf6d807 --- /dev/null +++ b/lz_str/compress/fn.compress_to_encoded_uri_component.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.compress_to_encoded_uri_component.html...
+ + + \ No newline at end of file diff --git a/lz_str/compress/fn.compress_to_uint8_array.html b/lz_str/compress/fn.compress_to_uint8_array.html new file mode 100644 index 0000000..e3ceeee --- /dev/null +++ b/lz_str/compress/fn.compress_to_uint8_array.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.compress_to_uint8_array.html...
+ + + \ No newline at end of file diff --git a/lz_str/compress/fn.compress_to_utf16.html b/lz_str/compress/fn.compress_to_utf16.html new file mode 100644 index 0000000..a3ab501 --- /dev/null +++ b/lz_str/compress/fn.compress_to_utf16.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.compress_to_utf16.html...
+ + + \ No newline at end of file diff --git a/lz_str/decompress/fn.decompress.html b/lz_str/decompress/fn.decompress.html new file mode 100644 index 0000000..1d3dd1b --- /dev/null +++ b/lz_str/decompress/fn.decompress.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.decompress.html...
+ + + \ No newline at end of file diff --git a/lz_str/decompress/fn.decompress_from_base64.html b/lz_str/decompress/fn.decompress_from_base64.html new file mode 100644 index 0000000..2ea20b3 --- /dev/null +++ b/lz_str/decompress/fn.decompress_from_base64.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.decompress_from_base64.html...
+ + + \ No newline at end of file diff --git a/lz_str/decompress/fn.decompress_from_encoded_uri_component.html b/lz_str/decompress/fn.decompress_from_encoded_uri_component.html new file mode 100644 index 0000000..96d06dc --- /dev/null +++ b/lz_str/decompress/fn.decompress_from_encoded_uri_component.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.decompress_from_encoded_uri_component.html...
+ + + \ No newline at end of file diff --git a/lz_str/decompress/fn.decompress_from_uint8_array.html b/lz_str/decompress/fn.decompress_from_uint8_array.html new file mode 100644 index 0000000..8922472 --- /dev/null +++ b/lz_str/decompress/fn.decompress_from_uint8_array.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.decompress_from_uint8_array.html...
+ + + \ No newline at end of file diff --git a/lz_str/decompress/fn.decompress_from_utf16.html b/lz_str/decompress/fn.decompress_from_utf16.html new file mode 100644 index 0000000..2c2830d --- /dev/null +++ b/lz_str/decompress/fn.decompress_from_utf16.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.decompress_from_utf16.html...
+ + + \ No newline at end of file diff --git a/lz_str/decompress/fn.decompress_internal.html b/lz_str/decompress/fn.decompress_internal.html new file mode 100644 index 0000000..e067e5b --- /dev/null +++ b/lz_str/decompress/fn.decompress_internal.html @@ -0,0 +1,11 @@ + + + + +Redirecting to ../../lz_str/fn.decompress_internal.html...
+ + + \ No newline at end of file diff --git a/lz_str/fn.compress.html b/lz_str/fn.compress.html new file mode 100644 index 0000000..77dd187 --- /dev/null +++ b/lz_str/fn.compress.html @@ -0,0 +1,4 @@ +pub fn compress_internal<F>(
+ data: &[u16],
+ bits_per_char: u8,
+ to_char: F
+) -> Vec<u16>
The internal function for compressing data.
+All other compression functions are built on top of this. +It generally should not be used directly.
+pub fn compress_to_base64(data: impl IntoWideIter) -> String
pub fn compress_to_encoded_uri_component(data: impl IntoWideIter) -> String
pub fn compress_to_uint8_array(data: impl IntoWideIter) -> Vec<u8>
Compress a string into a Vec<u8>
.
pub fn compress_to_utf16(data: impl IntoWideIter) -> String
pub fn decompress(compressed: impl IntoWideIter) -> Option<Vec<u16>>
pub fn decompress_from_base64(compressed: &str) -> Option<Vec<u16>>
Decompress a &str
compressed with crate::compress_to_base64
.
Returns an error if the compressed data could not be decompressed.
+pub fn decompress_from_encoded_uri_component(
+ compressed: &str
+) -> Option<Vec<u16>>
Decompress a &str
compressed with crate::compress_to_encoded_uri_component
.
Returns an error if the compressed data could not be decompressed.
+pub fn decompress_from_uint8_array(compressed: &[u8]) -> Option<Vec<u16>>
Decompress a byte slice compressed with crate::compress_to_uint8_array
.
Returns an error if the compressed data could not be decompressed.
+pub fn decompress_from_utf16(compressed: &str) -> Option<Vec<u16>>
Decompress a &str
compressed with crate::compress_to_utf16
.
Returns an error if the compressed data could not be decompressed.
+A port of lz-string to Rust.
+ let data = "The quick brown fox jumps over the lazy dog";
+
+ // Compress the data. This cannot fail.
+ let compressed_data = lz_str::compress(data);
+
+ // Decompress the data.
+ // This may return `Option::None` if it fails.
+ // Make sure to do error-checking in a real application to prevent crashes!
+ let decompressed_data =
+ lz_str::decompress(compressed_data).expect("`compressed_data` is invalid");
+
+ // The decompressed_data should be the same as data, except encoded as UTF16.
+ // We undo that here.
+ // In a real application,
+ // you will want to do error checking to prevent users from causing crashes with invalid data.
+ let decompressed_data =
+ String::from_utf16(&decompressed_data).expect("`decompressed_data` is not valid UTF16");
+
+ assert!(data == decompressed_data);
The original library uses invalid UTF16 strings to represent data.
+To maintain compatability, this library uses a Vec
of u16
s instead of Rust strings where applicable.
+The IntoWideIter
trait exists to ease the passing of data into functions.
+Most functions accept this generic parameter instead of a concrete type.
+Look at this trait’s documentation to see what types this trait is implemented for.
Vec<u16>
.String
, which is valid base64.String
, which can be safely used in a uri.Vec<u8>
.String
.Vec<u16>
.
+The result contains possibly invalid UTF16.&str
compressed with crate::compress_to_base64
.&str
compressed with crate::compress_to_encoded_uri_component
.crate::compress_to_uint8_array
.&str
compressed with crate::compress_to_utf16
.pub trait IntoWideIter {
+ type Iter: Iterator<Item = u16>;
+
+ // Required method
+ fn into_wide_iter(self) -> Self::Iter;
+}
A trait to make it easier to pass arguments to functions.
+Convert this object into something that yields possibly invalid wide characters.
+pub fn compress_to_base64(input: &PyString) -> PyResult<String>
pub fn decompress_from_base64(input: &PyString) -> PyResult<String>
pub fn decompress(data: &JsValue) -> JsValue
Decompress a JsString
.
Vec<u16>
.","The internal function for compressing data.","Compress a string into a String
, which is valid base64.","Compress a string into a String
, which can be safely used …","Compress a string into a Vec<u8>
.","Compress a string as a valid String
.","Decompress a string into a Vec<u16>
. The result contains …","Decompress a &str
compressed with crate::compress_to_base64
…","Decompress a &str
compressed with …","Decompress a byte slice compressed with …","Decompress a &str
compressed with crate::compress_to_utf16
.","The internal decompress function.","Convert this object into something that yields possibly …"],"i":[0,3,0,0,0,0,0,0,0,0,0,0,0,0,3],"f":[0,0,[-1,[[2,[1]]],3],[[[4,[1]],5,-1],[[2,[1]]],[[7,[1],[[6,[1]]]]]],[-1,8,3],[-1,8,3],[-1,[[2,[5]]],3],[-1,8,3],[-1,[[9,[[2,[1]]]]],3],[10,[[9,[[2,[1]]]]]],[10,[[9,[[2,[1]]]]]],[[[4,[5]]],[[9,[[2,[1]]]]]],[10,[[9,[[2,[1]]]]]],[[-1,5],[[9,[[2,[1]]]]],[[12,[],[[11,[1]]]]]],[[[3,[],[[13,[-1]]]]],-1,[[12,[],[[11,[1]]]]]]],"c":[],"p":[[1,"u16"],[5,"Vec",15],[10,"IntoWideIter",0],[1,"slice"],[1,"u8"],[17,"Output"],[10,"Fn",16],[5,"String",17],[6,"Option",18],[1,"str"],[17,"Item"],[10,"Iterator",19],[17,"Iter"]],"b":[]}],\
+["lz_str_py",{"doc":"","t":"HH","n":["compress_to_base64","decompress_from_base64"],"q":[[0,"lz_str_py"],[2,"pyo3::types::string"],[3,"alloc::string"],[4,"pyo3::err"]],"d":["",""],"i":[0,0],"f":[[1,[[3,[2]]]],[1,[[3,[2]]]]],"c":[],"p":[[5,"PyString",2],[5,"String",3],[8,"PyResult",4]],"b":[]}],\
+["lz_str_wasm",{"doc":"","t":"HH","n":["compress","decompress"],"q":[[0,"lz_str_wasm"],[2,"wasm_bindgen"]],"d":["Compress a JsString
.","Decompress a JsString
."],"i":[0,0],"f":[[1,1],[1,1]],"c":[],"p":[[5,"JsValue",2]],"b":[]}]\
+]'));
+if (typeof exports !== 'undefined') exports.searchIndex = searchIndex;
+else if (window.initSearch) window.initSearch(searchIndex);
diff --git a/settings.html b/settings.html
new file mode 100644
index 0000000..ca288dc
--- /dev/null
+++ b/settings.html
@@ -0,0 +1,2 @@
+1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +
use crate::constants::BASE64_KEY;
+use crate::constants::CLOSE_CODE;
+use crate::constants::START_CODE_BITS;
+use crate::constants::U16_CODE;
+use crate::constants::U8_CODE;
+use crate::constants::URI_KEY;
+use crate::IntoWideIter;
+use std::collections::hash_map::Entry as HashMapEntry;
+use std::convert::TryInto;
+
+#[cfg(not(feature = "rustc-hash"))]
+type HashMap<K, V> = std::collections::HashMap<K, V>;
+
+#[cfg(not(feature = "rustc-hash"))]
+type HashSet<T> = std::collections::HashSet<T>;
+
+#[cfg(feature = "rustc-hash")]
+type HashMap<K, V> = rustc_hash::FxHashMap<K, V>;
+
+#[cfg(feature = "rustc-hash")]
+type HashSet<T> = rustc_hash::FxHashSet<T>;
+
+/// The number of "base codes",
+/// the default codes of all streams.
+///
+/// These are U8_CODE, U16_CODE, and CLOSE_CODE.
+const NUM_BASE_CODES: usize = 3;
+
#[derive(Debug)]
pub(crate) struct CompressContext<'a, F> {
    /// Maps phrases (slices of the input) to their assigned dictionary codes.
    dictionary: HashMap<&'a [u16], u32>,
    /// Chars whose single-char codes have been allocated but whose literal
    /// value has not yet been written to the output stream.
    dictionary_to_create: HashSet<u16>,

    /// The current word, w,
    /// in terms of indexes into the input.
    w_start_idx: usize,
    w_end_idx: usize,

    // The counter for increasing the current number of bits in a code.
    // The max size of this is 1 << max(num_bits) == 1 + u32::MAX, so we use u64.
    enlarge_in: u64,

    /// The input buffer.
    input: &'a [u16],

    /// The output buffer.
    output: Vec<u16>,

    /// The bit buffer.
    bit_buffer: u16,

    /// The current number of bits in a code.
    ///
    /// This is a u8,
    /// because we currently assume the max code size is 32 bits.
    /// 32 < u8::MAX
    num_bits: u8,

    /// The current bit position.
    bit_position: u8,

    /// The maximum # of bits per char.
    ///
    /// This value may not exceed 16,
    /// as the reference implementation will also not handle values over 16.
    bits_per_char: u8,

    /// A transformation function to map a u16 to another u16,
    /// before appending it to the output buffer.
    to_char: F,
}
+
impl<'a, F> CompressContext<'a, F>
where
    F: Fn(u16) -> u16,
{
    /// Make a new [`CompressContext`].
    ///
    /// # Panics
    /// Panics if `bits_per_char` exceeds the number of bits in a u16.
    #[inline]
    pub fn new(input: &'a [u16], bits_per_char: u8, to_char: F) -> Self {
        assert!(usize::from(bits_per_char) <= std::mem::size_of::<u16>() * 8);

        CompressContext {
            dictionary: HashMap::default(),
            dictionary_to_create: HashSet::default(),

            w_start_idx: 0,
            w_end_idx: 0,

            // 2 codes (u8 literal, u16 literal) remain before the code
            // width must grow past START_CODE_BITS.
            enlarge_in: 2,

            input,
            output: Vec::with_capacity(input.len() >> 1), // Lowball, assume we can get a 50% reduction in size.

            bit_buffer: 0,

            num_bits: START_CODE_BITS,

            bit_position: 0,
            bits_per_char,
            to_char,
        }
    }

    /// Emit the code for the current word `w` (`input[w_start_idx..w_end_idx]`).
    ///
    /// If `w` starts with a char whose literal has not yet been written to the
    /// stream, the char itself is emitted (prefixed with U8_CODE or U16_CODE);
    /// otherwise the dictionary code for `w` is emitted.
    #[inline]
    pub fn produce_w(&mut self) {
        let w = &self.input[self.w_start_idx..self.w_end_idx];

        match w
            .first()
            .map(|first_w_char| self.dictionary_to_create.take(first_w_char))
        {
            Some(Some(first_w_char)) => {
                if first_w_char < 256 {
                    self.write_bits(self.num_bits, U8_CODE.into());
                    self.write_bits(8, first_w_char.into());
                } else {
                    self.write_bits(self.num_bits, U16_CODE.into());
                    self.write_bits(16, first_w_char.into());
                }
                self.decrement_enlarge_in();
            }
            None | Some(None) => {
                // `w` was built only from dictionary hits, so this lookup cannot fail.
                self.write_bits(self.num_bits, *self.dictionary.get(w).unwrap());
            }
        }
        // NOTE(review): the literal branch above decrements twice in total
        // (once in the branch, once here) — this mirrors the reference
        // implementation's bookkeeping.
        self.decrement_enlarge_in();
    }

    /// Append the bit to the bit buffer.
    #[inline]
    pub fn write_bit(&mut self, bit: bool) {
        self.bit_buffer = (self.bit_buffer << 1) | u16::from(bit);
        self.bit_position += 1;

        // Once a full output char has accumulated, map it through `to_char`
        // and flush it to the output buffer.
        if self.bit_position == self.bits_per_char {
            self.bit_position = 0;
            let output_char = (self.to_char)(self.bit_buffer);
            self.bit_buffer = 0;

            self.output.push(output_char);
        }
    }

    /// Write the low `n` bits of `value`, least significant bit first.
    #[inline]
    pub fn write_bits(&mut self, n: u8, mut value: u32) {
        for _ in 0..n {
            self.write_bit(value & 1 == 1);
            value >>= 1;
        }
    }

    /// Count down towards the next code-width increase; when the counter hits
    /// zero, grow `num_bits` by one and reset the counter.
    #[inline]
    pub fn decrement_enlarge_in(&mut self) {
        self.enlarge_in -= 1;
        if self.enlarge_in == 0 {
            self.enlarge_in = 1 << self.num_bits;
            self.num_bits += 1;
        }
    }

    /// Compress a `u16`. This represents a wide char.
    #[inline]
    pub fn write_u16(&mut self, i: usize) {
        let c = &self.input[i];

        // First sight of this char: allocate its code and remember that its
        // literal value still has to be emitted.
        let dictionary_len = self.dictionary.len();
        if let HashMapEntry::Vacant(entry) = self.dictionary.entry(std::slice::from_ref(c)) {
            entry.insert((dictionary_len + NUM_BASE_CODES).try_into().unwrap());
            self.dictionary_to_create.insert(*c);
        }

        // wc = w + c.
        let wc = &self.input[self.w_start_idx..self.w_end_idx + 1];

        let dictionary_len = self.dictionary.len();
        match self.dictionary.entry(wc) {
            HashMapEntry::Occupied(_entry) => {
                // w = wc.
                self.w_end_idx += 1;
            }
            HashMapEntry::Vacant(entry) => {
                // Add wc to the dictionary.
                entry.insert((dictionary_len + NUM_BASE_CODES).try_into().unwrap());

                // Originally, this was before adding wc to the dict.
                // However, we only use the dict for a lookup that will crash if it fails in produce_w.
                // Therefore, moving it here should be fine.
                self.produce_w();

                // w = c.
                self.w_start_idx = i;
                self.w_end_idx = i + 1;
            }
        }
    }

    /// Finish the stream and get the final result.
    #[inline]
    pub fn finish(mut self) -> Vec<u16> {
        let w = &self.input[self.w_start_idx..self.w_end_idx];

        // Output the code for w.
        if !w.is_empty() {
            self.produce_w();
        }

        // Mark the end of the stream
        self.write_bits(self.num_bits, CLOSE_CODE.into());

        let str_len = self.output.len();
        // Flush the last char by padding with zero bits until it is emitted.
        while self.output.len() == str_len {
            self.write_bit(false);
        }

        self.output
    }

    /// Perform the compression and return the result.
    pub fn compress(mut self) -> Vec<u16> {
        for i in 0..self.input.len() {
            self.write_u16(i);
        }
        self.finish()
    }
}
+
+/// Compress a string into a [`Vec<u16>`].
+///
+/// The resulting [`Vec`] may contain invalid UTF16.
+#[inline]
+pub fn compress(data: impl IntoWideIter) -> Vec<u16> {
+ let data: Vec<u16> = data.into_wide_iter().collect();
+ compress_internal(&data, 16, std::convert::identity)
+}
+
+/// Compress a string as a valid [`String`].
+///
+/// This function converts the result back into a Rust [`String`] since it is guaranteed to be valid UTF16.
+#[inline]
+pub fn compress_to_utf16(data: impl IntoWideIter) -> String {
+ let data: Vec<u16> = data.into_wide_iter().collect();
+ let compressed = compress_internal(&data, 15, |n| n + 32);
+ let mut compressed =
+ String::from_utf16(&compressed).expect("`compress_to_utf16 output was not valid unicode`");
+ compressed.push(' ');
+
+ compressed
+}
+
+/// Compress a string into a [`String`], which can be safely used in a uri.
+///
+/// This function converts the result back into a Rust [`String`] since it is guaranteed to be valid unicode.
+#[inline]
+pub fn compress_to_encoded_uri_component(data: impl IntoWideIter) -> String {
+ let data: Vec<u16> = data.into_wide_iter().collect();
+ let compressed = compress_internal(&data, 6, |n| u16::from(URI_KEY[usize::from(n)]));
+
+ String::from_utf16(&compressed)
+ .expect("`compress_to_encoded_uri_component` output was not valid unicode`")
+}
+
+/// Compress a string into a [`String`], which is valid base64.
+///
+/// This function converts the result back into a Rust [`String`] since it is guaranteed to be valid unicode.
+pub fn compress_to_base64(data: impl IntoWideIter) -> String {
+ let data: Vec<u16> = data.into_wide_iter().collect();
+ let mut compressed = compress_internal(&data, 6, |n| u16::from(BASE64_KEY[usize::from(n)]));
+
+ let mod_4 = compressed.len() % 4;
+
+ if mod_4 != 0 {
+ for _ in mod_4..(4 + 1) {
+ compressed.push(u16::from(b'='));
+ }
+ }
+
+ String::from_utf16(&compressed).expect("`compress_to_base64` output was not valid unicode`")
+}
+
+/// Compress a string into a [`Vec<u8>`].
+pub fn compress_to_uint8_array(data: impl IntoWideIter) -> Vec<u8> {
+ compress(data)
+ .into_iter()
+ .flat_map(|value| value.to_be_bytes())
+ .collect()
+}
+
+/// The internal function for compressing data.
+///
+/// All other compression functions are built on top of this.
+/// It generally should not be used directly.
+#[inline]
+pub fn compress_internal<F>(data: &[u16], bits_per_char: u8, to_char: F) -> Vec<u16>
+where
+ F: Fn(u16) -> u16,
+{
+ let ctx = CompressContext::new(data, bits_per_char, to_char);
+ ctx.compress()
+}
+
/// The 65-byte alphabet for uri-component encoding: 64 data characters
/// plus a final padding character.
pub const URI_KEY: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$";
/// The standard base64 alphabet, with `=` as the final (padding) byte.
pub const BASE64_KEY: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

/// The stream code for a `u8`.
pub const U8_CODE: u8 = 0;

/// The stream code for a `u16`.
pub const U16_CODE: u8 = 1;

/// End of stream signal
pub const CLOSE_CODE: u8 = 2;

/// The starting size of a code.
///
/// Compression starts with the following codes:
/// 0: u8
/// 1: u16
/// 2: close stream
pub const START_CODE_BITS: u8 = 2;
+
1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +
use crate::constants::BASE64_KEY;
+use crate::constants::CLOSE_CODE;
+use crate::constants::START_CODE_BITS;
+use crate::constants::U16_CODE;
+use crate::constants::U8_CODE;
+use crate::constants::URI_KEY;
+use crate::IntoWideIter;
+use std::convert::TryFrom;
+use std::convert::TryInto;
+
/// A bit-level reader over a stream of compressed `u16` chars.
#[derive(Debug)]
pub struct DecompressContext<I> {
    val: u16,
    compressed_data: I,
    position: u16,
    reset_val: u16,
}

impl<I> DecompressContext<I>
where
    I: Iterator<Item = u16>,
{
    /// Make a new [`DecompressContext`] over `compressed_data`.
    ///
    /// # Errors
    /// Returns `None` if the iterator yields no elements.
    ///
    /// # Panics
    /// Panics if `bits_per_char` is greater than the number of bits in a `u16`.
    #[inline]
    pub fn new(mut compressed_data: I, bits_per_char: u8) -> Option<Self> {
        assert!(usize::from(bits_per_char) <= std::mem::size_of::<u16>() * 8);

        // The mask for the highest bit of an input char.
        // (1 << 15) <= u16::MAX, so this cannot overflow.
        let reset_val = 1_u16 << (bits_per_char - 1);
        let first = compressed_data.next()?;

        Some(Self {
            val: first,
            compressed_data,
            position: reset_val,
            reset_val,
        })
    }

    /// Read one bit, most significant bit first within each input char.
    ///
    /// Returns `None` once the underlying iterator is exhausted.
    #[inline]
    pub fn read_bit(&mut self) -> Option<bool> {
        let bit = self.val & self.position != 0;
        self.position >>= 1;

        // The mask ran off the end of this char: advance to the next one.
        if self.position == 0 {
            self.position = self.reset_val;
            self.val = self.compressed_data.next()?;
        }

        Some(bit)
    }

    /// Read `n` bits, assembling them least significant bit first.
    ///
    /// `u32` is the return type as we expect all possible codes to be within that type's range.
    #[inline]
    pub fn read_bits(&mut self, n: u8) -> Option<u32> {
        let mut value = 0_u32;
        for shift in 0..n {
            value |= u32::from(self.read_bit()?) << shift;
        }

        Some(value)
    }
}
+
+/// Decompress a string into a [`Vec<u16>`].
+/// The result contains possibly invalid UTF16.
+///
+/// # Errors
+/// Returns `None` if the decompression fails.
+#[inline]
+pub fn decompress(compressed: impl IntoWideIter) -> Option<Vec<u16>> {
+ decompress_internal(compressed.into_wide_iter(), 16)
+}
+
+/// Decompress a [`&str`] compressed with [`crate::compress_to_utf16`].
+///
+/// # Errors
+/// Returns an error if the compressed data could not be decompressed.
+#[inline]
+pub fn decompress_from_utf16(compressed: &str) -> Option<Vec<u16>> {
+ decompress_internal(compressed.encode_utf16().map(|c| c - 32), 15)
+}
+
+/// Decompress a [`&str`] compressed with [`crate::compress_to_encoded_uri_component`].
+///
+/// # Errors
+/// Returns an error if the compressed data could not be decompressed.
+#[inline]
+pub fn decompress_from_encoded_uri_component(compressed: &str) -> Option<Vec<u16>> {
+ let compressed: Option<Vec<u16>> = compressed
+ .encode_utf16()
+ .map(|c| {
+ if c == u16::from(b' ') {
+ u16::from(b'+')
+ } else {
+ c
+ }
+ })
+ .map(u32::from)
+ .flat_map(|c| {
+ URI_KEY
+ .iter()
+ .position(|k| u8::try_from(c) == Ok(*k))
+ .map(|n| u16::try_from(n).ok())
+ })
+ .collect();
+
+ decompress_internal(compressed?.into_iter(), 6)
+}
+
+/// Decompress a [`&str`] compressed with [`crate::compress_to_base64`].
+///
+/// # Errors
+/// Returns an error if the compressed data could not be decompressed.
+#[inline]
+pub fn decompress_from_base64(compressed: &str) -> Option<Vec<u16>> {
+ let compressed: Option<Vec<u16>> = compressed
+ .encode_utf16()
+ .flat_map(|c| {
+ BASE64_KEY
+ .iter()
+ .position(|k| u8::try_from(c) == Ok(*k))
+ .map(|n| u16::try_from(n).ok())
+ })
+ .collect();
+
+ decompress_internal(compressed?.into_iter(), 6)
+}
+
+/// Decompress a byte slice compressed with [`crate::compress_to_uint8_array`].
+///
+/// # Errors
+/// Returns an error if the compressed data could not be decompressed.
+#[inline]
+pub fn decompress_from_uint8_array(compressed: &[u8]) -> Option<Vec<u16>> {
+ // The buffer is a UCS2 big endian encoded string.
+ // If it is not a multiple of 2, it is invalid.
+ let compressed_len = compressed.len();
+ if compressed_len & 1 == 1 {
+ return None;
+ }
+
+ let buffer: Vec<u16> = compressed
+ .chunks(2)
+ .map(|slice| {
+ // The slice is always guaranteed to be 2 here.
+ // We check to see if the length is a multiple of 2 earlier.
+ u16::from_be_bytes(slice.try_into().unwrap())
+ })
+ .collect();
+
+ decompress(buffer)
+}
+
/// The internal decompress function.
///
/// All other decompress functions are built on top of this one.
/// It generally should not be used directly.
///
/// # Errors
/// Returns an error if the compressed data could not be decompressed.
///
/// # Panics
/// Panics if `bits_per_char` is greater than the number of bits in a `u16`.
#[inline]
pub fn decompress_internal<I>(compressed: I, bits_per_char: u8) -> Option<Vec<u16>>
where
    I: Iterator<Item = u16>,
{
    // An empty input stream decompresses to an empty output.
    let mut ctx = match DecompressContext::new(compressed, bits_per_char) {
        Some(ctx) => ctx,
        None => return Some(Vec::new()),
    };

    // Codes 0..3 are reserved (U8_CODE, U16_CODE, CLOSE_CODE); seed the
    // dictionary with placeholders so later codes index correctly.
    let mut dictionary: Vec<Vec<u16>> = Vec::with_capacity(16);
    for i in 0_u16..3_u16 {
        dictionary.push(vec![i]);
    }

    // u8::MAX > u2::MAX
    let code = u8::try_from(ctx.read_bits(START_CODE_BITS)?).unwrap();
    let first_entry = match code {
        U8_CODE | U16_CODE => {
            // U8_CODE carries 8 literal bits, U16_CODE carries 16.
            let bits_to_read = (code * 8) + 8;
            // bits_to_read == 8 or 16 <= 16
            u16::try_from(ctx.read_bits(bits_to_read)?).unwrap()
        }
        CLOSE_CODE => return Some(Vec::new()),
        _ => return None,
    };
    dictionary.push(vec![first_entry]);

    // `w` is the previously-decoded phrase; `result` accumulates the output.
    let mut w = vec![first_entry];
    let mut result = vec![first_entry];
    let mut num_bits: u8 = 3;
    let mut enlarge_in: u64 = 4;
    let mut entry;
    loop {
        let mut code = ctx.read_bits(num_bits)?;
        match u8::try_from(code) {
            Ok(code_u8 @ (U8_CODE | U16_CODE)) => {
                // A literal char: read it, add it to the dictionary, and treat
                // its new dictionary index as the current code.
                let bits_to_read = (code_u8 * 8) + 8;
                // if cc == 0 {
                //     if (errorCount++ > 10000) return "Error"; // TODO: Error logic
                // }

                // bits_to_read == 8 or 16 <= 16
                let bits = u16::try_from(ctx.read_bits(bits_to_read)?).unwrap();
                dictionary.push(vec![bits]);
                code = u32::try_from(dictionary.len() - 1).ok()?;
                enlarge_in -= 1;
            }
            Ok(CLOSE_CODE) => return Some(result),
            _ => {}
        }

        // Grow the code width once all codes of the current width are used.
        if enlarge_in == 0 {
            enlarge_in = 1 << num_bits;
            num_bits += 1;
        }

        // Return error if code cannot be converted to dictionary index
        let code_usize = usize::try_from(code).ok()?;
        if let Some(entry_value) = dictionary.get(code_usize) {
            entry = entry_value.clone();
        } else if code_usize == dictionary.len() {
            // LZW's "KwKwK" special case: the code refers to the entry that is
            // currently being constructed, i.e. w + first char of w.
            entry = w.clone();
            entry.push(*w.first()?);
        } else {
            return None;
        }

        result.extend(&entry);

        // Add w+entry[0] to the dictionary.
        let mut to_be_inserted = w.clone();
        to_be_inserted.push(*entry.first()?);
        dictionary.push(to_be_inserted);
        enlarge_in -= 1;

        w = entry;

        // Same width-growth check as above; the counter may also hit zero here.
        if enlarge_in == 0 {
            enlarge_in = 1 << num_bits;
            num_bits += 1;
        }
    }
}
+
1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +
#![forbid(unsafe_code)]
+#![deny(missing_docs)]
+#![warn(clippy::cast_lossless)]
+#![warn(clippy::cast_possible_wrap)]
+#![warn(clippy::cast_possible_truncation)]
+
+//! A port of [lz-string](https://github.com/pieroxy/lz-string) to Rust.
+//!
+//!
+//! # Example
+//! ```rust
+//! # // The demonstrated functions correspond with `LZString.compress` and `LZString.decompress` from the JS version.
+//! # fn main() {
+//! let data = "The quick brown fox jumps over the lazy dog";
+//!
+//! // Compress the data. This cannot fail.
+//! let compressed_data = lz_str::compress(data);
+//!
+//! // Decompress the data.
+//! // This may return `Option::None` if it fails.
+//! // Make sure to do error-checking in a real application to prevent crashes!
+//! let decompressed_data =
+//! lz_str::decompress(compressed_data).expect("`compressed_data` is invalid");
+//!
+//! // The decompressed_data should be the same as data, except encoded as UTF16.
+//! // We undo that here.
+//! // In a real application,
+//! // you will want to do error checking to prevent users from causing crashes with invalid data.
+//! let decompressed_data =
+//! String::from_utf16(&decompressed_data).expect("`decompressed_data` is not valid UTF16");
+//!
+//! assert!(data == decompressed_data);
+//! # }
+//! ```
+//!
+//! # Passing and Receiving Data
+//! The original library uses invalid UTF16 strings to represent data.
+//! To maintain compatibility, this library uses a [`Vec`] of [`u16`]s instead of Rust strings where applicable.
+//! The [`IntoWideIter`] trait exists to ease the passing of data into functions.
+//! Most functions accept this generic parameter instead of a concrete type.
+//! Look at this trait's documentation to see what types this trait is implemented for.
+
+mod compress;
+mod constants;
+mod decompress;
+
+pub use crate::compress::compress;
+pub use crate::compress::compress_internal;
+pub use crate::compress::compress_to_base64;
+pub use crate::compress::compress_to_encoded_uri_component;
+pub use crate::compress::compress_to_uint8_array;
+pub use crate::compress::compress_to_utf16;
+pub use crate::decompress::decompress;
+pub use crate::decompress::decompress_from_base64;
+pub use crate::decompress::decompress_from_encoded_uri_component;
+pub use crate::decompress::decompress_from_uint8_array;
+pub use crate::decompress::decompress_from_utf16;
+pub use crate::decompress::decompress_internal;
+
/// A trait to make it easier to pass arguments to functions.
///
/// Implementors can be turned into an iterator over possibly-invalid
/// UTF16 code units, which is the common data currency of this crate.
pub trait IntoWideIter {
    /// The Iterator type
    type Iter: Iterator<Item = u16>;

    /// Convert this object into something that yields possibly invalid wide characters.
    fn into_wide_iter(self) -> Self::Iter;
}
+
// A `&str` yields its UTF16 code units directly.
impl<'a> IntoWideIter for &'a str {
    type Iter = std::str::EncodeUtf16<'a>;

    #[inline]
    fn into_wide_iter(self) -> Self::Iter {
        self.encode_utf16()
    }
}

// Convenience impl so a `&&str` (e.g. from iterating a `&[&str]`) also works.
impl<'a> IntoWideIter for &&'a str {
    type Iter = std::str::EncodeUtf16<'a>;

    #[inline]
    fn into_wide_iter(self) -> Self::Iter {
        self.encode_utf16()
    }
}

// A `&String` is viewed as a `&str`.
impl<'a> IntoWideIter for &'a String {
    type Iter = std::str::EncodeUtf16<'a>;

    #[inline]
    fn into_wide_iter(self) -> Self::Iter {
        self.as_str().encode_utf16()
    }
}

// A `&[u16]` is already wide data; it is simply copied out.
impl<'a> IntoWideIter for &'a [u16] {
    type Iter = std::iter::Copied<std::slice::Iter<'a, u16>>;

    #[inline]
    fn into_wide_iter(self) -> Self::Iter {
        self.iter().copied()
    }
}

// TODO: Remove this in the next version.
// We do not benefit from taking ownership of the buffer.
impl IntoWideIter for Vec<u16> {
    type Iter = std::vec::IntoIter<u16>;

    #[inline]
    fn into_wide_iter(self) -> Self::Iter {
        self.into_iter()
    }
}

// A `&Vec<u16>` behaves like a `&[u16]`.
impl<'a> IntoWideIter for &'a Vec<u16> {
    type Iter = std::iter::Copied<std::slice::Iter<'a, u16>>;

    #[inline]
    fn into_wide_iter(self) -> Self::Iter {
        self.iter().copied()
    }
}
+
#[cfg(test)]
mod test {
    use super::*;

    // Exercises every `IntoWideIter` impl against the same expected output.
    #[test]
    fn into_wide_iter_check() {
        const DATA: &str = "test argument";
        let expected: Vec<u16> = DATA.encode_utf16().collect();

        // Collect `arg` through the trait and compare against `expected`.
        fn check(arg: impl IntoWideIter, expected: &[u16]) {
            let arg: Vec<u16> = arg.into_wide_iter().collect();
            assert!(arg == expected);
        }

        {
            let data: &str = DATA;
            check(data, &expected);
        }

        {
            let data: &&str = &DATA;
            check(data, &expected);
        }

        // TODO: Should IntoWideIter be implemented for String?
        // It's always better to pass an &str or an &String, so users should be forced to do that?
        // {
        //     let data: String = DATA.into();
        //     check(data, &expected);
        // }

        {
            let data: String = DATA.into();
            let data: &String = &data;
            check(data, &expected);
        }

        {
            let data: Vec<u16> = DATA.encode_utf16().collect();
            let data: &[u16] = &data;
            check(data, &expected);
        }

        {
            let data: Vec<u16> = DATA.encode_utf16().collect();
            check(data, &expected);
        }

        {
            let data: Vec<u16> = DATA.encode_utf16().collect();
            let data: &Vec<u16> = &data;
            check(data, &expected);
        }
    }
}
+
1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +
use pyo3::exceptions::PyValueError;
+use pyo3::prelude::*;
+use pyo3::types::PyString;
+
+#[pyfunction(name = "compressToBase64")]
+pub fn compress_to_base64(input: &PyString) -> PyResult<String> {
+ let input = input.to_str()?;
+ Ok(lz_str::compress_to_base64(input))
+}
+
+#[pyfunction(name = "decompressFromBase64")]
+pub fn decompress_from_base64(input: &PyString) -> PyResult<String> {
+ let input = input.to_str()?;
+ match lz_str::decompress_from_base64(input) {
+ Some(result) => {
+ // TODO: Make string from invalid unicode
+ match String::from_utf16(&result) {
+ Ok(value) => Ok(value),
+ Err(_e) => Err(PyValueError::new_err("invalid unicode output")),
+ }
+ }
+ None => Err(PyValueError::new_err("invalid input")),
+ }
+}
+
/// Python module entry point: registers the exported functions on the module.
#[pymodule]
fn lz_str_py(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(compress_to_base64, m)?)?;
    m.add_function(wrap_pyfunction!(decompress_from_base64, m)?)?;
    Ok(())
}
+
1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +
use js_sys::JsString;
+use wasm_bindgen::prelude::*;
+use wasm_bindgen::JsCast;
+
#[wasm_bindgen]
extern "C" {
    // JS-side helper that builds a JsString from a slice of UTF-16 code units.
    // NOTE(review): the implementation lives on the JS side — confirm the
    // `convertU16SliceToString` binding is provided by the host/snippet.
    #[wasm_bindgen(js_name = "convertU16SliceToString")]
    fn convert_u16_slice_to_string(slice: &[u16]) -> JsString;
}
+
+/// Compress a [`JsString`].
+#[wasm_bindgen]
+pub fn compress(data: &JsValue) -> JsValue {
+ let data: &JsString = match data.dyn_ref::<JsString>() {
+ Some(data) => data,
+ None => {
+ return JsValue::NULL;
+ }
+ };
+ let data: Vec<u16> = data.iter().collect();
+ let compressed = lz_str::compress(data);
+ convert_u16_slice_to_string(&compressed).into()
+}
+
+/// Decompress a [`JsString`].
+#[wasm_bindgen]
+pub fn decompress(data: &JsValue) -> JsValue {
+ let data: &JsString = match data.dyn_ref::<JsString>() {
+ Some(data) => data,
+ None => {
+ return JsValue::NULL;
+ }
+ };
+ let data: Vec<u16> = data.iter().collect();
+ lz_str::decompress(data)
+ .map(|decompressed| convert_u16_slice_to_string(&decompressed).into())
+ .unwrap_or(JsValue::NULL)
+}
+
fn:
) to \
+ restrict the search to a given item kind.","Accepted kinds are: fn
, mod
, struct
, \
+ enum
, trait
, type
, macro
, \
+ and const
.","Search functions by type signature (e.g., vec -> usize
or \
+ -> vec
or String, enum:Cow -> bool
)","You can look for items with an exact name by putting double quotes around \
+ your request: \"string\"
","Look for functions that accept or return \
+ slices and \
+ arrays by writing \
+ square brackets (e.g., -> [u8]
or [] -> Option
)","Look for items inside another one by searching for a path: vec::Vec
",].map(x=>""+x+"
").join("");const div_infos=document.createElement("div");addClass(div_infos,"infos");div_infos.innerHTML="${value.replaceAll(" ", " ")}
`}else{error[index]=value}});output+=`