Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bidi Data Adapter #1784

Merged
merged 21 commits into from
Apr 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions components/properties/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ icu_provider = { version = "0.5", path = "../../provider/core", features = ["mac
icu_uniset = { version = "0.4.1", path = "../../utils/uniset"}
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
zerovec = { version = "0.6", path = "../../utils/zerovec", features = ["derive"] }
unicode-bidi = { version = "0.3.8", optional = true , default-features = false }

[dev-dependencies]
icu = { path = "../icu", default-features = false }
Expand All @@ -52,3 +53,4 @@ std = ["icu_provider/std"]
default = []
serde = ["dep:serde", "zerovec/serde", "icu_uniset/serde", "icu_codepointtrie/serde"]
datagen = ["serde", "zerovec/serde_serialize", "icu_uniset/serde_serialize", "icu_codepointtrie/serde_serialize"]
bidi = [ "dep:unicode-bidi" ]
163 changes: 163 additions & 0 deletions components/properties/src/bidi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! This module exposes tooling for running the [unicode bidi algorithm](https://unicode.org/reports/tr9/) using ICU4X data.
//!
//! [`BidiClassAdapter`] enables ICU4X to provide data to the [`unicode-bidi`](https://docs.rs/unicode-bidi) crate.
//!
//! # Examples
//!
//! ```
//! use icu_properties::bidi::BidiClassAdapter;
//! use icu_properties::{maps, BidiClass};
//! use icu_codepointtrie::CodePointTrie;
//! use unicode_bidi::BidiClass as DataSourceBidiClass;
//! use unicode_bidi::BidiDataSource;
//! use unicode_bidi::BidiInfo;
//! // This example text is defined using `concat!` because some browsers
//! // and text editors have trouble displaying bidi strings.
//! let text = concat![
//! "א",
//! "ב",
//! "ג",
//! "a",
//! "b",
//! "c",
//! ];
//!
//! // Create an adapter to provide the data to `BidiInfo`.
//! let provider = icu_testdata::get_provider();
//!
//! let payload =
//! maps::get_bidi_class(&provider)
//! .expect("The data should be valid");
//! let data_struct = payload.get();
//! let bc = &data_struct.code_point_trie;
//!
//! let adapter = BidiClassAdapter::new(&bc);
//! // Resolve embedding levels within the text. Pass `None` to detect the
//! // paragraph level automatically.
//!
//! let bidi_info = BidiInfo::new_with_data_source(&adapter, &text, None);
//!
//! // This paragraph has embedding level 1 because its first strong character is RTL.
//! assert_eq!(bidi_info.paragraphs.len(), 1);
//! let para = &bidi_info.paragraphs[0];
//! assert_eq!(para.level.number(), 1);
//! assert_eq!(para.level.is_rtl(), true);
//!
//! // Re-ordering is done after wrapping each paragraph into a sequence of
//! // lines. For this example, I'll just use a single line that spans the
//! // entire paragraph.
//! let line = para.range.clone();
//!
//! let display = bidi_info.reorder_line(para, line);
//! assert_eq!(display, concat![
//! "a",
//! "b",
//! "c",
//! "ג",
//! "ב",
//! "א",
//! ]);
//! ```

use crate::props::BidiClass;
use icu_codepointtrie::CodePointTrie;
use unicode_bidi::data_source::BidiDataSource;
use unicode_bidi::BidiClass as DataSourceBidiClass;

/// An adapter to convert from icu4x `BidiClass` to `unicode_bidi::BidiClass`.
///
/// # Example
///
/// ```
/// use icu_properties::bidi::BidiClassAdapter;
/// use icu_properties::{maps, BidiClass};
/// use icu_codepointtrie::CodePointTrie;
/// use unicode_bidi::BidiClass as DataSourceBidiClass;
/// use unicode_bidi::BidiDataSource;
///
/// let provider = icu_testdata::get_provider();
///
/// let payload =
/// maps::get_bidi_class(&provider)
/// .expect("The data should be valid");
/// let data_struct = payload.get();
/// let bc = &data_struct.code_point_trie;
///
/// let adapter = BidiClassAdapter::new(&bc);
/// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
/// assert_eq!(adapter.bidi_class('ع'), DataSourceBidiClass::AL);
/// ```
pub struct BidiClassAdapter<'a> {
sffc marked this conversation as resolved.
Show resolved Hide resolved
bidi_trie: &'a CodePointTrie<'a, BidiClass>,
}

impl<'a> BidiClassAdapter<'a> {
    /// Creates a new instance of `BidiClassAdapter`.
    ///
    /// The adapter stores only the `bidi_trie` reference; the trie itself is
    /// not copied, so it must outlive the adapter.
    pub fn new(bidi_trie: &'a CodePointTrie<'a, BidiClass>) -> Self {
        Self { bidi_trie }
    }
}

impl<'a> BidiDataSource for BidiClassAdapter<'a> {
    /// Looks up `c` in the wrapped [`CodePointTrie`] and returns the
    /// corresponding [`DataSourceBidiClass`].
    ///
    /// A trie value that matches none of the classes handled here is reported
    /// as `DataSourceBidiClass::ON`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu_properties::bidi::BidiClassAdapter;
    /// use icu_properties::maps;
    /// use unicode_bidi::BidiClass as DataSourceBidiClass;
    /// use unicode_bidi::BidiDataSource;
    ///
    /// let provider = icu_testdata::get_provider();
    ///
    /// let payload =
    ///     maps::get_bidi_class(&provider)
    ///         .expect("The data should be valid");
    /// let data_struct = payload.get();
    /// let bc = &data_struct.code_point_trie;
    ///
    /// let adapter = BidiClassAdapter::new(bc);
    /// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
    /// ```
    ///
    /// [`CodePointTrie`]: icu_codepointtrie::CodePointTrie
    fn bidi_class(&self, c: char) -> DataSourceBidiClass {
        // Translate the ICU4X value returned by the trie, one class at a time.
        match self.bidi_trie.get(u32::from(c)) {
            BidiClass::LeftToRight => DataSourceBidiClass::L,
            BidiClass::RightToLeft => DataSourceBidiClass::R,
            BidiClass::EuropeanNumber => DataSourceBidiClass::EN,
            BidiClass::EuropeanSeparator => DataSourceBidiClass::ES,
            BidiClass::EuropeanTerminator => DataSourceBidiClass::ET,
            BidiClass::ArabicNumber => DataSourceBidiClass::AN,
            BidiClass::CommonSeparator => DataSourceBidiClass::CS,
            BidiClass::ParagraphSeparator => DataSourceBidiClass::B,
            BidiClass::SegmentSeparator => DataSourceBidiClass::S,
            BidiClass::WhiteSpace => DataSourceBidiClass::WS,
            BidiClass::OtherNeutral => DataSourceBidiClass::ON,
            BidiClass::LeftToRightEmbedding => DataSourceBidiClass::LRE,
            BidiClass::LeftToRightOverride => DataSourceBidiClass::LRO,
            BidiClass::ArabicLetter => DataSourceBidiClass::AL,
            BidiClass::RightToLeftEmbedding => DataSourceBidiClass::RLE,
            BidiClass::RightToLeftOverride => DataSourceBidiClass::RLO,
            BidiClass::PopDirectionalFormat => DataSourceBidiClass::PDF,
            BidiClass::NonspacingMark => DataSourceBidiClass::NSM,
            BidiClass::BoundaryNeutral => DataSourceBidiClass::BN,
            BidiClass::FirstStrongIsolate => DataSourceBidiClass::FSI,
            BidiClass::LeftToRightIsolate => DataSourceBidiClass::LRI,
            BidiClass::RightToLeftIsolate => DataSourceBidiClass::RLI,
            BidiClass::PopDirectionalIsolate => DataSourceBidiClass::PDI,
            // This must not happen; fall back to `ON` rather than failing.
            _ => DataSourceBidiClass::ON,
        }
    }
}
3 changes: 3 additions & 0 deletions components/properties/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@
)
)]

#[cfg(feature = "bidi")]
pub mod bidi;
sffc marked this conversation as resolved.
Show resolved Hide resolved

mod error;
pub mod maps;
mod props;
Expand Down