Skip to content

Commit

Permalink
Regenerated indices to the 2016-01-20 version.
Browse files Browse the repository at this point in the history
Also updated the label mapping to include newer aliases.

Affected encodings: gb18030 (index), koi8_u (index, label),
shift_jis (label). Other encodings are only updated for tests.
  • Loading branch information
lifthrasiir committed Aug 28, 2016
1 parent b912f1e commit 40717f5
Show file tree
Hide file tree
Showing 43 changed files with 2,487 additions and 2,524 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ path = "src/types"
# so we should use tilde requirements here.

[dependencies.encoding-index-singlebyte]
version = "~1.20141219.6"
version = "~1.20160120.0"
path = "src/index/singlebyte"

[dependencies.encoding-index-korean]
Expand All @@ -48,7 +48,7 @@ version = "~1.20141219.6"
path = "src/index/japanese"

[dependencies.encoding-index-simpchinese]
version = "~1.20141219.6"
version = "~1.20160120.0"
path = "src/index/simpchinese"

[dependencies.encoding-index-tradchinese]
Expand Down
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ authors:

.PHONY: test
test:
# `test_correct_table` tests for indices with non-BMP mappings tend to be
# very slow without optimization, so the japanese and tradchinese index
# crates are tested with `-C opt-level=1`.
cargo test -v
cargo test -v -p encoding-index-singlebyte
cargo test -v -p encoding-index-korean
cargo test -v -p encoding-index-japanese
RUSTFLAGS='-C opt-level=1' cargo test -v -p encoding-index-japanese
cargo test -v -p encoding-index-simpchinese
cargo test -v -p encoding-index-tradchinese
RUSTFLAGS='-C opt-level=1' cargo test -v -p encoding-index-tradchinese
cargo test -v -p encoding-types

.PHONY: readme
Expand Down
24 changes: 6 additions & 18 deletions src/index/gen_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,6 @@ def make_minimal_search(data, invdata, premap, maxsearch):
return bestsearch

def generate_single_byte_index(opts, crate, name):
modname = name.replace('-', '_')

data = [None] * 128
invdata = {}
comments = []
Expand All @@ -254,7 +252,6 @@ def generate_single_byte_index(opts, crate, name):
assert 2**16 <= bitmap < 2**32

args = dict(
modname=modname,
datasz=len(data),
maxvalue=max(invdata),
bitmap=bitmap,
Expand Down Expand Up @@ -319,18 +316,15 @@ def generate_single_byte_index(opts, crate, name):
|}}
|
|#[cfg(test)]
|single_byte_tests!(
| mod = {modname}
|);
|single_byte_tests! {{
|}}
''')

forwardsz = 2 * len(data)
backwardsz = len(trielower) + 2 * len(trieupper)
return forwardsz, backwardsz, 0

def generate_multi_byte_index(opts, crate, name):
modname = name.replace('-', '_')

# some indices need an additional function for efficient mapping.
premap = lambda i: i
premapcode = ''
Expand Down Expand Up @@ -547,7 +541,6 @@ def premap(i):
minkey = min(data)
maxkey = max(data) + 1
args = dict(
modname=modname,
premapcode=premapcode,
maxvalue=max(invdata),
dataoff=minkey,
Expand Down Expand Up @@ -751,8 +744,7 @@ def premap(i):
write_fmt(f, args, '''\
|
|#[cfg(test)]
|multi_byte_tests!(
| mod = {modname},
|multi_byte_tests! {{
''')
write_fmt(f, args, remap, '''\
| remap = [{remapmin}, {remapmax}],
Expand All @@ -770,7 +762,7 @@ def premap(i):
| dups = []
''')
write_fmt(f, args, '''\
|);
|}}
''')

forwardsz = 2 * (maxkey - minkey)
Expand All @@ -782,8 +774,6 @@ def premap(i):
return forwardsz, backwardsz + backwardmore, backwardszslow + backwardmore

def generate_multi_byte_range_lbound_index(opts, crate, name):
modname = name.replace('-', '_')

data = []
comments = []
for key, value in whatwg_index(opts, name, comments):
Expand All @@ -806,7 +796,6 @@ def generate_multi_byte_range_lbound_index(opts, crate, name):
valueubound = maxvalue + 1

args = dict(
modname=modname,
datasz=len(data),
minkey=minkey,
maxkey=maxkey,
Expand Down Expand Up @@ -873,11 +862,10 @@ def generate_multi_byte_range_lbound_index(opts, crate, name):
|}}
|
|#[cfg(test)]
|multi_byte_range_tests!(
| mod = {modname},
|multi_byte_range_tests! {{
| key = [{minkey}, {maxkey}], key < {keyubound},
| value = [{minvalue}, {maxvalue}], value < {valueubound}
|);
|}}
''')

forwardsz = 4 * len(data)
Expand Down
4 changes: 4 additions & 0 deletions src/index/japanese/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ no-optimized-legacy-encoding = []
# TODO consider using dev-dependencies instead (Cargo issue #860)
version = "0.1.5"
path = "../tests"

[profile.test]
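# `test_correct_table` tests tend to be especially slow without optimization.
# NOTE: Cargo only honors profiles from the root manifest, so this setting
# takes effect only when tests are run from this crate's own directory.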
opt-level = 1
7 changes: 3 additions & 4 deletions src/index/japanese/jis0208.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// https://encoding.spec.whatwg.org/
//
// Identifier: cbaa91f3deb7d0841faf5c33041fc15a285da0e87e64ab802c4bf04b7c4da861
// Date: 2014-12-19
// Date: 2016-01-20

#[allow(dead_code)] const X: u16 = 0xffff;

Expand Down Expand Up @@ -2494,8 +2494,7 @@ pub fn backward_remapped(code: u32) -> u16 {
}

#[cfg(test)]
multi_byte_tests!(
mod = jis0208,
multi_byte_tests! {
remap = [8272, 8835],
dups = [
1207, 1208, 1209, 1212, 1213, 1214, 1217, 1218, 1219, 8644, 10716,
Expand Down Expand Up @@ -2539,4 +2538,4 @@ multi_byte_tests!(
11087, 11088, 11089, 11090, 11091, 11092, 11093, 11094, 11095, 11096,
11097, 11098, 11099, 11100, 11101, 11102, 11103,
]
);
}
7 changes: 3 additions & 4 deletions src/index/japanese/jis0212.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// https://encoding.spec.whatwg.org/
//
// Identifier: 83bf90dd1c591a4355730d8c4567efc499d74da7490531019ef22a879991cfb7
// Date: 2014-12-19
// Date: 2016-01-20

#[allow(dead_code)] const X: u16 = 0xffff;

Expand Down Expand Up @@ -1841,7 +1841,6 @@ pub fn backward(code: u32) -> u16 {
}

#[cfg(test)]
multi_byte_tests!(
mod = jis0212,
multi_byte_tests! {
dups = []
);
}
7 changes: 3 additions & 4 deletions src/index/korean/euc_kr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// https://encoding.spec.whatwg.org/
//
// Identifier: 1d97134cbf187263585bc8f593ca4196654ed4c7a673f5672eaad4f5d9fdc4ba
// Date: 2014-12-19
// Date: 2016-01-20

#[allow(dead_code)] const X: u16 = 0xffff;

Expand Down Expand Up @@ -4606,7 +4606,6 @@ pub fn backward(code: u32) -> u16 {
}

#[cfg(test)]
multi_byte_tests!(
mod = euc_kr,
multi_byte_tests! {
dups = []
);
}
2 changes: 1 addition & 1 deletion src/index/simpchinese/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "encoding-index-simpchinese"
version = "1.20141219.6"
version = "1.20160120.0"
authors = ["Kang Seonghoon <public@mearie.org>"]

description = "Index tables for simplified Chinese character encodings"
Expand Down
Loading

0 comments on commit 40717f5

Please sign in to comment.