Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: select first matching language name where there are conflicts, and fix null language description #254

Merged
merged 3 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion tools/db/build/build.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,15 @@ function BuildDatabase($DBDataSources, $schema, $do_force) {
$this->sqlrun("${data_path}keyboards.sql");
$this->sqlrun("${data_path}models.sql");

$this->sqlrun(dirname(__FILE__)."/search-prepare-data.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-1.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-2.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-3.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-4.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-5.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-6.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-7.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-8.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-9.sql");
$this->sqlrun(dirname(__FILE__)."/indexes.sql");

$this->sqlrun(dirname(__FILE__)."/full-text-indexes.sql", false, false);
Expand Down
5 changes: 4 additions & 1 deletion tools/db/build/build_keyboards_script.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,17 @@ function generate_keyboard_language_inserts() {
assert(!is_array($keyboard->languages)); // array format was deprecated in 1.0.5, kmcomp should never generate it any more
foreach($keyboard->languages as $id => $language) {
$this->parse_bcp47($id, $lang, $region, $script);
$langName = empty($language->languageName)
? (empty($language->displayName) ? 'undefined' : $language->displayName)
: $language->languageName;
$result .= <<<END
$insert
({$this->sqlv($keyboard, 'id')},
{$this->sqlv(null, strtolower($id))},
{$this->sqlv(null, $lang)},
{$this->sqlv(null, $region)},
{$this->sqlv(null, $script)},
{$this->sqlv($language, 'languageName')});
{$this->sqlv(null, $langName)});
GO
Expand Down
6 changes: 6 additions & 0 deletions tools/db/build/search-prepare-data-1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Normalise t_iso639_3: empty-string values in the optional code columns and
-- in the comment column are replaced with NULL.
UPDATE t_iso639_3
SET Part2B = NULL
WHERE Part2B = '';

UPDATE t_iso639_3
SET Part2T = NULL
WHERE Part2T = '';

UPDATE t_iso639_3
SET Part1 = NULL
WHERE Part1 = '';

UPDATE t_iso639_3
SET _Comment = NULL
WHERE _Comment = '';

-- Intentionally unfiltered: every row receives a CanonicalId, taken from Part1
-- when it is non-NULL (i.e. after the blanking above) and from Id otherwise.
UPDATE t_iso639_3
SET CanonicalId = COALESCE(CAST(Part1 AS NVARCHAR), CAST(Id AS NVARCHAR));

15 changes: 15 additions & 0 deletions tools/db/build/search-prepare-data-2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
--
-- Sanitise t_iso639_3_names and t_language_index with respect to names marked
-- as pejorative in the Ethnologue index (nametype 'LP' or 'DP').
--
-- NOTE(review): both deletes match on the language id only, so every row for
-- a language that has any LP/DP index entry is removed, not just rows carrying
-- the flagged name -- confirm that this is the intended scope.
--

DELETE FROM t_iso639_3_names
WHERE EXISTS (
    SELECT 1
    FROM t_ethnologue_language_index AS el
    WHERE el.LangID = t_iso639_3_names.Id
      AND el.nametype IN ('LP', 'DP')
);

DELETE FROM t_language_index
WHERE EXISTS (
    SELECT 1
    FROM t_ethnologue_language_index AS el
    WHERE el.LangID = t_language_index.language_id
      AND el.nametype IN ('LP', 'DP')
);

-- The flagged index entries themselves are removed last, because the two
-- deletes above look them up to find the affected languages.
DELETE FROM t_ethnologue_language_index
WHERE nametype IN ('LP', 'DP');

19 changes: 19 additions & 0 deletions tools/db/build/search-prepare-data-3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--
-- Flag deprecated keyboards and models in t_keyboard / t_model: an entry is
-- deprecated when a row in the matching *_related table names it as the
-- related item with deprecates = 1.
--

UPDATE t_keyboard
SET deprecated = 1
WHERE EXISTS (
    SELECT 1
    FROM t_keyboard_related AS kr
    WHERE kr.deprecates = 1
      AND kr.related_keyboard_id = t_keyboard.keyboard_id
);

UPDATE t_model
SET deprecated = 1
WHERE EXISTS (
    SELECT 1
    FROM t_model_related AS mr
    WHERE mr.deprecates = 1
      AND mr.related_model_id = t_model.model_id
);

--
-- Any keyboard that has been replaced by another one, or is not Unicode, is
-- marked as obsolete. This reads the deprecated flag set above, so it must
-- run after the first update.
--

UPDATE t_keyboard
SET obsolete = 1
WHERE is_unicode = 0
   OR deprecated = 1;
29 changes: 29 additions & 0 deletions tools/db/build/search-prepare-data-4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
--
-- Canonicalize bcp47 codes into langtags entries
--
-- Keyboard tags that are missing from the langtags data are fixed up first.
-- This statement handles tags whose full bcp47 tag has no t_langtag_tag row
-- but whose language_id does. Region and regionname are copied from the
-- t_langtag row of that base tag when one exists, and are NULL otherwise.
--

INSERT INTO t_langtag
    (tag, [full], iso639_3, region, regionname, name, sldr, script, windows)
SELECT DISTINCT
    kl.bcp47,
    kl.bcp47,
    NULL,
    t.region,
    t.regionname,
    kl.description,
    0,
    kl.script_id,
    kl.bcp47
FROM t_keyboard_language AS kl
-- the language subtag on its own must be a known tag
INNER JOIN t_langtag_tag AS tt0
    ON tt0.tag = kl.language_id
LEFT JOIN t_langtag AS t
    ON t.tag = tt0.base_tag
-- the full keyboard tag must not be known yet
WHERE NOT EXISTS (
    SELECT 1
    FROM t_langtag_tag AS tt
    WHERE tt.tag = kl.bcp47
);

16 changes: 16 additions & 0 deletions tools/db/build/search-prepare-data-5.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Register custom keyboard tags in t_langtag_tag so they can be searched
-- against: every keyboard bcp47 tag that has no t_langtag_tag row of its own
-- but whose language_id does. Each such tag becomes its own base tag.

INSERT INTO t_langtag_tag
    (base_tag, tag, tagtype)
SELECT DISTINCT
    kl.bcp47,
    kl.bcp47,
    5 -- custom (keyboard) tag type
FROM t_keyboard_language AS kl
INNER JOIN t_langtag_tag AS tt0
    ON tt0.tag = kl.language_id
WHERE NOT EXISTS (
    SELECT 1
    FROM t_langtag_tag AS tt
    WHERE tt.tag = kl.bcp47
);

19 changes: 19 additions & 0 deletions tools/db/build/search-prepare-data-6.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Fixup those where we cannot find any matching base tag at all (e.g. qa? tags will fit into this)
-- Every keyboard bcp47 tag without a t_langtag_tag row gets a t_langtag entry
-- with the fixed region '001' / 'World'.

INSERT
t_langtag (tag, [full], iso639_3, region, regionname, name, sldr, script, windows)
SELECT DISTINCT
kl.bcp47,
kl.bcp47,
null,
'001', --t.region,
'World', --t.regionname,
-- A single description is picked per tag so that keyboards which disagree on
-- the language name do not yield several DISTINCT rows for the same tag.
-- NOTE(review): TOP 1 has no ORDER BY, so which description wins is not
-- defined -- consider adding one if a stable result is wanted.
-- NOTE(review): this is the only k0.-qualified table reference in the
-- statement -- confirm the schema prefix is intended.
(select top 1 kl0.description from k0.t_keyboard_language kl0 where kl0.bcp47 = kl.bcp47),
Copy link
Member Author

@mcdurdin mcdurdin Jun 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only change in the search-prepare-data*.sql series; was:

  kl.description,

0,
kl.script_id,
kl.bcp47
FROM
t_keyboard_language kl LEFT JOIN
t_langtag_tag tt ON kl.bcp47 = tt.tag
WHERE
-- anti-join: keep only keyboard tags with no t_langtag_tag match
tt.tag IS NULL
15 changes: 15 additions & 0 deletions tools/db/build/search-prepare-data-7.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Register in t_langtag_tag, for searching against, every keyboard bcp47 tag
-- that already has a t_langtag row but no t_langtag_tag row yet. Each such tag
-- becomes its own base tag.

INSERT INTO t_langtag_tag
    (base_tag, tag, tagtype)
SELECT DISTINCT
    kl.bcp47,
    kl.bcp47,
    5 -- custom (keyboard) tag type
FROM t_keyboard_language AS kl
INNER JOIN t_langtag AS t
    ON t.tag = kl.bcp47
WHERE NOT EXISTS (
    SELECT 1
    FROM t_langtag_tag AS tt
    WHERE tt.tag = kl.bcp47
);
15 changes: 15 additions & 0 deletions tools/db/build/search-prepare-data-8.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Add new names that have been defined by keyboard authors: for each keyboard
-- tag known to t_langtag_tag, insert its description under the base tag
-- unless t_langtag_name already holds that exact name for that base tag.

INSERT INTO t_langtag_name
    (tag, name, name_kd, nametype)
SELECT DISTINCT
    t.base_tag,
    kl.description,
    kl.description, -- TODO: we can't do full normalisation here, but we'll live with it for now
    4 -- custom
FROM t_keyboard_language AS kl
INNER JOIN t_langtag_tag AS t
    ON t.tag = kl.bcp47
-- anti-join: only rows with no existing name match survive the WHERE below
LEFT JOIN t_langtag_name AS n
    ON n.tag = t.base_tag
   AND n.name = kl.description
WHERE n._id IS NULL;
9 changes: 9 additions & 0 deletions tools/db/build/search-prepare-data-9.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Finally, match up all the keyboards with langtags: each keyboard language
-- row is resolved to the base tag of its bcp47 tag through t_langtag_tag.
-- NOTE(review): the INSERT has no column list, so it relies on the column
-- order of t_keyboard_langtag -- consider naming the target columns.

INSERT INTO t_keyboard_langtag
SELECT
    kl.keyboard_id,
    tt.base_tag
FROM t_keyboard_language AS kl
INNER JOIN t_langtag_tag AS tt
    ON tt.tag = kl.bcp47;
147 changes: 0 additions & 147 deletions tools/db/build/search-prepare-data.sql

This file was deleted.

Loading