Skip to content

Commit

Permalink
Merge pull request #254 from keymanapp/fix/253-data-fixups-for-stagin…
Browse files Browse the repository at this point in the history
…g-17

fix: select first matching language name where there are conflicts, and fix null language description
  • Loading branch information
mcdurdin authored Jun 3, 2024
2 parents 44679c9 + 286a0b9 commit 10d532d
Show file tree
Hide file tree
Showing 12 changed files with 156 additions and 149 deletions.
10 changes: 9 additions & 1 deletion tools/db/build/build.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,15 @@ function BuildDatabase($DBDataSources, $schema, $do_force) {
$this->sqlrun("${data_path}keyboards.sql");
$this->sqlrun("${data_path}models.sql");

$this->sqlrun(dirname(__FILE__)."/search-prepare-data.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-1.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-2.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-3.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-4.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-5.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-6.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-7.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-8.sql");
$this->sqlrun(dirname(__FILE__)."/search-prepare-data-9.sql");
$this->sqlrun(dirname(__FILE__)."/indexes.sql");

$this->sqlrun(dirname(__FILE__)."/full-text-indexes.sql", false, false);
Expand Down
5 changes: 4 additions & 1 deletion tools/db/build/build_keyboards_script.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -249,14 +249,17 @@ function generate_keyboard_language_inserts() {
assert(!is_array($keyboard->languages)); // array format was deprecated in 1.0.5, kmcomp should never generate it any more
foreach($keyboard->languages as $id => $language) {
$this->parse_bcp47($id, $lang, $region, $script);
$langName = empty($language->languageName)
? (empty($language->displayName) ? 'undefined' : $language->displayName)
: $language->languageName;
$result .= <<<END
$insert
({$this->sqlv($keyboard, 'id')},
{$this->sqlv(null, strtolower($id))},
{$this->sqlv(null, $lang)},
{$this->sqlv(null, $region)},
{$this->sqlv(null, $script)},
{$this->sqlv($language, 'languageName')});
{$this->sqlv(null, $langName)});
GO
Expand Down
6 changes: 6 additions & 0 deletions tools/db/build/search-prepare-data-1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Normalise the optional ISO 639-3 columns: an empty string becomes NULL,
-- any other value is written back unchanged.
UPDATE t_iso639_3
SET
  Part2B   = NULLIF(Part2B, ''),
  Part2T   = NULLIF(Part2T, ''),
  Part1    = NULLIF(Part1, ''),
  _Comment = NULLIF(_Comment, '');

-- Must run as its own statement, after the blanks have been cleared, so that
-- a blank Part1 falls through to Id: CanonicalId is Part1 when present,
-- otherwise Id.
UPDATE t_iso639_3
SET
  CanonicalId = COALESCE(CAST(Part1 AS NVARCHAR), CAST(Id AS NVARCHAR));

15 changes: 15 additions & 0 deletions tools/db/build/search-prepare-data-2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
--
-- Sanitise t_iso639_3_names and t_language_index by removing entries for
-- languages that carry a name flagged as pejorative in the Ethnologue index
-- (nametype 'LP' or 'DP').
--
-- NOTE(review): both deletes match on the language id only, so every row for
-- such a language is removed, not just the flagged name -- confirm that this
-- is the intended scope.
--

DELETE FROM t_iso639_3_names
WHERE EXISTS (
  SELECT 1
  FROM t_ethnologue_language_index AS el
  WHERE el.LangID = t_iso639_3_names.Id
    AND el.nametype IN ('LP', 'DP')
);

DELETE FROM t_language_index
WHERE EXISTS (
  SELECT 1
  FROM t_ethnologue_language_index AS el
  WHERE el.LangID = t_language_index.language_id
    AND el.nametype IN ('LP', 'DP')
);

-- The flagged Ethnologue rows go last: the two deletes above rely on them.
DELETE FROM t_ethnologue_language_index
WHERE nametype IN ('LP', 'DP');


19 changes: 19 additions & 0 deletions tools/db/build/search-prepare-data-3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--
-- Flag deprecated keyboards and models in t_keyboard / t_model: an entry is
-- deprecated when a t_keyboard_related / t_model_related row names it as the
-- related item with deprecates = 1.
--

UPDATE t_keyboard
SET deprecated = 1
WHERE t_keyboard.keyboard_id IN (
  SELECT kr.related_keyboard_id
  FROM t_keyboard_related AS kr
  WHERE kr.deprecates = 1
);

UPDATE t_model
SET deprecated = 1
WHERE t_model.model_id IN (
  SELECT mr.related_model_id
  FROM t_model_related AS mr
  WHERE mr.deprecates = 1
);

--
-- A keyboard is obsolete when it is not Unicode or has been replaced by
-- another keyboard. Depends on the deprecated flag set above.
--

UPDATE t_keyboard
SET obsolete = 1
WHERE is_unicode = 0
   OR deprecated = 1;
29 changes: 29 additions & 0 deletions tools/db/build/search-prepare-data-4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
--
-- Canonicalize bcp47 codes into langtags entries
--
-- Fixup those that are missing from t_langtag, first
--

-- Find those that are missing where there is a matching base tag but not a
-- matching full tag. Region data is borrowed from the langtag entry that the
-- keyboard's base language tag resolves to.
--
-- Several keyboards may describe the same bcp47 tag with different names.
-- Selecting kl.description directly would make DISTINCT emit one row per
-- name, i.e. duplicate t_langtag.tag values, so a single name is picked per
-- tag instead. The ORDER BY keeps that choice stable between builds.

INSERT
  t_langtag (tag, [full], iso639_3, region, regionname, name, sldr, script, windows)
SELECT DISTINCT
  kl.bcp47,
  kl.bcp47,
  null,
  t.region,
  t.regionname,
  (select top 1 kl0.description
     from t_keyboard_language kl0
    where kl0.bcp47 = kl.bcp47
    order by kl0.keyboard_id, kl0.description),
  0,
  kl.script_id,
  kl.bcp47
FROM
  t_keyboard_language kl LEFT JOIN
  t_langtag_tag tt ON kl.bcp47 = tt.tag LEFT JOIN
  t_langtag_tag tt0 ON kl.language_id = tt0.tag LEFT JOIN
  t_langtag t ON tt0.base_tag = t.tag
WHERE
  tt.tag IS NULL AND        -- the full bcp47 tag is not known yet
  tt0.tag IS NOT NULL       -- but the base language tag is

16 changes: 16 additions & 0 deletions tools/db/build/search-prepare-data-5.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Make the keyboard-defined tags searchable: each bcp47 tag that has no
-- t_langtag_tag entry yet, but whose base language tag does, is registered
-- as its own base tag.

INSERT INTO
  t_langtag_tag (base_tag, tag, tagtype)
SELECT DISTINCT
  kbl.bcp47,
  kbl.bcp47,
  5 -- custom (keyboard) tag type
FROM
  t_keyboard_language AS kbl
WHERE
  NOT EXISTS (
    SELECT 1
    FROM t_langtag_tag AS full_tag
    WHERE full_tag.tag = kbl.bcp47
  )
  AND EXISTS (
    SELECT 1
    FROM t_langtag_tag AS lang_tag
    WHERE lang_tag.tag = kbl.language_id
  )


19 changes: 19 additions & 0 deletions tools/db/build/search-prepare-data-6.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Fixup those where we cannot find any matching base tag at all (e.g. qa? tags will fit into this)
--
-- No langtag entry is available to borrow region data from, so these tags
-- are filed under region '001' / 'World'.
--
-- Several keyboards may describe the same bcp47 tag with different names, so
-- a single name is picked per tag. The lookup reads t_keyboard_language
-- unqualified, like every other reference in this script, so that it resolves
-- in the schema being built rather than a hard-coded one. The ORDER BY keeps
-- the chosen name stable between builds.

INSERT
  t_langtag (tag, [full], iso639_3, region, regionname, name, sldr, script, windows)
SELECT DISTINCT
  kl.bcp47,
  kl.bcp47,
  null,
  '001', --t.region,
  'World', --t.regionname,
  (select top 1 kl0.description
     from t_keyboard_language kl0
    where kl0.bcp47 = kl.bcp47
    order by kl0.keyboard_id, kl0.description),
  0,
  kl.script_id,
  kl.bcp47
FROM
  t_keyboard_language kl LEFT JOIN
  t_langtag_tag tt ON kl.bcp47 = tt.tag
WHERE
  tt.tag IS NULL
15 changes: 15 additions & 0 deletions tools/db/build/search-prepare-data-7.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Make the remaining keyboard-defined tags searchable: each bcp47 tag that
-- has a t_langtag row but no t_langtag_tag entry yet is registered as its
-- own base tag.

INSERT INTO
  t_langtag_tag (base_tag, tag, tagtype)
SELECT DISTINCT
  kbl.bcp47,
  kbl.bcp47,
  5 -- custom (keyboard) tag type
FROM
  t_keyboard_language AS kbl
WHERE
  NOT EXISTS (
    SELECT 1
    FROM t_langtag_tag AS known_tag
    WHERE known_tag.tag = kbl.bcp47
  )
  AND EXISTS (
    SELECT 1
    FROM t_langtag AS lt
    WHERE lt.tag = kbl.bcp47
  )
15 changes: 15 additions & 0 deletions tools/db/build/search-prepare-data-8.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Register language names supplied by keyboard authors: for every keyboard
-- language whose bcp47 tag resolves to a base tag, add its description as a
-- name of that base tag unless the same (tag, name) pair is already present.

INSERT INTO
  t_langtag_name (tag, name, name_kd, nametype)
SELECT DISTINCT
  resolved.base_tag,
  kbl.description,
  kbl.description, -- TODO: we can't do full normalisation here, but we'll live with it for now
  4 -- custom
FROM
  t_keyboard_language AS kbl
  INNER JOIN t_langtag_tag AS resolved
    ON kbl.bcp47 = resolved.tag
  LEFT JOIN t_langtag_name AS existing
    ON existing.tag = resolved.base_tag
   AND existing.name = kbl.description
WHERE
  existing._id IS NULL -- anti-join: keep only names not yet recorded
9 changes: 9 additions & 0 deletions tools/db/build/search-prepare-data-9.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Last step: link every keyboard to the base langtag that its bcp47 tag
-- resolves to. Keyboard languages with no t_langtag_tag entry are dropped by
-- the inner join.
-- NOTE(review): the insert relies on the column order of t_keyboard_langtag
-- (keyboard id, then tag) -- confirm against the table definition.

INSERT INTO
  t_keyboard_langtag
SELECT
  kbl.keyboard_id,
  resolved.base_tag
FROM
  t_langtag_tag AS resolved
  INNER JOIN t_keyboard_language AS kbl
    ON resolved.tag = kbl.bcp47
147 changes: 0 additions & 147 deletions tools/db/build/search-prepare-data.sql

This file was deleted.

0 comments on commit 10d532d

Please sign in to comment.