diff --git a/README-pypi.rst b/README-pypi.rst
index 9a2551da..69e042a2 100644
--- a/README-pypi.rst
+++ b/README-pypi.rst
@@ -103,7 +103,7 @@ Features
Provide Zero-shot classification interface using Transformer-Bahasa to recognize texts without any labeled training data.
- **Hybrid 8-bit Quantization**
- Provide hybrid 8-bit quantization for all models to reduce speed inference up to 2x and model size up to 4x.
+ Provide hybrid 8-bit quantization for all models to speed up inference by up to 2x and reduce model size by up to 4x.
Pretrained Models
------------------
diff --git a/README.rst b/README.rst
index c5601a82..e08594de 100644
--- a/README.rst
+++ b/README.rst
@@ -123,7 +123,7 @@ Features
Provide Zero-shot classification interface using Transformer-Bahasa to recognize texts without any labeled training data.
- **Hybrid 8-bit Quantization**
- Provide hybrid 8-bit quantization for all models to reduce speed inference up to 2x and model size up to 4x.
+ Provide hybrid 8-bit quantization for all models to speed up inference by up to 2x and reduce model size by up to 4x.
Pretrained Models
------------------
diff --git a/docs/README.rst b/docs/README.rst
index c5601a82..e08594de 100644
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -123,7 +123,7 @@ Features
Provide Zero-shot classification interface using Transformer-Bahasa to recognize texts without any labeled training data.
- **Hybrid 8-bit Quantization**
- Provide hybrid 8-bit quantization for all models to reduce speed inference up to 2x and model size up to 4x.
+ Provide hybrid 8-bit quantization for all models to speed up inference by up to 2x and reduce model size by up to 4x.
Pretrained Models
------------------
diff --git a/docs/load-language-detection.ipynb b/docs/load-language-detection.ipynb
index 0f81ac4f..d2cbe6c6 100644
--- a/docs/load-language-detection.ipynb
+++ b/docs/load-language-detection.ipynb
@@ -18,6 +18,17 @@
""
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ "This module trained on both standard and local (included social media) language structures, so it is save to use for both.\n",
+ " \n",
+ "
"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
diff --git a/docs/load-translation-en-ms.ipynb b/docs/load-translation-en-ms.ipynb
index 3ec38c94..bab46014 100644
--- a/docs/load-translation-en-ms.ipynb
+++ b/docs/load-translation-en-ms.ipynb
@@ -38,8 +38,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 5.01 s, sys: 749 ms, total: 5.75 s\n",
- "Wall time: 5.09 s\n"
+ "CPU times: user 5.26 s, sys: 1.01 s, total: 6.27 s\n",
+ "Wall time: 7.2 s\n"
]
}
],
@@ -60,6 +60,13 @@
"execution_count": 2,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:tested on 77k EN-MY sentences.\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -82,6 +89,7 @@
" \n",
" | \n",
" Size (MB) | \n",
+ " Quantized Size (MB) | \n",
" BLEU | \n",
"
\n",
" \n",
@@ -89,16 +97,19 @@
" \n",
" small | \n",
" 42.7 | \n",
+ " 13.4 | \n",
" 0.142 | \n",
"
\n",
" \n",
" base | \n",
" 234.0 | \n",
+ " 82.7 | \n",
" 0.696 | \n",
"
\n",
" \n",
" large | \n",
" 817.0 | \n",
+ " 244.0 | \n",
" 0.699 | \n",
"
\n",
" \n",
@@ -106,10 +117,10 @@
""
],
"text/plain": [
- " Size (MB) BLEU\n",
- "small 42.7 0.142\n",
- "base 234.0 0.696\n",
- "large 817.0 0.699"
+ " Size (MB) Quantized Size (MB) BLEU\n",
+ "small 42.7 13.4 0.142\n",
+ "base 234.0 82.7 0.696\n",
+ "large 817.0 244.0 0.699"
]
},
"execution_count": 2,
@@ -159,6 +170,34 @@
"transformer_large = malaya.translation.en_ms.transformer(model = 'large')"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Load Quantized model\n",
+ "\n",
+ "To load 8-bit quantized model, simply pass `quantized = True`, default is `False`.\n",
+ "\n",
+ "We can expect slightly accuracy drop from quantized model, and not necessary faster than normal 32-bit float model, totally depends on machine."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:root:Load quantized model will cause accuracy drop.\n"
+ ]
+ }
+ ],
+ "source": [
+ "quantized_transformer = malaya.translation.en_ms.transformer(quantized = True)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -187,7 +226,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -196,7 +235,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -220,7 +259,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -246,7 +285,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 15,
"metadata": {
"scrolled": false
},
@@ -273,7 +312,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -296,7 +335,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -320,7 +359,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -549,6 +588,44 @@
"pprint(transformer.translate([string_news1, string_news2, string_news3], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['KUALA LUMPUR 1 Julai - Datuk Seri Anwar Ibrahim tidak sesuai menjadi calon '\n",
+ " 'Perdana Menteri kerana beliau didakwa tidak \"popular\" dalam kalangan orang '\n",
+ " 'Melayu, Tun Dr Mahathir Mohamad mendakwa, bekas Perdana Menteri itu '\n",
+ " 'dilaporkan berkata Presiden PKR itu memerlukan seseorang seperti dirinya '\n",
+ " 'bagi mendapatkan sokongan daripada orang Melayu dan memenangi pilihan raya.',\n",
+ " '(CNN) Peguam Negara New York Letitia James pada hari Isnin memerintahkan '\n",
+ " 'Black Lives Matter Foundation - yang menurutnya tidak berafiliasi dengan '\n",
+ " 'gerakan Black Lives Matter yang lebih besar - untuk berhenti mengumpulkan '\n",
+ " 'sumbangan di New York. \"Saya memerintahkan Black Lives Matter Foundation '\n",
+ " 'untuk berhenti secara haram menerima sumbangan yang ditujukan untuk gerakan '\n",
+ " '#BlackLivesMatter. Yayasan ini tidak berafiliasi dengan gerakan itu, namun '\n",
+ " 'ia menerima banyak sumbangan dan muhibah yang ditipu,\" tweet James.',\n",
+ " 'Di antara inisiatif luas yang diusulkan adalah kerangka pelabelan makanan '\n",
+ " 'yang berkelanjutan, penyusunan semula makanan yang diproses, dan bab '\n",
+ " 'keberlanjutan dalam semua perjanjian perdagangan dua hala EU. EU juga '\n",
+ " 'berencana untuk menerbitkan proposal untuk kerangka perundangan untuk sistem '\n",
+ " 'makanan lestari pada tahun 2023 untuk memastikan semua makanan di pasar EU '\n",
+ " 'menjadi semakin lestari.']\n",
+ "CPU times: user 25.3 s, sys: 13.3 s, total: 38.6 s\n",
+ "Wall time: 10.3 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([string_news1, string_news2, string_news3], beam_search = False))"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 12,
@@ -579,6 +656,36 @@
"pprint(transformer.translate([string_article1, string_article2], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Halaman ini berkongsi artikel terbaik saya untuk dibaca mengenai topik '\n",
+ " 'seperti kesihatan, kebahagiaan, kreativiti, produktiviti dan banyak lagi. '\n",
+ " 'Soalan utama yang mendorong kerja saya adalah, \"Bagaimana kita dapat hidup '\n",
+ " 'lebih baik?\" Untuk menjawab soalan itu, saya suka menulis mengenai kaedah '\n",
+ " 'berasaskan sains untuk menyelesaikan masalah praktikal.',\n",
+ " 'Pemadanan kabur pada skala. Dari 3.7 jam hingga 0.2 saat. Cara melakukan '\n",
+ " 'pemadanan rentetan pintar dengan cara yang dapat meningkatkan bahkan set '\n",
+ " 'data terbesar. Data di dunia nyata tidak kemas. Berurusan dengan set data '\n",
+ " 'yang tidak kemas menyakitkan dan terbakar sepanjang masa yang dapat '\n",
+ " 'dihabiskan untuk menganalisis data itu sendiri.']\n",
+ "CPU times: user 17 s, sys: 9.56 s, total: 26.5 s\n",
+ "Wall time: 5.83 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([string_article1, string_article2], beam_search = False))"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 13,
@@ -603,6 +710,30 @@
"pprint(transformer.translate([random_string1, random_string2], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['saya di sekolah perubatan.',\n",
+ " 'Emmerdale adalah album studio debut, lagu-lagu tidak dikeluarkan di A.S <> '\n",
+ " 'Lagu-lagu ini tidak dikeluarkan dalam edisi A.S. album tersebut dan '\n",
+ " 'sebelumnya tidak tersedia pada sebarang pelepasan A.S.']\n",
+ "CPU times: user 10.8 s, sys: 6.33 s, total: 17.1 s\n",
+ "Wall time: 3.63 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([random_string1, random_string2], beam_search = False))"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/docs/load-translation-ms-en.ipynb b/docs/load-translation-ms-en.ipynb
index e87ec253..4d8fe4ac 100644
--- a/docs/load-translation-ms-en.ipynb
+++ b/docs/load-translation-ms-en.ipynb
@@ -18,6 +18,17 @@
""
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "This module only trained on standard language structure, so it is not save to use it for local language structure.\n",
+ " \n",
+ "
"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
@@ -27,8 +38,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 5.14 s, sys: 755 ms, total: 5.89 s\n",
- "Wall time: 5.04 s\n"
+ "CPU times: user 4.96 s, sys: 676 ms, total: 5.63 s\n",
+ "Wall time: 4.64 s\n"
]
}
],
@@ -50,6 +61,13 @@
"execution_count": 2,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:tested on 100k MY-EN sentences.\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -72,6 +90,7 @@
" \n",
" | \n",
" Size (MB) | \n",
+ " Quantized Size (MB) | \n",
" BLEU | \n",
"
\n",
" \n",
@@ -79,16 +98,19 @@
" \n",
" small | \n",
" 42.7 | \n",
+ " 13.4 | \n",
" 0.626 | \n",
"
\n",
" \n",
" base | \n",
" 234.0 | \n",
+ " 82.7 | \n",
" 0.792 | \n",
"
\n",
" \n",
" large | \n",
" 815.0 | \n",
+ " 244.0 | \n",
" 0.714 | \n",
"
\n",
" \n",
@@ -96,10 +118,10 @@
""
],
"text/plain": [
- " Size (MB) BLEU\n",
- "small 42.7 0.626\n",
- "base 234.0 0.792\n",
- "large 815.0 0.714"
+ " Size (MB) Quantized Size (MB) BLEU\n",
+ "small 42.7 13.4 0.626\n",
+ "base 234.0 82.7 0.792\n",
+ "large 815.0 244.0 0.714"
]
},
"execution_count": 2,
@@ -136,6 +158,34 @@
"transformer_large = malaya.translation.ms_en.transformer(model = 'large')"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Load Quantized model\n",
+ "\n",
+ "To load 8-bit quantized model, simply pass `quantized = True`, default is `False`.\n",
+ "\n",
+ "We can expect slightly accuracy drop from quantized model, and not necessary faster than normal 32-bit float model, totally depends on machine."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:root:Load quantized model will cause accuracy drop.\n"
+ ]
+ }
+ ],
+ "source": [
+ "quantized_transformer = malaya.translation.ms_en.transformer(quantized = True)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -164,7 +214,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -173,7 +223,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -198,7 +248,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -223,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -246,7 +296,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -276,7 +326,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -302,7 +352,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -323,7 +373,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -514,6 +564,42 @@
"pprint(transformer.translate([string_news1, string_news2, string_news3], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['TANGKAK - Tan Sri Muhyiddin Yassin said he did not want to touch on '\n",
+ " 'political issues at the moment, instead focusing on the welfare of the '\n",
+ " \"people and efforts to revitalize the affected country's economy following \"\n",
+ " 'the Covid-19 pandemic. The prime minister explained the matter when speaking '\n",
+ " 'at a Leadership Meeting with Gambir State Assembly (DUN) leaders at the '\n",
+ " 'Bukit Gambir Multipurpose Hall today.',\n",
+ " 'ALOR SETAR - Pakatan Harapan (PH) political turmoil has not ended when it '\n",
+ " \"has failed to finalize the Prime Minister's candidate agreed upon. Sik MP \"\n",
+ " 'Ahmad Tarmizi Sulaiman said he had suggested former United Nations (UN) '\n",
+ " \"Indigenous Party chairman Tun Dr Mahathir Mohamad and People's Justice Party \"\n",
+ " '(PKR) president Datuk Seri Anwar Ibrahim resign from politics as a solution.',\n",
+ " 'Senior Minister (Security Cluster) Datuk Seri Ismail Sabri Yaakob said the '\n",
+ " 'relaxation was given as the government was aware of the problems they had to '\n",
+ " 'renew the document. He added that for foreigners who had passed the social '\n",
+ " 'visit during the Movement Control Order (CPP) they could go to the nearest '\n",
+ " 'Immigration Department office for an extension.']\n",
+ "CPU times: user 23.5 s, sys: 13.1 s, total: 36.6 s\n",
+ "Wall time: 10.2 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([string_news1, string_news2, string_news3], beam_search = False))"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 17,
@@ -549,6 +635,41 @@
"pprint(transformer.translate([string_karangan, string_parlimen], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['In addition, career exhibitions help students determine their careers. As we '\n",
+ " 'know, the career market in Malaysia is very broad and there are still many '\n",
+ " 'job sectors in the country that are still vacant because it is difficult to '\n",
+ " 'find a truly qualified workforce. For example, the medical sector in '\n",
+ " 'Malaysia is facing a critical shortage of labor, especially specialists due '\n",
+ " 'to the resignation of doctors and physicians to enter the private sector and '\n",
+ " 'develop health and medical services. Upon realizing this fact, students will '\n",
+ " 'be more interested in the medical field as the career exhibitions are very '\n",
+ " 'helpful to provide general knowledge of this career.',\n",
+ " 'Subclause 6 (b) seeks to introduce new subsections 39 (3) and (4) into Act '\n",
+ " '452. Subsection (3) proposed to make an offense for any person leaving '\n",
+ " 'Malaysia without paying a deferred and payable contribution or to submit a '\n",
+ " 'guarantee for his payment. Subsection (4) proposed provides that for the '\n",
+ " 'purpose of section 39 of Act 452, the \"contribution\" includes any dividend '\n",
+ " 'or late payment charge payable on any contribution.']\n",
+ "CPU times: user 30.1 s, sys: 17.7 s, total: 47.8 s\n",
+ "Wall time: 11.2 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([string_karangan, string_parlimen], beam_search = False))"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 18,
@@ -579,6 +700,36 @@
"pprint(result)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['I enjoy movies about aliens attacking the earth. <> I think science fiction '\n",
+ " 'is an incredible genre for anything. Future science, technology, time '\n",
+ " \"travel, FTL travel, everything is an exciting concept. <> I'm a science \"\n",
+ " 'fiction fan!',\n",
+ " 'Science fiction <> I enjoy movies about aliens invading the earth. <> '\n",
+ " 'Science fiction (often shortened to SF or sci-fi) is a genre of speculative '\n",
+ " 'fiction, usually dealing with imaginary concepts such as science and '\n",
+ " 'futuristic technology, space travel, time travel, faster than light travel, '\n",
+ " 'parallel universe, and life abroad.']\n",
+ "CPU times: user 19 s, sys: 11.1 s, total: 30.1 s\n",
+ "Wall time: 7.12 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "result = quantized_transformer.translate([string_random1, string_random2], beam_search = False)\n",
+ "pprint(result)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 19,
diff --git a/example/language-detection/load-language-detection.ipynb b/example/language-detection/load-language-detection.ipynb
index 0f81ac4f..d2cbe6c6 100644
--- a/example/language-detection/load-language-detection.ipynb
+++ b/example/language-detection/load-language-detection.ipynb
@@ -18,6 +18,17 @@
""
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "This module trained on both standard and local (included social media) language structures, so it is save to use for both.\n",
+ " \n",
+ "
"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
diff --git a/example/ms-en-translation/load-translation-ms-en.ipynb b/example/ms-en-translation/load-translation-ms-en.ipynb
index e87ec253..4d8fe4ac 100644
--- a/example/ms-en-translation/load-translation-ms-en.ipynb
+++ b/example/ms-en-translation/load-translation-ms-en.ipynb
@@ -18,6 +18,17 @@
""
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "This module only trained on standard language structure, so it is not save to use it for local language structure.\n",
+ " \n",
+ "
"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
@@ -27,8 +38,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 5.14 s, sys: 755 ms, total: 5.89 s\n",
- "Wall time: 5.04 s\n"
+ "CPU times: user 4.96 s, sys: 676 ms, total: 5.63 s\n",
+ "Wall time: 4.64 s\n"
]
}
],
@@ -50,6 +61,13 @@
"execution_count": 2,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:tested on 100k MY-EN sentences.\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -72,6 +90,7 @@
" \n",
" | \n",
" Size (MB) | \n",
+ " Quantized Size (MB) | \n",
" BLEU | \n",
"
\n",
" \n",
@@ -79,16 +98,19 @@
" \n",
" small | \n",
" 42.7 | \n",
+ " 13.4 | \n",
" 0.626 | \n",
"
\n",
" \n",
" base | \n",
" 234.0 | \n",
+ " 82.7 | \n",
" 0.792 | \n",
"
\n",
" \n",
" large | \n",
" 815.0 | \n",
+ " 244.0 | \n",
" 0.714 | \n",
"
\n",
" \n",
@@ -96,10 +118,10 @@
""
],
"text/plain": [
- " Size (MB) BLEU\n",
- "small 42.7 0.626\n",
- "base 234.0 0.792\n",
- "large 815.0 0.714"
+ " Size (MB) Quantized Size (MB) BLEU\n",
+ "small 42.7 13.4 0.626\n",
+ "base 234.0 82.7 0.792\n",
+ "large 815.0 244.0 0.714"
]
},
"execution_count": 2,
@@ -136,6 +158,34 @@
"transformer_large = malaya.translation.ms_en.transformer(model = 'large')"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Load Quantized model\n",
+ "\n",
+ "To load 8-bit quantized model, simply pass `quantized = True`, default is `False`.\n",
+ "\n",
+ "We can expect slightly accuracy drop from quantized model, and not necessary faster than normal 32-bit float model, totally depends on machine."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:root:Load quantized model will cause accuracy drop.\n"
+ ]
+ }
+ ],
+ "source": [
+ "quantized_transformer = malaya.translation.ms_en.transformer(quantized = True)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -164,7 +214,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -173,7 +223,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -198,7 +248,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -223,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -246,7 +296,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -276,7 +326,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -302,7 +352,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -323,7 +373,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -514,6 +564,42 @@
"pprint(transformer.translate([string_news1, string_news2, string_news3], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['TANGKAK - Tan Sri Muhyiddin Yassin said he did not want to touch on '\n",
+ " 'political issues at the moment, instead focusing on the welfare of the '\n",
+ " \"people and efforts to revitalize the affected country's economy following \"\n",
+ " 'the Covid-19 pandemic. The prime minister explained the matter when speaking '\n",
+ " 'at a Leadership Meeting with Gambir State Assembly (DUN) leaders at the '\n",
+ " 'Bukit Gambir Multipurpose Hall today.',\n",
+ " 'ALOR SETAR - Pakatan Harapan (PH) political turmoil has not ended when it '\n",
+ " \"has failed to finalize the Prime Minister's candidate agreed upon. Sik MP \"\n",
+ " 'Ahmad Tarmizi Sulaiman said he had suggested former United Nations (UN) '\n",
+ " \"Indigenous Party chairman Tun Dr Mahathir Mohamad and People's Justice Party \"\n",
+ " '(PKR) president Datuk Seri Anwar Ibrahim resign from politics as a solution.',\n",
+ " 'Senior Minister (Security Cluster) Datuk Seri Ismail Sabri Yaakob said the '\n",
+ " 'relaxation was given as the government was aware of the problems they had to '\n",
+ " 'renew the document. He added that for foreigners who had passed the social '\n",
+ " 'visit during the Movement Control Order (CPP) they could go to the nearest '\n",
+ " 'Immigration Department office for an extension.']\n",
+ "CPU times: user 23.5 s, sys: 13.1 s, total: 36.6 s\n",
+ "Wall time: 10.2 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([string_news1, string_news2, string_news3], beam_search = False))"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 17,
@@ -549,6 +635,41 @@
"pprint(transformer.translate([string_karangan, string_parlimen], beam_search = False))"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['In addition, career exhibitions help students determine their careers. As we '\n",
+ " 'know, the career market in Malaysia is very broad and there are still many '\n",
+ " 'job sectors in the country that are still vacant because it is difficult to '\n",
+ " 'find a truly qualified workforce. For example, the medical sector in '\n",
+ " 'Malaysia is facing a critical shortage of labor, especially specialists due '\n",
+ " 'to the resignation of doctors and physicians to enter the private sector and '\n",
+ " 'develop health and medical services. Upon realizing this fact, students will '\n",
+ " 'be more interested in the medical field as the career exhibitions are very '\n",
+ " 'helpful to provide general knowledge of this career.',\n",
+ " 'Subclause 6 (b) seeks to introduce new subsections 39 (3) and (4) into Act '\n",
+ " '452. Subsection (3) proposed to make an offense for any person leaving '\n",
+ " 'Malaysia without paying a deferred and payable contribution or to submit a '\n",
+ " 'guarantee for his payment. Subsection (4) proposed provides that for the '\n",
+ " 'purpose of section 39 of Act 452, the \"contribution\" includes any dividend '\n",
+ " 'or late payment charge payable on any contribution.']\n",
+ "CPU times: user 30.1 s, sys: 17.7 s, total: 47.8 s\n",
+ "Wall time: 11.2 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "pprint(quantized_transformer.translate([string_karangan, string_parlimen], beam_search = False))"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 18,
@@ -579,6 +700,36 @@
"pprint(result)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['I enjoy movies about aliens attacking the earth. <> I think science fiction '\n",
+ " 'is an incredible genre for anything. Future science, technology, time '\n",
+ " \"travel, FTL travel, everything is an exciting concept. <> I'm a science \"\n",
+ " 'fiction fan!',\n",
+ " 'Science fiction <> I enjoy movies about aliens invading the earth. <> '\n",
+ " 'Science fiction (often shortened to SF or sci-fi) is a genre of speculative '\n",
+ " 'fiction, usually dealing with imaginary concepts such as science and '\n",
+ " 'futuristic technology, space travel, time travel, faster than light travel, '\n",
+ " 'parallel universe, and life abroad.']\n",
+ "CPU times: user 19 s, sys: 11.1 s, total: 30.1 s\n",
+ "Wall time: 7.12 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "result = quantized_transformer.translate([string_random1, string_random2], beam_search = False)\n",
+ "pprint(result)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 19,
diff --git a/malaya/path/__init__.py b/malaya/path/__init__.py
index 48a327f8..c2af9cd1 100644
--- a/malaya/path/__init__.py
+++ b/malaya/path/__init__.py
@@ -400,10 +400,12 @@
't5': {
'base': {
'model': home + '/generator/t5/base/model.pb',
+ 'quantized': home + '/generator/t5/base/quantized/model.pb',
'version': 'v38',
},
'small': {
'model': home + '/generator/t5/small/model.pb',
+ 'quantized': home + '/generator/t5/small/quantized/model.pb',
'version': 'v38',
},
},
@@ -415,8 +417,14 @@
'small': {'model': 'v35/generator/sample-generator-t5-small.tar.gz'},
},
't5': {
- 'base': {'model': 'v38/generator/base.pb'},
- 'small': {'model': 'v38/generator/small.pb'},
+ 'base': {
+ 'model': 'v38/generator/base.pb',
+ 'quantized': 'v40/generator/base.pb.quantized',
+ },
+ 'small': {
+ 'model': 'v38/generator/small.pb',
+ 'quantized': 'v40/generator/small.pb.quantized',
+ },
},
}
@@ -1272,16 +1280,19 @@
'ms-en': {
'base': {
'model': home + '/translation/ms-en/base/model.pb',
+ 'quantized': home + '/translation/ms-en/base/quantized/model.pb',
'vocab': home + '/translation/ms-en/base/vocab.subwords',
'version': 'v37',
},
'large': {
'model': home + '/translation/ms-en/large/model.pb',
+ 'quantized': home + '/translation/ms-en/large/quantized/model.pb',
'vocab': home + '/translation/ms-en/large/vocab.subwords',
'version': 'v37',
},
'small': {
'model': home + '/translation/ms-en/small/model.pb',
+ 'quantized': home + '/translation/ms-en/small/quantized/model.pb',
'vocab': home + '/translation/ms-en/small/vocab.subwords',
'version': 'v37',
},
@@ -1289,16 +1300,19 @@
'en-ms': {
'base': {
'model': home + '/translation/en-ms/base/model.pb',
+ 'quantized': home + '/translation/en-ms/base/quantized/model.pb',
'vocab': home + '/translation/en-ms/base/vocab.subwords',
'version': 'v38',
},
'large': {
'model': home + '/translation/en-ms/large/model.pb',
+ 'quantized': home + '/translation/en-ms/large/quantized/model.pb',
'vocab': home + '/translation/en-ms/large/vocab.subwords',
'version': 'v38',
},
'small': {
'model': home + '/translation/en-ms/small/model.pb',
+ 'quantized': home + '/translation/en-ms/small/quantized/model.pb',
'vocab': home + '/translation/en-ms/small/vocab.subwords',
'version': 'v38',
},
@@ -1308,28 +1322,34 @@
'ms-en': {
'base': {
'model': 'v37/translation/ms-en/base-translation.pb',
+ 'quantized': 'v40/translation/ms-en/base-translation.pb.quantized',
'vocab': 'v37/translation/ms-en/vocab.subwords',
},
'large': {
'model': 'v37/translation/ms-en/large-translation.pb',
+ 'quantized': 'v40/translation/ms-en/large-translation.pb.quantized',
'vocab': 'v37/translation/ms-en/vocab.subwords',
},
'small': {
'model': 'v37/translation/ms-en/small-translation.pb',
+ 'quantized': 'v40/translation/ms-en/small-translation.pb.quantized',
'vocab': 'v37/translation/ms-en/vocab.subwords',
},
},
'en-ms': {
'base': {
'model': 'v38/translation/en-ms/base-translation.pb',
+ 'quantized': 'v40/translation/en-ms/base-translation.pb.quantized',
'vocab': 'v38/translation/en-ms/vocab.subwords',
},
'large': {
'model': 'v38/translation/en-ms/large-translation.pb',
+ 'quantized': 'v40/translation/en-ms/large-translation.pb.quantized',
'vocab': 'v38/translation/en-ms/vocab.subwords',
},
'small': {
'model': 'v38/translation/en-ms/small-translation.pb',
+ 'quantized': 'v40/translation/en-ms/small-translation.pb.quantized',
'vocab': 'v38/translation/en-ms/vocab.subwords',
},
},
diff --git a/malaya/transformer.py b/malaya/transformer.py
index 03d501ee..e465d249 100644
--- a/malaya/transformer.py
+++ b/malaya/transformer.py
@@ -32,26 +32,6 @@
},
}
-_standard_transformer_availability = {
- 'bert': {'Size (MB)': 425.6, 'Description': 'Google BERT BASE parameters'},
- 'albert': {
- 'Size (MB)': 48.6,
- 'Description': 'Google ALBERT BASE parameters',
- },
- 'tiny-albert': {
- 'Size (MB)': 22.4,
- 'Description': 'Google ALBERT TINY parameters',
- },
- 'xlnet': {
- 'Size (MB)': 446.6,
- 'Description': 'Google XLNET BASE parameters',
- },
- 'alxlnet': {
- 'Size (MB)': 46.8,
- 'Description': 'Malaya ALXLNET BASE parameters',
- },
-}
-
def available_transformer():
"""
@@ -132,61 +112,3 @@ def load(model: str = 'electra', pool_mode: str = 'last', **kwargs):
from malaya.transformers.electra import load
return load(model = model, **kwargs)
-
-
-@check_type
-def load_standard_language(
- model: str = 'bert', pool_mode: str = 'last', **kwargs
-):
-
- """
- Load transformer model pretrained on standard language only.
-
- Parameters
- ----------
- model : str, optional (default='bert')
- Model architecture supported. Allowed values:
-
- * ``'bert'`` - Google BERT BASE parameters.
- * ``'albert'`` - Google ALBERT BASE parameters.
- * ``'tiny-albert'`` - Google ALBERT TINY parameters.
- * ``'xlnet'`` - Google XLNET BASE parameters.
- * ``'alxlnet'`` - Malaya ALXLNET BASE parameters.
-
- pool_mode : str, optional (default='last')
- Model logits architecture supported. Only usable if model in ['xlnet', 'alxlnet'].
- Allowed values:
-
- * ``'last'`` - last of the sequence.
- * ``'first'`` - first of the sequence.
- * ``'mean'`` - mean of the sequence.
- * ``'attn'`` - attention of the sequence.
-
- Returns
- -------
- result: malaya.transformers.* class
- """
- model = model.lower()
- pool_mode = pool_mode.lower()
- if model not in _standard_transformer_availability:
- raise ValueError(
- 'model not supported, please check supported models from `malaya.transformer.available_transformer_standard_language()`.'
- )
-
- if model in ['bert']:
- from malaya.transformers.bert import load
-
- return load(model = model, **kwargs)
- if model in ['albert', 'tiny-albert']:
- from malaya.transformers.albert import load
-
- return load(model = model, **kwargs)
- if model in ['xlnet']:
- from malaya.transformers.xlnet import load
-
- return load(model = model, pool_mode = pool_mode, **kwargs)
-
- if model in ['alxlnet']:
- from malaya.transformers.alxlnet import load
-
- return load(model = model, pool_mode = pool_mode, **kwargs)
diff --git a/malaya/translation/en_ms.py b/malaya/translation/en_ms.py
index d6d02638..70c5425c 100644
--- a/malaya/translation/en_ms.py
+++ b/malaya/translation/en_ms.py
@@ -4,9 +4,9 @@
from herpetologist import check_type
_transformer_availability = {
- 'small': {'Size (MB)': 42.7, 'Quantized Size (MB)': 13.1, 'BLEU': 0.142},
- 'base': {'Size (MB)': 234, 'Quantized Size (MB)': 63.8, 'BLEU': 0.696},
- 'large': {'Size (MB)': 817, 'Quantized Size (MB)': 254.6, 'BLEU': 0.699},
+ 'small': {'Size (MB)': 42.7, 'Quantized Size (MB)': 13.4, 'BLEU': 0.142},
+ 'base': {'Size (MB)': 234, 'Quantized Size (MB)': 82.7, 'BLEU': 0.696},
+ 'large': {'Size (MB)': 817, 'Quantized Size (MB)': 244, 'BLEU': 0.699},
}
diff --git a/malaya/translation/ms_en.py b/malaya/translation/ms_en.py
index 19165d00..cb7c2f42 100644
--- a/malaya/translation/ms_en.py
+++ b/malaya/translation/ms_en.py
@@ -4,9 +4,9 @@
from herpetologist import check_type
_transformer_availability = {
- 'small': {'Size (MB)': 42.7, 'Quantized Size (MB)': 13.1, 'BLEU': 0.626},
- 'base': {'Size (MB)': 234, 'Quantized Size (MB)': 63.8, 'BLEU': 0.792},
- 'large': {'Size (MB)': 815, 'Quantized Size (MB)': 254.6, 'BLEU': 0.714},
+ 'small': {'Size (MB)': 42.7, 'Quantized Size (MB)': 13.4, 'BLEU': 0.626},
+ 'base': {'Size (MB)': 234, 'Quantized Size (MB)': 82.7, 'BLEU': 0.792},
+ 'large': {'Size (MB)': 815, 'Quantized Size (MB)': 244, 'BLEU': 0.714},
}
diff --git a/session/quantization/quantize-generator-model.ipynb b/session/quantization/quantize-generator-model.ipynb
new file mode 100644
index 00000000..eced8311
--- /dev/null
+++ b/session/quantization/quantize-generator-model.ipynb
@@ -0,0 +1,358 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2020-11-16 11:22:24-- https://f000.backblazeb2.com/file/malaya-model/v38/generator/base.pb\n",
+ "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n",
+ "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 \n",
+ "Length: 1252668691 (1.2G) [application/octet-stream]\n",
+ "Saving to: ‘base.pb’\n",
+ "\n",
+ "base.pb 100%[===================>] 1.17G 13.0MB/s in 1m 46s \n",
+ "\n",
+ "2020-11-16 11:24:12 (11.3 MB/s) - ‘base.pb’ saved [1252668691/1252668691]\n",
+ "\n",
+ "--2020-11-16 11:24:12-- https://f000.backblazeb2.com/file/malaya-model/v38/generator/small.pb\n",
+ "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n",
+ "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 \n",
+ "Length: 355570391 (339M) [application/octet-stream]\n",
+ "Saving to: ‘small.pb’\n",
+ "\n",
+ "small.pb 100%[===================>] 339.10M 11.2MB/s in 31s \n",
+ "\n",
+ "2020-11-16 11:24:45 (11.1 MB/s) - ‘small.pb’ saved [355570391/355570391]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!wget https://f000.backblazeb2.com/file/malaya-model/v38/generator/base.pb\n",
+ "!wget https://f000.backblazeb2.com/file/malaya-model/v38/generator/small.pb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "from tensorflow.tools.graph_transforms import TransformGraph\n",
+ "from glob import glob\n",
+ "tf.set_random_seed(0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['small.pb', 'base.pb']"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pbs = glob('*.pb')\n",
+ "pbs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow_text\n",
+ "import tf_sentencepiece"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:From :12: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n",
+ "Instructions for updating:\n",
+ "Use tf.gfile.GFile.\n",
+ "small.pb\n",
+ "base.pb\n"
+ ]
+ }
+ ],
+ "source": [
+ "transforms = ['add_default_attributes',\n",
+ " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n",
+ " 'fold_constants(ignore_errors=true)',\n",
+ " 'fold_batch_norms',\n",
+ " 'fold_old_batch_norms',\n",
+ "# 'quantize_weights(fallback_min=-10, fallback_max=10)',\n",
+ " 'strip_unused_nodes',\n",
+ " 'sort_by_execution_order']\n",
+ "\n",
+ "for pb in pbs:\n",
+ " input_graph_def = tf.GraphDef()\n",
+ " with tf.gfile.FastGFile(pb, 'rb') as f:\n",
+ " input_graph_def.ParseFromString(f.read())\n",
+ " \n",
+ " print(pb)\n",
+ " \n",
+ " transformed_graph_def = TransformGraph(input_graph_def, \n",
+ " ['inputs'],\n",
+ " ['SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp'], transforms)\n",
+ " \n",
+ " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n",
+ " f.write(transformed_graph_def.SerializeToString())"
+ ]
+ },
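+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below is a small sanity check added for illustration (not part of the original workflow): assuming the `*.pb.quantized` files written above are still in the working directory, it counts the nodes in each transformed graph to confirm the output is a readable `GraphDef`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sanity check sketch (assumption: the *.pb.quantized files written above\n",
+ "# are still in the working directory)\n",
+ "for path in glob('*.pb.quantized'):\n",
+ "    graph_def = tf.GraphDef()\n",
+ "    with tf.gfile.GFile(path, 'rb') as f:\n",
+ "        graph_def.ParseFromString(f.read())\n",
+ "    print(path, 'nodes:', len(graph_def.node))"
+ ]
+ },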
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_graph(frozen_graph_filename, **kwargs):\n",
+ " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n",
+ " graph_def = tf.GraphDef()\n",
+ " graph_def.ParseFromString(f.read())\n",
+ "\n",
+ " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n",
+ " # to fix import T5\n",
+ " for node in graph_def.node:\n",
+ " if node.op == 'RefSwitch':\n",
+ " node.op = 'Switch'\n",
+ " for index in xrange(len(node.input)):\n",
+ " if 'moving_' in node.input[index]:\n",
+ " node.input[index] = node.input[index] + '/read'\n",
+ " elif node.op == 'AssignSub':\n",
+ " node.op = 'Sub'\n",
+ " if 'use_locking' in node.attr:\n",
+ " del node.attr['use_locking']\n",
+ " elif node.op == 'AssignAdd':\n",
+ " node.op = 'Add'\n",
+ " if 'use_locking' in node.attr:\n",
+ " del node.attr['use_locking']\n",
+ " elif node.op == 'Assign':\n",
+ " node.op = 'Identity'\n",
+ " if 'use_locking' in node.attr:\n",
+ " del node.attr['use_locking']\n",
+ " if 'validate_shape' in node.attr:\n",
+ " del node.attr['validate_shape']\n",
+ " if len(node.input) == 2:\n",
+ " node.input[0] = node.input[1]\n",
+ " del node.input[1]\n",
+ "\n",
+ " with tf.Graph().as_default() as graph:\n",
+ " tf.import_graph_def(graph_def)\n",
+ " return graph"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# g = load_graph('base.pb.quantized')\n",
+ "# x = g.get_tensor_by_name('import/inputs:0')\n",
+ "# logits = g.get_tensor_by_name('import/SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp:0')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# x, x_len, logits"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test_sess = tf.InteractiveSession(graph = g)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%time\n",
+ "# test_sess.run(logits, feed_dict = {x: ['ringkasan: KUALA LUMPUR: Presiden Perancis Emmanuel Macron tidak menampakkan beliau seorang sosok yang bertamadun, selar Tun Dr Mahathir Mohamad menerusi kemas kini terbaharu di blognya. Bekas Perdana Menteri itu mendakwa, pemerintah tertinggi Perancis itu bersikap primitif kerana menuduh orang Islam terlibat dalam pembunuhan guru yang menghina Islam, malah menegaskan tindakan membunuh bukan ajaran Islam. Jelas Dr Mahathir, sejarah membuktikan bahawa orang Perancis pernah membunuh jutaan manusia, yang ramai mangsanya terdiri dari orang Islam.']})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%time\n",
+ "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['small.pb.quantized', 'base.pb.quantized']"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "quantized = glob('*.pb.quantized')\n",
+ "quantized"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm *.pb*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n",
+ "# graph_def_file='test.pb',\n",
+ "# input_arrays=['Placeholder', 'Placeholder_1'],\n",
+ "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n",
+ "# output_arrays=['logits'],\n",
+ "# )\n",
+ "# # converter.allow_custom_ops=True"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.target_spec.supported_types = [tf.float16]\n",
+ "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "# converter.experimental_new_converter = True\n",
+ "# tflite_model = converter.convert()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
+ "# tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.target_spec.supported_types = [tf.float16]\n",
+ "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "# tflite_model = converter.convert()\n",
+ "\n",
+ "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n",
+ "# f.write(tflite_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
+ "# tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
+ "# tflite_model = converter.convert()\n",
+ "\n",
+ "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n",
+ "# f.write(tflite_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n",
+ "# interpreter.allocate_tensors()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/session/quantization/quantize-translation-en-ms-model.ipynb b/session/quantization/quantize-translation-en-ms-model.ipynb
new file mode 100644
index 00000000..0eb900c6
--- /dev/null
+++ b/session/quantization/quantize-translation-en-ms-model.ipynb
@@ -0,0 +1,233 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !wget https://f000.backblazeb2.com/file/malaya-model/v38/translation/en-ms/base-translation.pb\n",
+ "# !wget https://f000.backblazeb2.com/file/malaya-model/v38/translation/en-ms/small-translation.pb\n",
+ "# !wget https://f000.backblazeb2.com/file/malaya-model/v38/translation/en-ms/large-translation.pb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "from tensorflow.tools.graph_transforms import TransformGraph\n",
+ "from glob import glob\n",
+ "tf.set_random_seed(0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['base-translation.pb', 'large-translation.pb', 'small-translation.pb']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pbs = glob('*.pb')\n",
+ "pbs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow_text\n",
+ "import tf_sentencepiece"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:From :12: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n",
+ "Instructions for updating:\n",
+ "Use tf.gfile.GFile.\n",
+ "base-translation.pb\n",
+ "large-translation.pb\n",
+ "small-translation.pb\n"
+ ]
+ }
+ ],
+ "source": [
+ "transforms = ['add_default_attributes',\n",
+ " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n",
+ " 'fold_constants(ignore_errors=true)',\n",
+ " 'fold_batch_norms',\n",
+ " 'fold_old_batch_norms',\n",
+ " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n",
+ " 'strip_unused_nodes',\n",
+ " 'sort_by_execution_order']\n",
+ "\n",
+ "for pb in pbs:\n",
+ " input_graph_def = tf.GraphDef()\n",
+ " with tf.gfile.FastGFile(pb, 'rb') as f:\n",
+ " input_graph_def.ParseFromString(f.read())\n",
+ " \n",
+ " print(pb)\n",
+ " \n",
+ " transformed_graph_def = TransformGraph(input_graph_def, \n",
+ " ['Placeholder'],\n",
+ " ['greedy', 'beam'], transforms)\n",
+ " \n",
+ " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n",
+ " f.write(transformed_graph_def.SerializeToString())"
+ ]
+ },
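+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below is a small sanity check added for illustration (not part of the original workflow): assuming the original `*.pb` files and the `*.pb.quantized` outputs written above are still in the working directory, it compares file sizes to confirm `quantize_weights` shrank the graphs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sanity check sketch (assumption: the original *.pb files and their\n",
+ "# *.pb.quantized outputs are still in the working directory)\n",
+ "import os\n",
+ "\n",
+ "for pb in pbs:\n",
+ "    original_mb = os.path.getsize(pb) / 1e6\n",
+ "    quantized_mb = os.path.getsize(f'{pb}.quantized') / 1e6\n",
+ "    print(f'{pb}: {original_mb:.1f} MB -> {quantized_mb:.1f} MB')"
+ ]
+ },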
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['base-translation.pb.quantized',\n",
+ " 'small-translation.pb.quantized',\n",
+ " 'large-translation.pb.quantized']"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "quantized = glob('*.pb.quantized')\n",
+ "quantized"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!rm *.pb*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n",
+ "# graph_def_file='test.pb',\n",
+ "# input_arrays=['Placeholder', 'Placeholder_1'],\n",
+ "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n",
+ "# output_arrays=['logits'],\n",
+ "# )\n",
+ "# # converter.allow_custom_ops=True"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.target_spec.supported_types = [tf.float16]\n",
+ "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "# converter.experimental_new_converter = True\n",
+ "# tflite_model = converter.convert()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
+ "# tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.target_spec.supported_types = [tf.float16]\n",
+ "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "# tflite_model = converter.convert()\n",
+ "\n",
+ "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n",
+ "# f.write(tflite_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
+ "# tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
+ "# tflite_model = converter.convert()\n",
+ "\n",
+ "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n",
+ "# f.write(tflite_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n",
+ "# interpreter.allocate_tensors()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/session/quantization/quantize-translation-ms-en-model.ipynb b/session/quantization/quantize-translation-ms-en-model.ipynb
new file mode 100644
index 00000000..38e95097
--- /dev/null
+++ b/session/quantization/quantize-translation-ms-en-model.ipynb
@@ -0,0 +1,241 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !wget https://f000.backblazeb2.com/file/malaya-model/v37/translation/ms-en/base-translation.pb\n",
+ "# !wget https://f000.backblazeb2.com/file/malaya-model/v37/translation/ms-en/small-translation.pb\n",
+ "# !wget https://f000.backblazeb2.com/file/malaya-model/v37/translation/ms-en/large-translation.pb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "from tensorflow.tools.graph_transforms import TransformGraph\n",
+ "from glob import glob\n",
+ "tf.set_random_seed(0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['base-translation.pb', 'large-translation.pb', 'small-translation.pb']"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pbs = glob('*.pb')\n",
+ "pbs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import tensorflow_text\n",
+ "import tf_sentencepiece"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:From :12: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n",
+ "Instructions for updating:\n",
+ "Use tf.gfile.GFile.\n",
+ "base-translation.pb\n",
+ "large-translation.pb\n",
+ "small-translation.pb\n"
+ ]
+ }
+ ],
+ "source": [
+ "transforms = ['add_default_attributes',\n",
+ " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n",
+ " 'fold_constants(ignore_errors=true)',\n",
+ " 'fold_batch_norms',\n",
+ " 'fold_old_batch_norms',\n",
+ " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n",
+ " 'strip_unused_nodes',\n",
+ " 'sort_by_execution_order']\n",
+ "\n",
+ "for pb in pbs:\n",
+ " input_graph_def = tf.GraphDef()\n",
+ " with tf.gfile.FastGFile(pb, 'rb') as f:\n",
+ " input_graph_def.ParseFromString(f.read())\n",
+ " \n",
+ " print(pb)\n",
+ " \n",
+ " transformed_graph_def = TransformGraph(input_graph_def, \n",
+ " ['Placeholder'],\n",
+ " ['greedy', 'beam'], transforms)\n",
+ " \n",
+ " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n",
+ " f.write(transformed_graph_def.SerializeToString())"
+ ]
+ },
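+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below is a small sanity check added for illustration (not part of the original workflow): assuming `tensorflow_text` and `tf_sentencepiece` are already imported above (so the custom SentencePiece ops can be resolved), it re-imports one quantized graph to confirm the transformed `GraphDef` still loads."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sanity check sketch (assumption: tensorflow_text and tf_sentencepiece were\n",
+ "# imported above, so the custom SentencePiece ops in the graph resolve)\n",
+ "graph_def = tf.GraphDef()\n",
+ "with tf.gfile.GFile('small-translation.pb.quantized', 'rb') as f:\n",
+ "    graph_def.ParseFromString(f.read())\n",
+ "\n",
+ "with tf.Graph().as_default() as g:\n",
+ "    tf.import_graph_def(graph_def)\n",
+ "\n",
+ "print('total nodes:', len(graph_def.node))"
+ ]
+ },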
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['base-translation.pb.quantized',\n",
+ " 'small-translation.pb.quantized',\n",
+ " 'large-translation.pb.quantized']"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "quantized = glob('*.pb.quantized')\n",
+ "quantized"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rm: cannot remove '*.pb*': No such file or directory\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "!rm *.pb*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n",
+ "# graph_def_file='test.pb',\n",
+ "# input_arrays=['Placeholder', 'Placeholder_1'],\n",
+ "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n",
+ "# output_arrays=['logits'],\n",
+ "# )\n",
+ "# # converter.allow_custom_ops=True"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.target_spec.supported_types = [tf.float16]\n",
+ "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "# converter.experimental_new_converter = True\n",
+ "# tflite_model = converter.convert()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
+ "# tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.target_spec.supported_types = [tf.float16]\n",
+ "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "# tflite_model = converter.convert()\n",
+ "\n",
+ "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n",
+ "# f.write(tflite_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n",
+ "# tf.lite.OpsSet.SELECT_TF_OPS]\n",
+ "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
+ "# tflite_model = converter.convert()\n",
+ "\n",
+ "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n",
+ "# f.write(tflite_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n",
+ "# interpreter.allocate_tensors()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}