From e356fc9c4a7b728b536e2881020503f9bdd703eb Mon Sep 17 00:00:00 2001 From: husein zolkepli Date: Thu, 12 Nov 2020 16:17:46 +0800 Subject: [PATCH] added quantization notebooks --- .../quantize-constituency-model.ipynb | 471 +++++++++++++ .../quantize-dependency-model.ipynb | 354 ++++++++++ .../quantization/quantize-emotion-model.ipynb | 385 +++++++++++ .../quantization/quantize-entity-model.ipynb | 413 ++++++++++++ ...uantize-paraphrase-model-transformer.ipynb | 271 ++++++++ .../quantize-paraphrase-model.ipynb | 358 ++++++++++ session/quantization/quantize-pos-model.ipynb | 413 ++++++++++++ .../quantize-relevancy-model.ipynb | 397 +++++++++++ .../quantize-sentiment-model.ipynb | 409 ++++++++++++ .../quantize-similarity-model.ipynb | 623 ++++++++++++++++++ .../quantization/quantize-stem-model.ipynb | 197 ++++++ .../quantize-subjectivity-model.ipynb | 394 +++++++++++ ...zation-abstractive-model-transformer.ipynb | 421 ++++++++++++ ...tize-summarization-abstractive-model.ipynb | 386 +++++++++++ .../quantize-toxicity-model.ipynb | 394 +++++++++++ .../quantize-true-case-model.ipynb | 265 ++++++++ 16 files changed, 6151 insertions(+) create mode 100644 session/quantization/quantize-constituency-model.ipynb create mode 100644 session/quantization/quantize-dependency-model.ipynb create mode 100644 session/quantization/quantize-emotion-model.ipynb create mode 100644 session/quantization/quantize-entity-model.ipynb create mode 100644 session/quantization/quantize-paraphrase-model-transformer.ipynb create mode 100644 session/quantization/quantize-paraphrase-model.ipynb create mode 100644 session/quantization/quantize-pos-model.ipynb create mode 100644 session/quantization/quantize-relevancy-model.ipynb create mode 100644 session/quantization/quantize-sentiment-model.ipynb create mode 100644 session/quantization/quantize-similarity-model.ipynb create mode 100644 session/quantization/quantize-stem-model.ipynb create mode 100644 
session/quantization/quantize-subjectivity-model.ipynb create mode 100644 session/quantization/quantize-summarization-abstractive-model-transformer.ipynb create mode 100644 session/quantization/quantize-summarization-abstractive-model.ipynb create mode 100644 session/quantization/quantize-toxicity-model.ipynb create mode 100644 session/quantization/quantize-true-case-model.ipynb diff --git a/session/quantization/quantize-constituency-model.ipynb b/session/quantization/quantize-constituency-model.ipynb new file mode 100644 index 00000000..fd4a3cd2 --- /dev/null +++ b/session/quantization/quantize-constituency-model.ipynb @@ -0,0 +1,471 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_CONSTITUENCY = {\n", + " 'bert': {\n", + " 'model': 'v38/constituency/bert-base.pb',\n", + " 'dictionary': 'v38/constituency/vocab-bert-base.json',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v38/constituency/tiny-bert.pb',\n", + " 'dictionary': 'v38/constituency/vocab-tiny-bert.json',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + "# 'albert': {\n", + "# 'model': 'v38/constituency/albert-base.pb',\n", + "# 'dictionary': 'v38/constituency/vocab-albert-base.json',\n", + "# 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + "# 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + "# },\n", + "# 'tiny-albert': {\n", + "# 'model': 'v38/constituency/albert-tiny.pb',\n", + "# 'dictionary': 'v38/constituency/vocab-albert-tiny.json',\n", + "# 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + "# 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + "# 
},\n", + "# 'xlnet': {\n", + "# 'model': 'v40/constituency/xlnet-base.pb',\n", + "# 'quantized': 'v40/constituency/xlnet-base.pb.quantized',\n", + "# 'dictionary': 'v40/constituency/vocab-xlnet-base.json',\n", + "# 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + "# 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + "# },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n" + ] + } + ], + "source": [ + "for k in S3_PATH_CONSTITUENCY.keys():\n", + " if k not in ['multinomial']:\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_CONSTITUENCY[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tiny-bert.pb',\n", + " 'albert-tiny-similarity.pb',\n", + " 'bert-base.pb',\n", + " 'xlnet-base-similarity.pb',\n", + " 'albert-base-similarity.pb',\n", + " 'bert-base-similarity.pb',\n", + " 'alxlnet-base-similarity.pb',\n", + " 'tiny-bert-similarity.pb']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " 
(,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,),\n", + " (,)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with tf.gfile.GFile('tiny-bert.pb', \"rb\") as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + "with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + "\n", + "op = graph.get_operations()\n", + "x = []\n", + "for i in op:\n", + " try:\n", + " if i.values()[0].shape[-1] == 312:\n", + " #if 'batchnorm/add_1' in i.values()[0].name:\n", + " x.append(i.values())\n", + " except:\n", + " pass\n", + " \n", + "x[-100:]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "mapping = {'albert-base.pb': 'import/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1:0',\n", + " 'albert-tiny.pb': 'import/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1:0',\n", + " 'bert-base.pb': 
'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0',\n", + " 'tiny-bert.pb': 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0',\n", + " 'xlnet-base.pb': 'import/model/transformer/layer_11/ff/LayerNorm/batchnorm/add_1:0'}" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[pbs[0]].replace('import/','').replace(':0','')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " a = [mapping[pb].replace('import/','').replace(':0','')]\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['input_ids', 'word_end_mask'],\n", + " ['charts', 'tags'] + a, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import 
T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in xrange(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-base.pb.quantized',\n", + " 'xlnet-base.pb.quantized',\n", + " 'albert-tiny.pb.quantized',\n", + " 'tiny-bert.pb.quantized',\n", + " 'bert-base.pb.quantized']" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/session/quantization/quantize-dependency-model.ipynb b/session/quantization/quantize-dependency-model.ipynb new file mode 100644 index 00000000..cd3576dc --- /dev/null +++ b/session/quantization/quantize-dependency-model.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_DEPENDENCY = {\n", + " 'bert': {\n", + " 'model': 'v34/dependency/bert-base-dependency.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/dependency/tiny-bert-dependency.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/dependency/albert-base-dependency.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/dependency/albert-tiny-dependency.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/dependency/xlnet-base-dependency.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v34/dependency/alxlnet-base-dependency.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + 
"xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_DEPENDENCY.keys():\n", + " if k not in ['multinomial']:\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_DEPENDENCY[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['alxlnet-base-dependency.pb',\n", + " 'albert-tiny-dependency.pb',\n", + " 'albert-base-dependency.pb',\n", + " 'bert-base-dependency.pb',\n", + " 'xlnet-base-dependency.pb',\n", + " 'tiny-bert-dependency.pb']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# with tf.gfile.GFile('xlnet-base.pb', \"rb\") as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "\n", + "# op = graph.get_operations()\n", + "# x = []\n", + "# for i in op:\n", + "# try:\n", + "# #if i.values()[0].shape[-1] == 768:\n", + "# if 'batchnorm/add_1' in i.values()[0].name:\n", + "# x.append(i.values())\n", + "# except:\n", + "# pass\n", + " \n", + "# x[-100:]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n" + ] + 
} + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder']\n", + " a = ['dense/BiasAdd']\n", + " if 'xlnet' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " a = ['transpose_3']\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits', 'heads_seq'] + a, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in xrange(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in 
node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-base-dependency.pb.quantized',\n", + " 'albert-tiny-dependency.pb.quantized',\n", + " 'xlnet-base-dependency.pb.quantized',\n", + " 'bert-base-dependency.pb.quantized',\n", + " 'tiny-bert-dependency.pb.quantized',\n", + " 'alxlnet-base-dependency.pb.quantized']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-emotion-model.ipynb b/session/quantization/quantize-emotion-model.ipynb new file mode 100644 index 00000000..14686d0b --- /dev/null +++ b/session/quantization/quantize-emotion-model.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + 
"cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_EMOTION = {\n", + " 'multinomial': {\n", + " 'model': 'v34/emotion/multinomial.pkl',\n", + " 'vector': 'v34/emotion/tfidf.pkl',\n", + " 'bpe': 'v34/emotion/bpe.model',\n", + " },\n", + " 'bert': {\n", + " 'model': 'v34/emotion/bert-base-emotion.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/emotion/tiny-bert-emotion.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/emotion/albert-base-emotion.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/emotion/albert-tiny-emotion.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/emotion/xlnet-base-emotion.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v34/emotion/alxlnet-base-emotion.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_EMOTION.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_EMOTION[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": 
{}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bert-base-emotion.pb',\n", + " 'xlnet-base-emotion.pb',\n", + " 'alxlnet-base-emotion.pb',\n", + " 'albert-base-emotion.pb',\n", + " 'tiny-bert-emotion.pb',\n", + " 'albert-tiny-emotion.pb']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "bert-base-emotion.pb ['Placeholder', 'Placeholder_1']\n", + "xlnet-base-emotion.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "alxlnet-base-emotion.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-base-emotion.pb ['Placeholder', 'Placeholder_1']\n", + "tiny-bert-emotion.pb ['Placeholder', 'Placeholder_1']\n", + "albert-tiny-emotion.pb ['Placeholder', 'Placeholder_1']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 
'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " \n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " \n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits', 'logits_seq'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# def load_graph(frozen_graph_filename, **kwargs):\n", + "# with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + "# # to fix import T5\n", + "# for node in graph_def.node:\n", + "# if node.op == 'RefSwitch':\n", + "# node.op = 'Switch'\n", + "# for index in xrange(len(node.input)):\n", + "# if 'moving_' in node.input[index]:\n", + "# node.input[index] = node.input[index] + '/read'\n", + "# elif node.op == 'AssignSub':\n", + "# node.op = 'Sub'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'AssignAdd':\n", + "# node.op = 'Add'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'Assign':\n", + "# node.op = 'Identity'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# if 'validate_shape' in node.attr:\n", + "# del node.attr['validate_shape']\n", + "# if len(node.input) == 2:\n", + "# node.input[0] = node.input[1]\n", + "# del node.input[1]\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "# return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + 
"source": [ + "# g = load_graph('xlnet-base-emotion.pb.quantized')\n", + "# x = g.get_tensor_by_name('import/Placeholder:0')\n", + "# x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "# x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')\n", + "# logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],\n", + "# x_len2: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bert-base-emotion.pb.quantized',\n", + " 'albert-base-emotion.pb.quantized',\n", + " 'xlnet-base-emotion.pb.quantized',\n", + " 'tiny-bert-emotion.pb.quantized',\n", + " 'alxlnet-base-emotion.pb.quantized',\n", + " 'albert-tiny-emotion.pb.quantized']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", 
+ "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-entity-model.ipynb b/session/quantization/quantize-entity-model.ipynb new file mode 100644 index 00000000..7b2c81c6 --- /dev/null +++ b/session/quantization/quantize-entity-model.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_ENTITIES = {\n", + " 'bert': {\n", + " 'model': 'v34/entity/bert-base-entity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " 'setting': 'bert-bahasa/dictionary-entities.json',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/entity/tiny-bert-entity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " 'setting': 'bert-bahasa/dictionary-entities.json',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/entity/albert-base-entity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " 'setting': 'bert-bahasa/dictionary-entities.json',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/entity/albert-tiny-entity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " 'setting': 'bert-bahasa/dictionary-entities.json',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/entity/xlnet-base-entity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " 'setting': 'bert-bahasa/dictionary-entities.json',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 
'v34/entity/alxlnet-base-entity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " 'setting': 'bert-bahasa/dictionary-entities.json',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_ENTITIES.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_ENTITIES[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['xlnet-base-entity.pb',\n", + " 'alxlnet-base-entity.pb',\n", + " 'albert-tiny-entity.pb',\n", + " 'tiny-bert-entity.pb',\n", + " 'bert-base-entity.pb',\n", + " 'albert-base-entity.pb']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "xlnet-base-entity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "alxlnet-base-entity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-tiny-entity.pb ['Placeholder', 'Placeholder_1']\n", + 
"tiny-bert-entity.pb ['Placeholder', 'Placeholder_1']\n", + "bert-base-entity.pb ['Placeholder', 'Placeholder_1']\n", + "albert-base-entity.pb ['Placeholder', 'Placeholder_1']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " \n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in xrange(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del 
node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "g = load_graph('xlnet-base-entity.pb.quantized')\n", + "x = g.get_tensor_by_name('import/Placeholder:0')\n", + "x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')\n", + "logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.58 s, sys: 615 ms, total: 3.19 s\n", + "Wall time: 2.68 s\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[2, 2, 2, 0, 0]], dtype=int32)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],\n", + " x_len2: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, 
feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bert-base-entity.pb.quantized',\n", + " 'tiny-bert-entity.pb.quantized',\n", + " 'alxlnet-base-entity.pb.quantized',\n", + " 'xlnet-base-entity.pb.quantized',\n", + " 'albert-tiny-entity.pb.quantized',\n", + " 'albert-base-entity.pb.quantized']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = 
converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-paraphrase-model-transformer.ipynb b/session/quantization/quantize-paraphrase-model-transformer.ipynb new file mode 100644 index 00000000..8904584e --- /dev/null +++ b/session/quantization/quantize-paraphrase-model-transformer.ipynb @@ -0,0 +1,271 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# !wget https://f000.backblazeb2.com/file/malaya-model/v39/paraphrase/base.pb\n", + "# !wget https://f000.backblazeb2.com/file/malaya-model/v39/paraphrase/small.pb" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb', 'base.pb']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow_text\n", + "import tf_sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :12: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "small.pb\n", + "base.pb\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_constants(ignore_errors=true)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " print(pb)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['Placeholder', 'Placeholder_2'],\n", + " ['greedy', 'beam', 'nucleus'], transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + 
"cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in xrange(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb.quantized', 'base.pb.quantized']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = 
tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": 
{ + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-paraphrase-model.ipynb b/session/quantization/quantize-paraphrase-model.ipynb new file mode 100644 index 00000000..5cc9515c --- /dev/null +++ b/session/quantization/quantize-paraphrase-model.ipynb @@ -0,0 +1,358 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-11-12 15:47:30-- https://f000.backblazeb2.com/file/malaya-model/v38/paraphrase/base.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 200 \n", + "Length: 1252315113 (1.2G) [application/octet-stream]\n", + "Saving to: ‘base.pb’\n", + "\n", + "base.pb 100%[===================>] 1.17G 9.45MB/s in 1m 58s \n", + "\n", + "2020-11-12 15:49:31 (10.1 MB/s) - ‘base.pb’ saved [1252315113/1252315113]\n", + "\n", + "--2020-11-12 15:49:31-- https://f000.backblazeb2.com/file/malaya-model/v38/paraphrase/small.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 \n", + "Length: 355193291 (339M) [application/octet-stream]\n", + "Saving to: ‘small.pb’\n", + "\n", + "small.pb 100%[===================>] 338.74M 11.3MB/s in 31s \n", + "\n", + "2020-11-12 15:50:04 (11.0 MB/s) - ‘small.pb’ saved [355193291/355193291]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://f000.backblazeb2.com/file/malaya-model/v38/paraphrase/base.pb\n", + "!wget https://f000.backblazeb2.com/file/malaya-model/v38/paraphrase/small.pb" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb', 'base.pb']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow_text\n", + "import tf_sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :12: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "small.pb\n", + "base.pb\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_constants(ignore_errors=true)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + "# 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " 
input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " print(pb)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['inputs'],\n", + " ['SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp'], transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in xrange(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# g = load_graph('base.pb.quantized')\n", + "# x 
= g.get_tensor_by_name('import/inputs:0')\n", + "# logits = g.get_tensor_by_name('import/SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# x" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: ['ringkasan: KUALA LUMPUR: Presiden Perancis Emmanuel Macron tidak menampakkan beliau seorang sosok yang bertamadun, selar Tun Dr Mahathir Mohamad menerusi kemas kini terbaharu di blognya. Bekas Perdana Menteri itu mendakwa, pemerintah tertinggi Perancis itu bersikap primitif kerana menuduh orang Islam terlibat dalam pembunuhan guru yang menghina Islam, malah menegaskan tindakan membunuh bukan ajaran Islam. 
Jelas Dr Mahathir, sejarah membuktikan bahawa orang Perancis pernah membunuh jutaan manusia, yang ramai mangsanya terdiri dari orang Islam.']})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb.quantized', 'base.pb.quantized']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + 
"# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-pos-model.ipynb b/session/quantization/quantize-pos-model.ipynb new file mode 100644 index 00000000..191df7f3 --- /dev/null +++ b/session/quantization/quantize-pos-model.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_POS = {\n", + " 'bert': {\n", + " 'model': 'v34/pos/bert-base-pos.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 
'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " 'setting': 'bert-bahasa/dictionary-pos.json',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/pos/tiny-bert-pos.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " 'setting': 'bert-bahasa/dictionary-pos.json',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/pos/albert-base-pos.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " 'setting': 'bert-bahasa/dictionary-pos.json',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/pos/albert-tiny-pos.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " 'setting': 'bert-bahasa/dictionary-pos.json',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/pos/xlnet-base-pos.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " 'setting': 'bert-bahasa/dictionary-pos.json',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v34/pos/alxlnet-base-pos.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " 'setting': 'bert-bahasa/dictionary-pos.json',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_POS.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_POS[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from 
glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bert-base-pos.pb',\n", + " 'xlnet-base-pos.pb',\n", + " 'alxlnet-base-pos.pb',\n", + " 'albert-base-pos.pb',\n", + " 'tiny-bert-pos.pb',\n", + " 'albert-tiny-pos.pb']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "bert-base-pos.pb ['Placeholder', 'Placeholder_1']\n", + "xlnet-base-pos.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "alxlnet-base-pos.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-base-pos.pb ['Placeholder', 'Placeholder_1']\n", + "tiny-bert-pos.pb ['Placeholder', 'Placeholder_1']\n", + "albert-tiny-pos.pb ['Placeholder', 'Placeholder_1']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " \n", + " 
print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in range(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "g = load_graph('xlnet-base-pos.pb.quantized')\n", + "x = g.get_tensor_by_name('import/Placeholder:0')\n", + "x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')\n", +
"logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.62 s, sys: 318 ms, total: 2.93 s\n", + "Wall time: 2.45 s\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[5, 1, 1, 1, 5]], dtype=int32)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],\n", + " x_len2: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['alxlnet-base-pos.pb.quantized',\n", + " 'xlnet-base-pos.pb.quantized',\n", + " 'bert-base-pos.pb.quantized',\n", + " 'tiny-bert-pos.pb.quantized',\n", + " 'albert-tiny-pos.pb.quantized',\n", + " 'albert-base-pos.pb.quantized']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# 
input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-relevancy-model.ipynb b/session/quantization/quantize-relevancy-model.ipynb new file mode 100644 index 00000000..3c210530 --- /dev/null +++ b/session/quantization/quantize-relevancy-model.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_RELEVANCY = {\n", + " 'bert': {\n", + " 'model': 'v40/relevancy/bert-base-relevancy.pb',\n", + " 'quantized': 'v40/relevancy/bert-base-relevancy.pb.quantized',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v40/relevancy/tiny-bert-relevancy.pb',\n", + " 'quantized': 'v40/relevancy/tiny-bert-relevancy.pb.quantized',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v40/relevancy/albert-base-relevancy.pb',\n", + " 'quantized': 'v40/relevancy/albert-base-relevancy.pb.quantized',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v40/relevancy/albert-tiny-relevancy.pb',\n", + " 'quantized': 'v40/relevancy/albert-tiny-relevancy.pb.quantized',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 
'v40/relevancy/xlnet-base-relevancy.pb',\n", + " 'quantized': 'v40/relevancy/xlnet-base-relevancy.pb.quantized',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v40/relevancy/alxlnet-base-relevancy.pb',\n", + " 'quantized': 'v40/relevancy/alxlnet-base-relevancy.pb.quantized',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_RELEVANCY.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_RELEVANCY[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bert-base-relevancy.pb',\n", + " 'tiny-bert-relevancy.pb',\n", + " 'xlnet-base-relevancy.pb',\n", + " 'albert-tiny-relevancy.pb',\n", + " 'albert-base-relevancy.pb',\n", + " 'alxlnet-base-relevancy.pb']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be 
removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "bert-base-relevancy.pb ['Placeholder', 'Placeholder_1']\n", + "tiny-bert-relevancy.pb ['Placeholder', 'Placeholder_1']\n", + "xlnet-base-relevancy.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-tiny-relevancy.pb ['Placeholder', 'Placeholder_1']\n", + "albert-base-relevancy.pb ['Placeholder', 'Placeholder_1']\n", + "alxlnet-base-relevancy.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " \n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " \n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits', 'logits_seq'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# def load_graph(frozen_graph_filename, **kwargs):\n", + "# with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + "# # to fix import 
T5\n", + "# for node in graph_def.node:\n", + "# if node.op == 'RefSwitch':\n", + "# node.op = 'Switch'\n", + "# for index in xrange(len(node.input)):\n", + "# if 'moving_' in node.input[index]:\n", + "# node.input[index] = node.input[index] + '/read'\n", + "# elif node.op == 'AssignSub':\n", + "# node.op = 'Sub'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'AssignAdd':\n", + "# node.op = 'Add'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'Assign':\n", + "# node.op = 'Identity'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# if 'validate_shape' in node.attr:\n", + "# del node.attr['validate_shape']\n", + "# if len(node.input) == 2:\n", + "# node.input[0] = node.input[1]\n", + "# del node.input[1]\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "# return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# g = load_graph('xlnet-base-emotion.pb.quantized')\n", + "# x = g.get_tensor_by_name('import/Placeholder:0')\n", + "# x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "# x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')\n", + "# logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],\n", + "# x_len2: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": 
{}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['xlnet-base-relevancy.pb.quantized',\n", + " 'alxlnet-base-relevancy.pb.quantized',\n", + " 'bert-base-relevancy.pb.quantized',\n", + " 'albert-tiny-relevancy.pb.quantized',\n", + " 'tiny-bert-relevancy.pb.quantized',\n", + " 'albert-base-relevancy.pb.quantized']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = 
[tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-sentiment-model.ipynb b/session/quantization/quantize-sentiment-model.ipynb new file mode 100644 index 00000000..418ef0d5 --- /dev/null +++ b/session/quantization/quantize-sentiment-model.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_SENTIMENT = {\n", + " 'multinomial': {\n", + " 'model': 'v34/sentiment/multinomial.pkl',\n", + " 
'vector': 'v34/sentiment/tfidf.pkl',\n", + " 'bpe': 'v34/sentiment/bpe.model',\n", + " },\n", + " 'bert': {\n", + " 'model': 'v34/sentiment/bert-base-sentiment.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/sentiment/tiny-bert-sentiment.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " 'quantized': 'v40/sentiment/quantized-tiny-bert-sentiment.pb',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/sentiment/albert-base-sentiment.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/sentiment/albert-tiny-sentiment.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/sentiment/xlnet-base-sentiment.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v34/sentiment/alxlnet-base-sentiment.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_SENTIMENT.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_SENTIMENT[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from 
tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-tiny-sentiment.pb',\n", + " 'xlnet-base-sentiment.pb',\n", + " 'albert-base-sentiment.pb',\n", + " 'tiny-bert-sentiment.pb',\n", + " 'bert-base-sentiment.pb',\n", + " 'alxlnet-base-sentiment.pb']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# with tf.gfile.GFile('alxlnet-base-sentiment.pb', \"rb\") as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "\n", + "# op = graph.get_operations()\n", + "# x = []\n", + "# for i in op:\n", + "# try:\n", + "# #if 'pooler' in i.values()[0].name:\n", + "# x.append(i.values())\n", + "# except:\n", + "# pass\n", + " \n", + "# x[-100:]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "albert-tiny-sentiment.pb ['Placeholder', 'Placeholder_1']\n", + "xlnet-base-sentiment.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-base-sentiment.pb ['Placeholder', 'Placeholder_1']\n", + "tiny-bert-sentiment.pb ['Placeholder', 'Placeholder_1']\n", + "bert-base-sentiment.pb ['Placeholder', 'Placeholder_1']\n", + "alxlnet-base-sentiment.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n" + ] + } + ], + "source": [ + 
"transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " \n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " \n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits', 'logits_seq'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# def load_graph(frozen_graph_filename, **kwargs):\n", + "# with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + "# # to fix import T5\n", + "# for node in graph_def.node:\n", + "# if node.op == 'RefSwitch':\n", + "# node.op = 'Switch'\n", + "# for index in xrange(len(node.input)):\n", + "# if 'moving_' in node.input[index]:\n", + "# node.input[index] = node.input[index] + '/read'\n", + "# elif node.op == 'AssignSub':\n", + "# node.op = 'Sub'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'AssignAdd':\n", + "# node.op = 'Add'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif 
node.op == 'Assign':\n", + "# node.op = 'Identity'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# if 'validate_shape' in node.attr:\n", + "# del node.attr['validate_shape']\n", + "# if len(node.input) == 2:\n", + "# node.input[0] = node.input[1]\n", + "# del node.input[1]\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "# return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# g = load_graph('test.pb')\n", + "# x = g.get_tensor_by_name('import/Placeholder:0')\n", + "# x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "# logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-base-sentiment.pb.quantized',\n", + " 'xlnet-base-sentiment.pb.quantized',\n", + " 'albert-tiny-sentiment.pb.quantized',\n", + " 'bert-base-sentiment.pb.quantized',\n", + " 'alxlnet-base-sentiment.pb.quantized',\n", + " 'tiny-bert-sentiment.pb.quantized']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = 
glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-similarity-model.ipynb b/session/quantization/quantize-similarity-model.ipynb new file mode 100644 index 00000000..6fab7a24 --- /dev/null +++ b/session/quantization/quantize-similarity-model.ipynb @@ -0,0 +1,623 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_SIMILARITY = {\n", + " 'bert': {\n", + " 'model': 'v36/similarity/bert-base-similarity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v36/similarity/tiny-bert-similarity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v36/similarity/albert-base-similarity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v36/similarity/albert-tiny-similarity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 
'v36/similarity/xlnet-base-similarity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v36/similarity/alxlnet-base-similarity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_SIMILARITY.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_SIMILARITY[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# with tf.gfile.GFile('tiny-bert-similarity.pb', \"rb\") as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "\n", + "# op = graph.get_operations()\n", + "# x = []\n", + "# for i in op:\n", + "# try:\n", + "# if i.values()[0].shape[-1] == 312:\n", + "# #if 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add' in i.values()[0].name:\n", + "# x.append(i.values())\n", + "# except Exception as e:\n", + "# pass\n", + " \n", + "# x[-100:]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "mapping = {'albert-base-similarity.pb': 
'import/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1:0',\n", + " 'albert-tiny-similarity.pb': 'import/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1:0',\n", + " 'bert-base-similarity.pb': 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0',\n", + " 'tiny-bert-similarity.pb': 'import/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0',\n", + " 'xlnet-base-similarity.pb': 'import/model/transformer/layer_11/ff/LayerNorm/batchnorm/add_1:0',\n", + " 'alxlnet-base-similarity.pb': 'import/model/transformer/layer_shared_11/ff/LayerNorm/batchnorm/add_1:0'}" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-tiny-similarity.pb',\n", + " 'xlnet-base-similarity.pb',\n", + " 'albert-base-similarity.pb',\n", + " 'bert-base-similarity.pb',\n", + " 'alxlnet-base-similarity.pb',\n", + " 'tiny-bert-similarity.pb']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "albert-tiny-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "xlnet-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "bert-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "alxlnet-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "tiny-bert-similarity.pb ['Placeholder', 'Placeholder_1', 
'Placeholder_2']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " \n", + " if 'bert' in pb:\n", + " outputs = ['logits', 'bert/pooler/dense/BiasAdd']\n", + " \n", + " if 'xlnet'in pb:\n", + " outputs = ['logits', 'model_1/sequnece_summary/summary/BiasAdd']\n", + " \n", + " a = [mapping[pb].replace('import/','').replace(':0','')]\n", + " \n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " outputs + a, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in range(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + "
node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "g = load_graph('tiny-bert-similarity.pb')\n", + "x = g.get_tensor_by_name('import/Placeholder:0')\n", + "segment_ids = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "input_masks = g.get_tensor_by_name('import/Placeholder_2:0')\n", + "logits = g.get_tensor_by_name(mapping['tiny-bert-similarity.pb'])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "logits" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/malaya/function/__init__.py:50: The name tf.ConfigProto is deprecated.
Please use tf.compat.v1.ConfigProto instead.\n", + "\n" + ] + } + ], + "source": [ + "import malaya\n", + "\n", + "model = malaya.similarity.transformer(model = 'alxlnet')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from malaya.text.bpe import xlnet_tokenization\n", + "import numpy as np\n", + "\n", + "r = xlnet_tokenization(model._tokenizer, ['benci', 'suka', 'hodoh la', 'sakai bodoh la la la la'])\n", + "batch_x = r[0]\n", + "batch_mask = r[1]\n", + "batch_segment = np.array(r[2])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 1, 2, 4, 4, 4, 4, 4, 4],\n", + " [1, 1, 2, 4, 4, 4, 4, 4, 4],\n", + " [1, 1, 1, 2, 4, 4, 4, 4, 4],\n", + " [1, 1, 1, 1, 1, 1, 1, 1, 2]])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch_segment[batch_segment == 0 ] = 1\n", + "batch_segment" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 9)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(batch_x).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 70 ms, sys: 4.86 ms, total: 74.9 ms\n", + "Wall time: 15.9 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "(36, 312)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "l = test_sess.run(logits, feed_dict = {x: batch_x,\n", + " segment_ids: batch_segment,\n", + " input_masks: batch_mask})\n", + "l.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + 
"execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(l.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 9, 312)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l.reshape((*np.array(batch_x).shape,-1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%time\n", + "l = test_sess.run(logits, feed_dict = {x: batch_x,\n", + " segment_ids: batch_segment,\n", + " input_masks: batch_mask})\n", + "l.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-base-similarity.pb.quantized',\n", + " 'albert-tiny-similarity.pb.quantized',\n", + " 'bert-base-similarity.pb.quantized',\n", + " 'xlnet-base-similarity.pb.quantized',\n", + " 'tiny-bert-similarity.pb.quantized',\n", + " 'alxlnet-base-similarity.pb.quantized']" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + 
"metadata": {}, + "outputs": [], + "source": [ + "rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-stem-model.ipynb b/session/quantization/quantize-stem-model.ipynb new file mode 100644 index 00000000..1835d65f --- /dev/null +++ b/session/quantization/quantize-stem-model.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# !wget https://f000.backblazeb2.com/file/malaya-model/v34/stem/model.pb" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from tensorflow.contrib.seq2seq.python.ops import beam_search_ops\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['pretrained-speakernet.pb',\n", + " 'pretrained-vggvox-v1.pb',\n", + " 'pretrained-vggvox-v2.pb',\n", + " 'pretrained-deep-speaker.pb',\n", + " 'model.pb']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 
7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in ['model.pb']:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['Placeholder'],\n", + " ['decode_1/greedy', 'decode_2/beam'], transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = 
[tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-subjectivity-model.ipynb b/session/quantization/quantize-subjectivity-model.ipynb new file mode 100644 index 00000000..6a481b2d --- /dev/null +++ b/session/quantization/quantize-subjectivity-model.ipynb @@ 
-0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_SUBJECTIVE = {\n", + " 'multinomial': {\n", + " 'model': 'v34/subjective/multinomial.pkl',\n", + " 'vector': 'v34/subjective/tfidf.pkl',\n", + " 'bpe': 'v34/subjective/bpe.model',\n", + " },\n", + " 'bert': {\n", + " 'model': 'v34/subjective/bert-base-subjective.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/subjective/tiny-bert-subjective.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/subjective/albert-base-subjective.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/subjective/albert-tiny-subjective.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/subjective/xlnet-base-subjective.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v34/subjective/alxlnet-base-subjective.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in 
S3_PATH_SUBJECTIVE.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_SUBJECTIVE[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['albert-base-subjective.pb',\n", + " 'xlnet-base-subjective.pb',\n", + " 'albert-tiny-subjective.pb',\n", + " 'bert-base-subjective.pb',\n", + " 'alxlnet-base-subjective.pb',\n", + " 'tiny-bert-subjective.pb']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "albert-base-subjective.pb ['Placeholder', 'Placeholder_1']\n", + "xlnet-base-subjective.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-tiny-subjective.pb ['Placeholder', 'Placeholder_1']\n", + "bert-base-subjective.pb ['Placeholder', 'Placeholder_1']\n", + "alxlnet-base-subjective.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "tiny-bert-subjective.pb ['Placeholder', 'Placeholder_1']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 
'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " \n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " \n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits', 'logits_seq'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# def load_graph(frozen_graph_filename, **kwargs):\n", + "# with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + "# # to fix import T5\n", + "# for node in graph_def.node:\n", + "# if node.op == 'RefSwitch':\n", + "# node.op = 'Switch'\n", + "# for index in xrange(len(node.input)):\n", + "# if 'moving_' in node.input[index]:\n", + "# node.input[index] = node.input[index] + '/read'\n", + "# elif node.op == 'AssignSub':\n", + "# node.op = 'Sub'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'AssignAdd':\n", + "# node.op = 'Add'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'Assign':\n", + "# node.op = 'Identity'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# if 'validate_shape' in node.attr:\n", + "# del node.attr['validate_shape']\n", + "# if 
len(node.input) == 2:\n", + "# node.input[0] = node.input[1]\n", + "# del node.input[1]\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "# return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# g = load_graph('xlnet-base-emotion.pb.quantized')\n", + "# x = g.get_tensor_by_name('import/Placeholder:0')\n", + "# x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + "# x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')\n", + "# logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],\n", + "# x_len2: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['xlnet-base-subjective.pb.quantized',\n", + " 'alxlnet-base-subjective.pb.quantized',\n", + " 'albert-base-subjective.pb.quantized',\n", + " 'bert-base-subjective.pb.quantized',\n", + " 'albert-tiny-subjective.pb.quantized',\n", + " 'tiny-bert-subjective.pb.quantized']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + 
"metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-summarization-abstractive-model-transformer.ipynb b/session/quantization/quantize-summarization-abstractive-model-transformer.ipynb new file mode 100644 index 00000000..c16c6b16 --- /dev/null +++ b/session/quantization/quantize-summarization-abstractive-model-transformer.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-11-01 22:13:38-- https://f000.backblazeb2.com/file/malaya-model/v39/summarization/base.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 \n", + "Length: 831717841 (793M) [application/octet-stream]\n", + "Saving to: ‘base.pb’\n", + "\n", + "base.pb 100%[===================>] 793.19M 14.1MB/s in 76s \n", + "\n", + "2020-11-01 22:14:55 (10.5 MB/s) - ‘base.pb’ saved [831717841/831717841]\n", + "\n", + "--2020-11-01 22:14:56-- https://f000.backblazeb2.com/file/malaya-model/v39/summarization/small.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 200 \n", + "Length: 378870799 (361M) [application/octet-stream]\n", + "Saving to: ‘small.pb’\n", + "\n", + "small.pb 100%[===================>] 361.32M 12.2MB/s in 38s \n", + "\n", + "2020-11-01 22:15:35 (9.61 MB/s) - ‘small.pb’ saved [378870799/378870799]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://f000.backblazeb2.com/file/malaya-model/v39/summarization/base.pb\n", + "!wget https://f000.backblazeb2.com/file/malaya-model/v39/summarization/small.pb" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb', 'base.pb']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow_text\n", + "import tf_sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "small.pb\n", + "base.pb\n" + ] + } + ], + "source": [ 
+ "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_constants(ignore_errors=true)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " print(pb)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['Placeholder', 'Placeholder_2'],\n", + " ['greedy', 'beam', 'nucleus'], transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in range(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", +
" if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "downloading frozen /home/husein/Malaya/summarize/transformer/small model\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "362MB [00:29, 12.4MB/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "downloading frozen /home/husein/Malaya/summarize/transformer/small vocab\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "132%|██████████| 1.00/0.76 [00:01<00:00, 1.04s/MB]\n", + "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/malaya/function/__init__.py:50: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", + "\n", + "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/malaya/function/__init__.py:65: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead.\n", + "\n" + ] + } + ], + "source": [ + "import malaya\n", + "\n", + "model = malaya.summarization.abstractive.transformer(model = 'small')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "string = 'KUALA LUMPUR: Presiden Perancis Emmanuel Macron tidak menampakkan beliau seorang sosok yang bertamadun, selar Tun Dr Mahathir Mohamad menerusi kemas kini terbaharu di blognya. Bekas Perdana Menteri itu mendakwa, pemerintah tertinggi Perancis itu bersikap primitif kerana menuduh orang Islam terlibat dalam pembunuhan guru yang menghina Islam, malah menegaskan tindakan membunuh bukan ajaran Islam.
Jelas Dr Mahathir, sejarah membuktikan bahawa orang Perancis pernah membunuh jutaan manusia, yang ramai mangsanya terdiri dari orang Islam.'" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Presiden Perancis tidak menampakkan figur seorang lelaki yang bertanggungjawab']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summarize([string])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "e = model._tokenizer.encode(f'ringkasan: {string}')\n", + "e = e + [1]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "g = load_graph('small.pb.quantized')\n", + "x = g.get_tensor_by_name('import/Placeholder:0')\n", + "logits = g.get_tensor_by_name('import/greedy:0')\n", + "test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "l = test_sess.run(logits, feed_dict = {x: [e]})" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Presiden Perancis Emmanuel Macron tidak menampakkan sosok yang menyimpang, selar Tun M'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model._tokenizer.decode(l[0].tolist())" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb.quantized', 'base.pb.quantized']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = 
tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-summarization-abstractive-model.ipynb b/session/quantization/quantize-summarization-abstractive-model.ipynb new file mode 100644 index 00000000..75f0248e --- /dev/null +++ b/session/quantization/quantize-summarization-abstractive-model.ipynb @@ -0,0 +1,386 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-11-01 17:33:39-- https://f000.backblazeb2.com/file/malaya-model/v38/summarize/base.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 200 \n", + "Length: 1252381816 (1.2G) [application/octet-stream]\n", + "Saving to: ‘base.pb’\n", + "\n", + "base.pb 100%[===================>] 1.17G 7.82MB/s in 1m 50s \n", + "\n", + "2020-11-01 17:35:31 (10.9 MB/s) - ‘base.pb’ saved [1252381816/1252381816]\n", + "\n", + "--2020-11-01 17:35:32-- https://f000.backblazeb2.com/file/malaya-model/v38/summarize/small.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 
104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 200 \n", + "Length: 355538101 (339M) [application/octet-stream]\n", + "Saving to: ‘small.pb’\n", + "\n", + "small.pb 100%[===================>] 339.07M 13.9MB/s in 29s \n", + "\n", + "2020-11-01 17:36:03 (11.7 MB/s) - ‘small.pb’ saved [355538101/355538101]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://f000.backblazeb2.com/file/malaya-model/v38/summarize/base.pb\n", + "!wget https://f000.backblazeb2.com/file/malaya-model/v38/summarize/small.pb" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb', 'base.pb']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow_text\n", + "import tf_sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "small.pb\n", + "base.pb\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_constants(ignore_errors=true)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + "# 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with 
tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " print(pb)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['inputs'],\n", + " ['SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp'], transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "def load_graph(frozen_graph_filename, **kwargs):\n", + " with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + " graph_def = tf.GraphDef()\n", + " graph_def.ParseFromString(f.read())\n", + "\n", + " # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + " # to fix import T5\n", + " for node in graph_def.node:\n", + " if node.op == 'RefSwitch':\n", + " node.op = 'Switch'\n", + " for index in xrange(len(node.input)):\n", + " if 'moving_' in node.input[index]:\n", + " node.input[index] = node.input[index] + '/read'\n", + " elif node.op == 'AssignSub':\n", + " node.op = 'Sub'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'AssignAdd':\n", + " node.op = 'Add'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " elif node.op == 'Assign':\n", + " node.op = 'Identity'\n", + " if 'use_locking' in node.attr:\n", + " del node.attr['use_locking']\n", + " if 'validate_shape' in node.attr:\n", + " del node.attr['validate_shape']\n", + " if len(node.input) == 2:\n", + " node.input[0] = node.input[1]\n", + " del node.input[1]\n", + "\n", + " with tf.Graph().as_default() as graph:\n", + " tf.import_graph_def(graph_def)\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "g = load_graph('base.pb.quantized')\n", + "x = g.get_tensor_by_name('import/inputs:0')\n", 
+ "logits = g.get_tensor_by_name('import/SentenceTokenizer_1/SentenceTokenizer/SentencepieceDetokenizeOp:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1min 5s, sys: 12.8 s, total: 1min 18s\n", + "Wall time: 14.8 s\n" + ] + }, + { + "data": { + "text/plain": [ + "array([b'Presiden Perancis Emmanuel Macron tidak menunjukkan dia seorang yang bertamadun, kata Dr Mahathir. Macron mengatakan kerajaannya bersikap primitif dalam menuduh orang Islam melakukan pembunuhan. Dr Mahathir: Sejarah membuktikan bahawa orang Perancis pernah membunuh berjuta-juta orang'],\n", + " dtype=object)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "test_sess.run(logits, feed_dict = {x: ['ringkasan: KUALA LUMPUR: Presiden Perancis Emmanuel Macron tidak menampakkan beliau seorang sosok yang bertamadun, selar Tun Dr Mahathir Mohamad menerusi kemas kini terbaharu di blognya. Bekas Perdana Menteri itu mendakwa, pemerintah tertinggi Perancis itu bersikap primitif kerana menuduh orang Islam terlibat dalam pembunuhan guru yang menghina Islam, malah menegaskan tindakan membunuh bukan ajaran Islam. 
Jelas Dr Mahathir, sejarah membuktikan bahawa orang Perancis pernah membunuh jutaan manusia, yang ramai mangsanya terdiri dari orang Islam.']})" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb.quantized', 'base.pb.quantized']" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + 
"# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-toxicity-model.ipynb b/session/quantization/quantize-toxicity-model.ipynb new file mode 100644 index 00000000..44e33da9 --- /dev/null +++ b/session/quantization/quantize-toxicity-model.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "S3_PATH_TOXIC = {\n", + " 'multinomial': {\n", + " 'model': 'v34/toxicity/multinomial.pkl',\n", + " 'vector': 
'v34/toxicity/tfidf.pkl',\n", + " 'bpe': 'v34/toxicity/bpe.model',\n", + " },\n", + " 'bert': {\n", + " 'model': 'v34/toxicity/bert-base-toxicity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'tiny-bert': {\n", + " 'model': 'v34/toxicity/tiny-bert-toxicity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.bert.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.bert.model',\n", + " },\n", + " 'albert': {\n", + " 'model': 'v34/toxicity/albert-base-toxicity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'tiny-albert': {\n", + " 'model': 'v34/toxicity/albert-tiny-toxicity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v10.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v10.model',\n", + " },\n", + " 'xlnet': {\n", + " 'model': 'v34/toxicity/xlnet-base-toxicity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + " 'alxlnet': {\n", + " 'model': 'v34/toxicity/alxlnet-base-toxicity.pb',\n", + " 'vocab': 'tokenizer/sp10m.cased.v9.vocab',\n", + " 'tokenizer': 'tokenizer/sp10m.cased.v9.model',\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert\n", + "tiny-bert\n", + "albert\n", + "tiny-albert\n", + "xlnet\n", + "alxlnet\n" + ] + } + ], + "source": [ + "for k in S3_PATH_TOXIC.keys():\n", + " if k != 'multinomial':\n", + " print(k)\n", + " os.system(f\"wget https://f000.backblazeb2.com/file/malaya-model/{S3_PATH_TOXIC[k]['model']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tiny-bert-toxicity.pb',\n", + " 'alxlnet-base-toxicity.pb',\n", + " 'albert-tiny-toxicity.pb',\n", + " 'xlnet-base-toxicity.pb',\n", + " 'albert-base-toxicity.pb',\n", + " 'bert-base-toxicity.pb']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "tiny-bert-toxicity.pb ['Placeholder', 'Placeholder_1']\n", + "alxlnet-base-toxicity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-tiny-toxicity.pb ['Placeholder', 'Placeholder_1']\n", + "xlnet-base-toxicity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + "albert-base-toxicity.pb ['Placeholder', 'Placeholder_1']\n", + "bert-base-toxicity.pb ['Placeholder', 'Placeholder_1']\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " if 'bert' in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1']\n", + " outputs = ['dense/BiasAdd']\n", + " \n", + " if 'xlnet'in pb:\n", + " inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']\n", + " outputs = ['transpose_3']\n", + " 
\n", + " print(pb, inputs)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " inputs,\n", + " ['logits', 'logits_seq'] + outputs, transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# def load_graph(frozen_graph_filename, **kwargs):\n", + "# with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:\n", + "# graph_def = tf.GraphDef()\n", + "# graph_def.ParseFromString(f.read())\n", + "\n", + "# # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091\n", + "# # to fix import T5\n", + "# for node in graph_def.node:\n", + "# if node.op == 'RefSwitch':\n", + "# node.op = 'Switch'\n", + "# for index in xrange(len(node.input)):\n", + "# if 'moving_' in node.input[index]:\n", + "# node.input[index] = node.input[index] + '/read'\n", + "# elif node.op == 'AssignSub':\n", + "# node.op = 'Sub'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'AssignAdd':\n", + "# node.op = 'Add'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# elif node.op == 'Assign':\n", + "# node.op = 'Identity'\n", + "# if 'use_locking' in node.attr:\n", + "# del node.attr['use_locking']\n", + "# if 'validate_shape' in node.attr:\n", + "# del node.attr['validate_shape']\n", + "# if len(node.input) == 2:\n", + "# node.input[0] = node.input[1]\n", + "# del node.input[1]\n", + "\n", + "# with tf.Graph().as_default() as graph:\n", + "# tf.import_graph_def(graph_def)\n", + "# return graph" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# g = load_graph('xlnet-base-emotion.pb.quantized')\n", + "# x = g.get_tensor_by_name('import/Placeholder:0')\n", + "# x_len = g.get_tensor_by_name('import/Placeholder_1:0')\n", + 
"# x_len2 = g.get_tensor_by_name('import/Placeholder_2:0')\n", + "# logits = g.get_tensor_by_name('import/logits:0')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# x, x_len, logits" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# test_sess = tf.InteractiveSession(graph = g)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]],\n", + "# x_len2: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# %%time\n", + "# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tiny-bert-toxicity.pb.quantized',\n", + " 'bert-base-toxicity.pb.quantized',\n", + " 'alxlnet-base-toxicity.pb.quantized',\n", + " 'albert-tiny-toxicity.pb.quantized',\n", + " 'xlnet-base-toxicity.pb.quantized',\n", + " 'albert-base-toxicity.pb.quantized']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" 
+ } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/session/quantization/quantize-true-case-model.ipynb b/session/quantization/quantize-true-case-model.ipynb new file mode 100644 index 00000000..a71fa319 --- /dev/null +++ b/session/quantization/quantize-true-case-model.ipynb @@ -0,0 +1,265 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['CUDA_VISIBLE_DEVICES'] = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-11-12 14:47:18-- https://f000.backblazeb2.com/file/malaya-model/v39/true-case/base.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 200 \n", + "Length: 245299065 (234M) [application/octet-stream]\n", + "Saving to: ‘base.pb’\n", + "\n", + "base.pb 100%[===================>] 233.93M 13.4MB/s in 19s \n", + "\n", + "2020-11-12 14:47:39 (12.2 MB/s) - ‘base.pb’ saved [245299065/245299065]\n", + "\n", + "--2020-11-12 14:47:39-- https://f000.backblazeb2.com/file/malaya-model/v39/true-case/small.pb\n", + "Resolving f000.backblazeb2.com (f000.backblazeb2.com)... 104.153.233.177\n", + "Connecting to f000.backblazeb2.com (f000.backblazeb2.com)|104.153.233.177|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 \n", + "Length: 48764573 (47M) [application/octet-stream]\n", + "Saving to: ‘small.pb’\n", + "\n", + "small.pb 100%[===================>] 46.50M 13.4MB/s in 4.5s \n", + "\n", + "2020-11-12 14:47:46 (10.3 MB/s) - ‘small.pb’ saved [48764573/48764573]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://f000.backblazeb2.com/file/malaya-model/v39/true-case/base.pb\n", + "!wget https://f000.backblazeb2.com/file/malaya-model/v39/true-case/small.pb" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.tools.graph_transforms import TransformGraph\n", + "from glob import glob\n", + "tf.set_random_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb', 'base.pb']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pbs = glob('*.pb')\n", + "pbs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow_text\n", + "import tf_sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From :11: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.gfile.GFile.\n", + "small.pb\n", + "base.pb\n" + ] + } + ], + "source": [ + "transforms = ['add_default_attributes',\n", + " 'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',\n", + " 'fold_batch_norms',\n", + " 'fold_old_batch_norms',\n", + " 'quantize_weights(fallback_min=-10, fallback_max=10)',\n", + " 'strip_unused_nodes',\n", + " 'sort_by_execution_order']\n", + "\n", + "for pb in pbs:\n", + " input_graph_def = tf.GraphDef()\n", + " with 
tf.gfile.FastGFile(pb, 'rb') as f:\n", + " input_graph_def.ParseFromString(f.read())\n", + " \n", + " print(pb)\n", + " \n", + " transformed_graph_def = TransformGraph(input_graph_def, \n", + " ['Placeholder'],\n", + " ['greedy', 'beam'], transforms)\n", + " \n", + " with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:\n", + " f.write(transformed_graph_def.SerializeToString())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['small.pb.quantized', 'base.pb.quantized']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantized = glob('*.pb.quantized')\n", + "quantized" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rm: cannot remove '*.pb*': No such file or directory\r\n" + ] + } + ], + "source": [ + "!rm *.pb*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(\n", + "# graph_def_file='test.pb',\n", + "# input_arrays=['Placeholder', 'Placeholder_1'],\n", + "# input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},\n", + "# output_arrays=['logits'],\n", + "# )\n", + "# # converter.allow_custom_ops=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# converter.experimental_new_converter = True\n", + "# tflite_model = converter.convert()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.target_spec.supported_types = [tf.float16]\n", + "# converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, \n", + "# tf.lite.OpsSet.SELECT_TF_OPS]\n", + "# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n", + "# tflite_model = converter.convert()\n", + "\n", + "# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:\n", + "# f.write(tflite_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')\n", + "# interpreter.allocate_tensors()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}