diff --git a/notebooks/03_anatomy_of_a_quantizer.ipynb b/notebooks/03_anatomy_of_a_quantizer.ipynb index b31bae5e4..21a0b54f4 100644 --- a/notebooks/03_anatomy_of_a_quantizer.ipynb +++ b/notebooks/03_anatomy_of_a_quantizer.ipynb @@ -248,10 +248,10 @@ { "data": { "text/plain": [ - "(tensor([[-0.1000, 0.1000, -0.1000, 0.1000],\n", - " [ 0.1000, 0.1000, 0.1000, 0.1000],\n", + "(tensor([[-0.1000, -0.1000, 0.1000, 0.1000],\n", " [ 0.1000, -0.1000, 0.1000, 0.1000],\n", - " [-0.1000, -0.1000, -0.1000, 0.1000]], grad_fn=),\n", + " [-0.1000, -0.1000, 0.1000, 0.1000],\n", + " [ 0.1000, -0.1000, -0.1000, -0.1000]], grad_fn=),\n", " tensor(0.1000, grad_fn=),\n", " tensor(0.),\n", " tensor(1.))" @@ -293,10 +293,10 @@ { "data": { "text/plain": [ - "(tensor([[ 0.1000, 0.1000, 0.1000, 0.1000],\n", - " [ 0.1000, -0.1000, 0.1000, 0.1000],\n", - " [ 0.1000, 0.1000, 0.1000, -0.1000],\n", - " [-0.1000, 0.1000, 0.1000, -0.1000]], grad_fn=),\n", + "(tensor([[ 0.1000, -0.1000, -0.1000, -0.1000],\n", + " [ 0.1000, -0.1000, -0.1000, 0.1000],\n", + " [-0.1000, -0.1000, -0.1000, 0.1000],\n", + " [ 0.1000, -0.1000, 0.1000, 0.1000]], grad_fn=),\n", " tensor(0.1000, grad_fn=),\n", " tensor(0.),\n", " tensor(1.))" @@ -343,10 +343,10 @@ { "data": { "text/plain": [ - "(tensor([[-1., 1., 1., -1.],\n", - " [-1., 1., -1., 1.],\n", - " [-1., 1., 1., 1.],\n", - " [ 1., 1., 1., 1.]], grad_fn=),\n", + "(tensor([[-1., -1., -1., 1.],\n", + " [-1., 1., -1., -1.],\n", + " [-1., -1., -1., -1.],\n", + " [-1., 1., -1., 1.]], grad_fn=),\n", " tensor(1., grad_fn=),\n", " tensor(0.),\n", " tensor(1.))" @@ -380,10 +380,10 @@ { "data": { "text/plain": [ - "(tensor([[ 0.1000, -0.1000, -0.1000, 0.1000],\n", - " [ 0.1000, 0.1000, -0.1000, 0.1000],\n", - " [-0.1000, 0.1000, 0.1000, -0.1000],\n", - " [-0.1000, -0.1000, 0.1000, 0.1000]], grad_fn=),\n", + "(tensor([[-0.1000, 0.1000, -0.1000, -0.1000],\n", + " [-0.1000, -0.1000, 0.1000, -0.1000],\n", + " [-0.1000, -0.1000, -0.1000, 0.1000],\n", + " [-0.1000, 0.1000, -0.1000, 
0.1000]], grad_fn=),\n", " tensor(0.1000, grad_fn=),\n", " tensor(0.),\n", " tensor(1.))" @@ -445,46 +445,22 @@ "cell_type": "code", "execution_count": 11, "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'NoneType' object cannot be interpreted as an integer", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/giuseppe/Documents/git/brevitas/notebooks/03_anatomy_of_a_quantizer.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbrevitas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mnn\u001b[39;00m \u001b[39mimport\u001b[39;00m QuantConv2d\n\u001b[1;32m 3\u001b[0m binary_weight_quant_conv \u001b[39m=\u001b[39m QuantConv2d(\u001b[39m3\u001b[39m, \u001b[39m2\u001b[39m, (\u001b[39m3\u001b[39m,\u001b[39m3\u001b[39m), weight_quant\u001b[39m=\u001b[39mMyBinaryWeightQuantizer)\n\u001b[0;32m----> 4\u001b[0m quant_weight \u001b[39m=\u001b[39m binary_weight_quant_conv\u001b[39m.\u001b[39;49mquant_weight()\n\u001b[1;32m 5\u001b[0m quant_weight\n", - "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/mixin/parameter.py:103\u001b[0m, in \u001b[0;36mQuantWeightMixin.quant_weight\u001b[0;34m(self, quant_input, subtensor_slice_list)\u001b[0m\n\u001b[1;32m 100\u001b[0m out \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mweight_quant(\n\u001b[1;32m 101\u001b[0m weights_to_quantize[weight_slice_tuple], input_bit_width, input_is_signed)\n\u001b[1;32m 102\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 103\u001b[0m out \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mweight_quant(weights_to_quantize[weight_slice_tuple])\n\u001b[1;32m 104\u001b[0m \u001b[39mif\u001b[39;00m subtensor_slice_list \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m 
\u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 105\u001b[0m \u001b[39m# Restore the quantizer behaviour to full tensor quantization\u001b[39;00m\n\u001b[1;32m 106\u001b[0m \u001b[39m# The modules to slice should have been cached already at this point\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cached_sub_tensor_slice_list_modules \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m, \u001b[39m\"\u001b[39m\u001b[39mMissing cache of modules to slice.\u001b[39m\u001b[39m\"\u001b[39m\n", - "File \u001b[0;32m~/miniconda3/envs/torch_2.1/lib/python3.11/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/miniconda3/envs/torch_2.1/lib/python3.11/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/proxy/parameter_quant.py:101\u001b[0m, in \u001b[0;36mWeightQuantProxyFromInjector.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 99\u001b[0m impl \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexport_handler \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexport_mode \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtensor_quant\n\u001b[1;32m 100\u001b[0m out, scale, zero_point, bit_width \u001b[39m=\u001b[39m impl(x)\n\u001b[0;32m--> 101\u001b[0m \u001b[39mreturn\u001b[39;00m QuantTensor(out, scale, zero_point, bit_width, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mis_signed, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtraining)\n\u001b[1;32m 102\u001b[0m \u001b[39melse\u001b[39;00m: \u001b[39m# quantization disabled\u001b[39;00m\n\u001b[1;32m 103\u001b[0m \u001b[39mreturn\u001b[39;00m x\n", - "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/quant_tensor/__init__.py:71\u001b[0m, in \u001b[0;36mQuantTensor.__new__\u001b[0;34m(cls, value, scale, zero_point, bit_width, signed, training)\u001b[0m\n\u001b[1;32m 69\u001b[0m bit_width \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mtensor(bit_width, 
dtype\u001b[39m=\u001b[39mtorch\u001b[39m.\u001b[39mfloat)\n\u001b[1;32m 70\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(signed, torch\u001b[39m.\u001b[39mTensor):\n\u001b[0;32m---> 71\u001b[0m signed \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39;49mtensor(signed, dtype\u001b[39m=\u001b[39;49mtorch\u001b[39m.\u001b[39;49mbool)\n\u001b[1;32m 72\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(training, torch\u001b[39m.\u001b[39mTensor):\n\u001b[1;32m 73\u001b[0m training \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mtensor(training, dtype\u001b[39m=\u001b[39mtorch\u001b[39m.\u001b[39mbool)\n", - "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object cannot be interpreted as an integer" - ] - } - ], + "outputs": [], "source": [ "from brevitas.nn import QuantConv2d\n", "\n", "binary_weight_quant_conv = QuantConv2d(3, 2, (3,3), weight_quant=MyBinaryWeightQuantizer)\n", - "quant_weight = binary_weight_quant_conv.quant_weight()\n", - "quant_weight" + "try:\n", + " quant_weight = binary_weight_quant_conv.quant_weight()\n", + "except TypeError:\n", + " pass\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note however how the `QuantTensor` is not properly formed, as the `signed` attribute is `None`. This means that `quant_weight` is not considered valid, as the affine quantization invariant cannot be computed:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "assert not quant_weight.is_valid" + "Note, however, that the quantized weight cannot be computed: the `signed` attribute is `None`, so building the `QuantTensor` raises the `TypeError` that the cell above catches."
] }, { @@ -502,30 +478,30 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[[[ 0.1000, -0.1000, 0.1000],\n", - " [ 0.1000, -0.1000, 0.1000],\n", - " [-0.1000, -0.1000, 0.1000]],\n", + "QuantTensor(value=tensor([[[[-0.1000, -0.1000, 0.1000],\n", + " [ 0.1000, 0.1000, 0.1000],\n", + " [ 0.1000, -0.1000, 0.1000]],\n", "\n", - " [[ 0.1000, -0.1000, 0.1000],\n", - " [-0.1000, 0.1000, -0.1000],\n", + " [[-0.1000, 0.1000, -0.1000],\n", + " [-0.1000, -0.1000, -0.1000],\n", " [-0.1000, 0.1000, -0.1000]],\n", "\n", " [[ 0.1000, 0.1000, 0.1000],\n", " [-0.1000, 0.1000, 0.1000],\n", - " [ 0.1000, 0.1000, -0.1000]]],\n", - "\n", + " [-0.1000, -0.1000, 0.1000]]],\n", "\n", - " [[[ 0.1000, 0.1000, 0.1000],\n", - " [ 0.1000, -0.1000, -0.1000],\n", - " [-0.1000, 0.1000, -0.1000]],\n", "\n", - " [[ 0.1000, 0.1000, -0.1000],\n", - " [-0.1000, 0.1000, 0.1000],\n", + " [[[-0.1000, 0.1000, -0.1000],\n", + " [-0.1000, -0.1000, 0.1000],\n", " [-0.1000, 0.1000, 0.1000]],\n", "\n", " [[-0.1000, -0.1000, -0.1000],\n", " [ 0.1000, 0.1000, 0.1000],\n", - " [-0.1000, 0.1000, -0.1000]]]], grad_fn=), scale=tensor(0.1000, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + " [-0.1000, -0.1000, 0.1000]],\n", + "\n", + " [[ 0.1000, -0.1000, -0.1000],\n", + " [-0.1000, 0.1000, -0.1000],\n", + " [ 0.1000, 0.1000, -0.1000]]]], grad_fn=), scale=tensor(0.1000, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, "execution_count": 12, @@ -562,39 +538,39 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[[[ 0.1000, -0.1000, 0.1000],\n", - " [-0.1000, -0.1000, -0.1000],\n", - " [ 0.1000, 0.1000, 0.1000]],\n", + "QuantTensor(value=tensor([[[[ 0.1000, 0.1000, 0.1000],\n", + " [ 0.1000, -0.1000, 0.1000],\n", + " [-0.1000, 0.1000, -0.1000]],\n", "\n", - " [[ 0.1000, 0.1000, 
0.1000],\n", - " [-0.1000, 0.1000, -0.1000],\n", - " [ 0.1000, -0.1000, -0.1000]],\n", + " [[-0.1000, -0.1000, -0.1000],\n", + " [ 0.1000, -0.1000, 0.1000],\n", + " [-0.1000, 0.1000, -0.1000]],\n", "\n", - " [[ 0.1000, -0.1000, 0.1000],\n", - " [ 0.1000, 0.1000, -0.1000],\n", - " [-0.1000, 0.1000, 0.1000]]],\n", + " [[-0.1000, -0.1000, 0.1000],\n", + " [ 0.1000, -0.1000, 0.1000],\n", + " [ 0.1000, -0.1000, -0.1000]]],\n", "\n", "\n", - " [[[-0.1000, -0.1000, 0.1000],\n", - " [ 0.1000, 0.1000, -0.1000],\n", + " [[[ 0.1000, -0.1000, 0.1000],\n", + " [ 0.1000, 0.1000, 0.1000],\n", " [ 0.1000, -0.1000, 0.1000]],\n", "\n", - " [[ 0.1000, 0.1000, -0.1000],\n", - " [-0.1000, -0.1000, 0.1000],\n", - " [ 0.1000, -0.1000, -0.1000]],\n", - "\n", - " [[ 0.1000, 0.1000, -0.1000],\n", + " [[-0.1000, 0.1000, 0.1000],\n", " [ 0.1000, -0.1000, 0.1000],\n", - " [-0.1000, 0.1000, 0.1000]]]], grad_fn=), scale=tensor(0.1000, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + " [ 0.1000, -0.1000, 0.1000]],\n", + "\n", + " [[-0.1000, -0.1000, -0.1000],\n", + " [-0.1000, 0.1000, -0.1000],\n", + " [-0.1000, 0.1000, -0.1000]]]], grad_fn=), scale=tensor(0.1000, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -624,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -638,13 +614,13 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[-0.1000, -0.1000, 0.1000, -0.1000],\n", - " [-0.1000, 0.1000, -0.1000, -0.1000],\n", - " [-0.1000, 0.1000, 0.1000, -0.1000],\n", - " [ 0.1000, -0.1000, -0.1000, -0.1000]], grad_fn=), scale=tensor(0.1000, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + "QuantTensor(value=tensor([[ 0.1000, -0.1000, 0.1000, 
-0.1000],\n", + " [-0.1000, -0.1000, 0.1000, -0.1000],\n", + " [-0.1000, -0.1000, 0.1000, -0.1000],\n", + " [ 0.1000, -0.1000, 0.1000, 0.1000]], grad_fn=), scale=tensor(0.1000, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -670,19 +646,19 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[-0.0010, -0.0010, -0.0010, 0.0010],\n", - " [ 0.0010, 0.0010, -0.0010, 0.0010],\n", - " [-0.0010, -0.0010, 0.0010, -0.0010],\n", - " [-0.0010, -0.0010, -0.0010, -0.0010]], grad_fn=), scale=tensor(0.0010, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + "QuantTensor(value=tensor([[-0.0010, 0.0010, 0.0010, -0.0010],\n", + " [-0.0010, 0.0010, -0.0010, -0.0010],\n", + " [ 0.0010, -0.0010, -0.0010, -0.0010],\n", + " [ 0.0010, -0.0010, 0.0010, -0.0010]], grad_fn=), scale=tensor(0.0010, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -708,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -732,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": { "scrolled": true }, @@ -740,33 +716,33 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[[[-0.1918, 0.1918, 0.1918],\n", - " [ 0.1918, 0.1918, 0.1918],\n", - " [-0.1918, -0.1918, 0.1918]],\n", + "QuantTensor(value=tensor([[[[ 0.1904, -0.1904, -0.1904],\n", + " [-0.1904, 0.1904, -0.1904],\n", + " [-0.1904, 0.1904, 0.1904]],\n", "\n", - " [[-0.1918, -0.1918, 0.1918],\n", - " [-0.1918, 0.1918, -0.1918],\n", - " [ 
0.1918, 0.1918, 0.1918]],\n", + " [[-0.1904, 0.1904, -0.1904],\n", + " [ 0.1904, -0.1904, -0.1904],\n", + " [ 0.1904, 0.1904, -0.1904]],\n", "\n", - " [[-0.1918, 0.1918, 0.1918],\n", - " [ 0.1918, -0.1918, -0.1918],\n", - " [ 0.1918, 0.1918, 0.1918]]],\n", + " [[-0.1904, -0.1904, 0.1904],\n", + " [-0.1904, -0.1904, -0.1904],\n", + " [ 0.1904, 0.1904, -0.1904]]],\n", "\n", "\n", - " [[[ 0.1918, -0.1918, 0.1918],\n", - " [-0.1918, -0.1918, 0.1918],\n", - " [ 0.1918, 0.1918, 0.1918]],\n", + " [[[-0.1904, 0.1904, 0.1904],\n", + " [ 0.1904, -0.1904, -0.1904],\n", + " [ 0.1904, 0.1904, 0.1904]],\n", "\n", - " [[ 0.1918, 0.1918, 0.1918],\n", - " [ 0.1918, -0.1918, -0.1918],\n", - " [ 0.1918, 0.1918, 0.1918]],\n", + " [[ 0.1904, -0.1904, 0.1904],\n", + " [ 0.1904, 0.1904, 0.1904],\n", + " [ 0.1904, -0.1904, -0.1904]],\n", "\n", - " [[-0.1918, 0.1918, -0.1918],\n", - " [ 0.1918, -0.1918, 0.1918],\n", - " [ 0.1918, -0.1918, 0.1918]]]], grad_fn=), scale=tensor(0.1918, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + " [[-0.1904, 0.1904, 0.1904],\n", + " [-0.1904, -0.1904, -0.1904],\n", + " [-0.1904, -0.1904, 0.1904]]]], grad_fn=), scale=tensor(0.1904, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -785,7 +761,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -794,7 +770,7 @@ "True" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -812,16 +788,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor(0.1860, grad_fn=)" + "tensor(0.1876, grad_fn=)" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": 
"execute_result" } @@ -856,7 +832,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/giuseppe/Documents/git/brevitas/notebooks/03_anatomy_of_a_quantizer.ipynb Cell 46\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m param_from_max_quant_conv\u001b[39m.\u001b[39;49mload_state_dict(float_conv\u001b[39m.\u001b[39;49mstate_dict())\n", + "\u001b[1;32m/home/giuseppe/Documents/git/brevitas/notebooks/03_anatomy_of_a_quantizer.ipynb Cell 45\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m param_from_max_quant_conv\u001b[39m.\u001b[39;49mload_state_dict(float_conv\u001b[39m.\u001b[39;49mstate_dict())\n", "File \u001b[0;32m~/miniconda3/envs/torch_2.1/lib/python3.11/site-packages/torch/nn/modules/module.py:2152\u001b[0m, in \u001b[0;36mModule.load_state_dict\u001b[0;34m(self, state_dict, strict, assign)\u001b[0m\n\u001b[1;32m 2147\u001b[0m error_msgs\u001b[39m.\u001b[39minsert(\n\u001b[1;32m 2148\u001b[0m \u001b[39m0\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mMissing key(s) in state_dict: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m. 
\u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 2149\u001b[0m \u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mk\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m missing_keys)))\n\u001b[1;32m 2151\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(error_msgs) \u001b[39m>\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m-> 2152\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mError(s) in loading state_dict for \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m{}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 2153\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(error_msgs)))\n\u001b[1;32m 2154\u001b[0m \u001b[39mreturn\u001b[39;00m _IncompatibleKeys(missing_keys, unexpected_keys)\n", "\u001b[0;31mRuntimeError\u001b[0m: Error(s) in loading state_dict for QuantConv2d:\n\tMissing key(s) in state_dict: \"weight_quant.tensor_quant.scaling_impl.value\". 
" ] @@ -914,30 +890,30 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[[[-0.1860, 0.1860, 0.1860],\n", - " [-0.1860, 0.1860, -0.1860],\n", - " [-0.1860, 0.1860, -0.1860]],\n", + "QuantTensor(value=tensor([[[[ 0.1876, 0.1876, -0.1876],\n", + " [ 0.1876, -0.1876, 0.1876],\n", + " [-0.1876, 0.1876, -0.1876]],\n", "\n", - " [[ 0.1860, -0.1860, 0.1860],\n", - " [-0.1860, 0.1860, 0.1860],\n", - " [ 0.1860, -0.1860, -0.1860]],\n", + " [[-0.1876, 0.1876, 0.1876],\n", + " [-0.1876, 0.1876, -0.1876],\n", + " [ 0.1876, -0.1876, -0.1876]],\n", "\n", - " [[-0.1860, -0.1860, -0.1860],\n", - " [-0.1860, 0.1860, 0.1860],\n", - " [ 0.1860, 0.1860, -0.1860]]],\n", + " [[-0.1876, -0.1876, -0.1876],\n", + " [ 0.1876, 0.1876, 0.1876],\n", + " [ 0.1876, -0.1876, -0.1876]]],\n", "\n", "\n", - " [[[ 0.1860, -0.1860, 0.1860],\n", - " [-0.1860, -0.1860, 0.1860],\n", - " [-0.1860, 0.1860, -0.1860]],\n", + " [[[-0.1876, -0.1876, -0.1876],\n", + " [-0.1876, 0.1876, -0.1876],\n", + " [-0.1876, -0.1876, -0.1876]],\n", "\n", - " [[ 0.1860, -0.1860, 0.1860],\n", - " [ 0.1860, -0.1860, -0.1860],\n", - " [ 0.1860, 0.1860, 0.1860]],\n", + " [[-0.1876, -0.1876, -0.1876],\n", + " [-0.1876, 0.1876, 0.1876],\n", + " [-0.1876, -0.1876, -0.1876]],\n", "\n", - " [[ 0.1860, -0.1860, -0.1860],\n", - " [-0.1860, -0.1860, -0.1860],\n", - " [-0.1860, 0.1860, 0.1860]]]], grad_fn=), scale=tensor(0.1860, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + " [[ 0.1876, 0.1876, -0.1876],\n", + " [-0.1876, -0.1876, 0.1876],\n", + " [ 0.1876, 0.1876, 0.1876]]]], grad_fn=), scale=tensor(0.1876, grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, "execution_count": 24, @@ -1224,33 +1200,33 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[[[ 0.1876, -0.1876, 0.1876],\n", - " [ 0.1876, 0.1876, -0.1876],\n", - " [ 0.1876, -0.1876, -0.1876]],\n", + "QuantTensor(value=tensor([[[[-0.1903, 
0.1903, -0.1903],\n", + " [ 0.1903, 0.1903, -0.1903],\n", + " [-0.1903, -0.1903, -0.1903]],\n", "\n", - " [[-0.1876, -0.1876, -0.1876],\n", - " [ 0.1876, -0.1876, 0.1876],\n", - " [-0.1876, 0.1876, -0.1876]],\n", + " [[ 0.1903, -0.1903, -0.1903],\n", + " [ 0.1903, 0.1903, -0.1903],\n", + " [ 0.1903, -0.1903, 0.1903]],\n", "\n", - " [[ 0.1876, 0.1876, -0.1876],\n", - " [-0.1876, -0.1876, -0.1876],\n", - " [-0.1876, -0.1876, 0.1876]]],\n", + " [[-0.1903, -0.1903, -0.1903],\n", + " [-0.1903, -0.1903, 0.1903],\n", + " [-0.1903, 0.1903, -0.1903]]],\n", "\n", "\n", - " [[[ 0.1867, 0.1867, -0.1867],\n", - " [-0.1867, 0.1867, 0.1867],\n", - " [-0.1867, -0.1867, 0.1867]],\n", + " [[[ 0.1870, 0.1870, -0.1870],\n", + " [ 0.1870, 0.1870, -0.1870],\n", + " [-0.1870, 0.1870, -0.1870]],\n", "\n", - " [[-0.1867, -0.1867, -0.1867],\n", - " [-0.1867, 0.1867, 0.1867],\n", - " [ 0.1867, 0.1867, -0.1867]],\n", + " [[-0.1870, 0.1870, 0.1870],\n", + " [ 0.1870, 0.1870, 0.1870],\n", + " [ 0.1870, 0.1870, 0.1870]],\n", "\n", - " [[-0.1867, -0.1867, 0.1867],\n", - " [ 0.1867, -0.1867, 0.1867],\n", - " [ 0.1867, 0.1867, -0.1867]]]], grad_fn=), scale=tensor([[[[0.1876]]],\n", + " [[-0.1870, -0.1870, -0.1870],\n", + " [ 0.1870, -0.1870, -0.1870],\n", + " [-0.1870, -0.1870, 0.1870]]]], grad_fn=), scale=tensor([[[[0.1903]]],\n", "\n", "\n", - " [[[0.1867]]]], grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + " [[[0.1870]]]], grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, "execution_count": 33, @@ -1282,33 +1258,33 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[[[-0.1859, 0.1859, 0.1859],\n", - " [-0.1859, 0.1859, -0.1859],\n", - " [-0.1859, 0.1859, -0.1859]],\n", + "QuantTensor(value=tensor([[[[ 0.1873, 0.1873, -0.1873],\n", + " [ 0.1873, -0.1873, 0.1873],\n", + " [-0.1873, 0.1873, -0.1873]],\n", "\n", - " [[ 0.1859, -0.1859, 0.1859],\n", - " [-0.1859, 0.1859, 
0.1859],\n", - " [ 0.1859, -0.1859, -0.1859]],\n", + " [[-0.1873, 0.1873, 0.1873],\n", + " [-0.1873, 0.1873, -0.1873],\n", + " [ 0.1873, -0.1873, -0.1873]],\n", "\n", - " [[-0.1859, -0.1859, -0.1859],\n", - " [-0.1859, 0.1859, 0.1859],\n", - " [ 0.1859, 0.1859, -0.1859]]],\n", + " [[-0.1873, -0.1873, -0.1873],\n", + " [ 0.1873, 0.1873, 0.1873],\n", + " [ 0.1873, -0.1873, -0.1873]]],\n", "\n", "\n", - " [[[ 0.1860, -0.1860, 0.1860],\n", - " [-0.1860, -0.1860, 0.1860],\n", - " [-0.1860, 0.1860, -0.1860]],\n", + " [[[-0.1876, -0.1876, -0.1876],\n", + " [-0.1876, 0.1876, -0.1876],\n", + " [-0.1876, -0.1876, -0.1876]],\n", "\n", - " [[ 0.1860, -0.1860, 0.1860],\n", - " [ 0.1860, -0.1860, -0.1860],\n", - " [ 0.1860, 0.1860, 0.1860]],\n", + " [[-0.1876, -0.1876, -0.1876],\n", + " [-0.1876, 0.1876, 0.1876],\n", + " [-0.1876, -0.1876, -0.1876]],\n", "\n", - " [[ 0.1860, -0.1860, -0.1860],\n", - " [-0.1860, -0.1860, -0.1860],\n", - " [-0.1860, 0.1860, 0.1860]]]], grad_fn=), scale=tensor([[[[0.1859]]],\n", + " [[ 0.1876, 0.1876, -0.1876],\n", + " [-0.1876, -0.1876, 0.1876],\n", + " [ 0.1876, 0.1876, 0.1876]]]], grad_fn=), scale=tensor([[[[0.1873]]],\n", "\n", "\n", - " [[[0.1860]]]], grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" + " [[[0.1876]]]], grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" ] }, "execution_count": 34, @@ -1338,10 +1314,10 @@ { "data": { "text/plain": [ - "tensor([[-0.0100, 0.0100, 0.0100, -0.0100],\n", - " [-0.0100, 0.0100, 0.0100, -0.0100],\n", - " [-0.0100, 0.0100, -0.0100, -0.0100],\n", - " [-0.0100, -0.0100, -0.0100, 0.0100]], grad_fn=)" + "tensor([[ 0.0100, 0.0100, -0.0100, -0.0100],\n", + " [ 0.0100, -0.0100, 0.0100, -0.0100],\n", + " [ 0.0100, -0.0100, 0.0100, -0.0100],\n", + " [-0.0100, -0.0100, 0.0100, -0.0100]], grad_fn=)" ] }, "execution_count": 35, @@ -1381,11 +1357,11 @@ "traceback": [ 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mDependencyError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/giuseppe/Documents/git/brevitas/notebooks/03_anatomy_of_a_quantizer.ipynb Cell 76\u001b[0m line \u001b[0;36m3\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbrevitas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mnn\u001b[39;00m \u001b[39mimport\u001b[39;00m QuantIdentity\n\u001b[0;32m----> 3\u001b[0m quant_identity \u001b[39m=\u001b[39m QuantIdentity(\n\u001b[1;32m 4\u001b[0m act_quant\u001b[39m=\u001b[39;49mAdvancedActQuantizer, is_clamped\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, scaling_per_output_channel\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n", + "\u001b[1;32m/home/giuseppe/Documents/git/brevitas/notebooks/03_anatomy_of_a_quantizer.ipynb Cell 75\u001b[0m line \u001b[0;36m3\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbrevitas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mnn\u001b[39;00m \u001b[39mimport\u001b[39;00m QuantIdentity\n\u001b[0;32m----> 3\u001b[0m quant_identity \u001b[39m=\u001b[39m QuantIdentity(\n\u001b[1;32m 4\u001b[0m act_quant\u001b[39m=\u001b[39;49mAdvancedActQuantizer, is_clamped\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, scaling_per_output_channel\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n", "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/quant_activation.py:113\u001b[0m, in \u001b[0;36mQuantIdentity.__init__\u001b[0;34m(self, act_quant, return_quant_tensor, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m 109\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 110\u001b[0m act_quant: Optional[ActQuantType] \u001b[39m=\u001b[39m Int8ActPerTensorFloat,\n\u001b[1;32m 111\u001b[0m return_quant_tensor: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m 
\u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 112\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m--> 113\u001b[0m QuantNLAL\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\n\u001b[1;32m 114\u001b[0m \u001b[39mself\u001b[39;49m,\n\u001b[1;32m 115\u001b[0m input_quant\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 116\u001b[0m act_impl\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 117\u001b[0m passthrough_act\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[1;32m 118\u001b[0m act_quant\u001b[39m=\u001b[39;49mact_quant,\n\u001b[1;32m 119\u001b[0m return_quant_tensor\u001b[39m=\u001b[39;49mreturn_quant_tensor,\n\u001b[1;32m 120\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/quant_layer.py:40\u001b[0m, in \u001b[0;36mQuantNonLinearActLayer.__init__\u001b[0;34m(self, act_impl, passthrough_act, input_quant, act_quant, return_quant_tensor, **kwargs)\u001b[0m\n\u001b[1;32m 38\u001b[0m QuantLayerMixin\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, return_quant_tensor)\n\u001b[1;32m 39\u001b[0m QuantInputMixin\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, input_quant, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 40\u001b[0m QuantNonLinearActMixin\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\u001b[39mself\u001b[39;49m, act_impl, passthrough_act, act_quant, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/quant_layer.py:37\u001b[0m, in \u001b[0;36mQuantNonLinearActLayer.__init__\u001b[0;34m(self, act_impl, passthrough_act, input_quant, act_quant, return_quant_tensor, **kwargs)\u001b[0m\n\u001b[1;32m 35\u001b[0m QuantLayerMixin\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, return_quant_tensor)\n\u001b[1;32m 36\u001b[0m 
QuantInputMixin\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, input_quant, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 37\u001b[0m QuantNonLinearActMixin\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\u001b[39mself\u001b[39;49m, act_impl, passthrough_act, act_quant, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/mixin/act.py:118\u001b[0m, in \u001b[0;36mQuantNonLinearActMixin.__init__\u001b[0;34m(self, act_impl, passthrough_act, act_quant, act_proxy_prefix, act_kwargs_prefix, **kwargs)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\n\u001b[1;32m 108\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 109\u001b[0m act_impl: Optional[Type[Module]],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 113\u001b[0m act_kwargs_prefix\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[1;32m 114\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m prefixed_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 116\u001b[0m act_kwargs_prefix \u001b[39m+\u001b[39m \u001b[39m'\u001b[39m\u001b[39mact_impl\u001b[39m\u001b[39m'\u001b[39m: act_impl,\n\u001b[1;32m 117\u001b[0m act_kwargs_prefix \u001b[39m+\u001b[39m \u001b[39m'\u001b[39m\u001b[39mpassthrough_act\u001b[39m\u001b[39m'\u001b[39m: passthrough_act}\n\u001b[0;32m--> 118\u001b[0m QuantProxyMixin\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\n\u001b[1;32m 119\u001b[0m \u001b[39mself\u001b[39;49m,\n\u001b[1;32m 120\u001b[0m quant\u001b[39m=\u001b[39;49mact_quant,\n\u001b[1;32m 121\u001b[0m proxy_prefix\u001b[39m=\u001b[39;49mact_proxy_prefix,\n\u001b[1;32m 122\u001b[0m kwargs_prefix\u001b[39m=\u001b[39;49mact_kwargs_prefix,\n\u001b[1;32m 123\u001b[0m proxy_protocol\u001b[39m=\u001b[39;49mActQuantProxyProtocol,\n\u001b[1;32m 124\u001b[0m 
none_quant_injector\u001b[39m=\u001b[39;49mNoneActQuant,\n\u001b[1;32m 125\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mprefixed_kwargs,\n\u001b[1;32m 126\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/mixin/base.py:71\u001b[0m, in \u001b[0;36mQuantProxyMixin.__init__\u001b[0;34m(self, quant, proxy_protocol, none_quant_injector, proxy_prefix, kwargs_prefix, **kwargs)\u001b[0m\n\u001b[1;32m 69\u001b[0m quant_injector \u001b[39m=\u001b[39m quant\n\u001b[1;32m 70\u001b[0m quant_injector \u001b[39m=\u001b[39m quant_injector\u001b[39m.\u001b[39mlet(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mfilter_kwargs(kwargs_prefix, kwargs))\n\u001b[0;32m---> 71\u001b[0m quant \u001b[39m=\u001b[39m quant_injector\u001b[39m.\u001b[39;49mproxy_class(\u001b[39mself\u001b[39;49m, quant_injector)\n\u001b[1;32m 72\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 73\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(quant, proxy_protocol):\n", + "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/nn/mixin/base.py:70\u001b[0m, in \u001b[0;36mQuantProxyMixin.__init__\u001b[0;34m(self, quant, proxy_protocol, none_quant_injector, proxy_prefix, kwargs_prefix, **kwargs)\u001b[0m\n\u001b[1;32m 68\u001b[0m quant_injector \u001b[39m=\u001b[39m quant\n\u001b[1;32m 69\u001b[0m quant_injector \u001b[39m=\u001b[39m quant_injector\u001b[39m.\u001b[39mlet(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mfilter_kwargs(kwargs_prefix, kwargs))\n\u001b[0;32m---> 70\u001b[0m quant \u001b[39m=\u001b[39m quant_injector\u001b[39m.\u001b[39;49mproxy_class(\u001b[39mself\u001b[39;49m, quant_injector)\n\u001b[1;32m 71\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(quant, proxy_protocol):\n", "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/proxy/runtime_quant.py:89\u001b[0m, 
in \u001b[0;36mActQuantProxyFromInjector.__init__\u001b[0;34m(self, quant_layer, quant_injector)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, quant_layer, quant_injector):\n\u001b[0;32m---> 89\u001b[0m QuantProxyFromInjector\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\u001b[39mself\u001b[39;49m, quant_layer, quant_injector)\n\u001b[1;32m 90\u001b[0m ActQuantProxyProtocol\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m)\n\u001b[1;32m 91\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_passthrough_act \u001b[39m=\u001b[39m _is_passthrough_act(quant_injector)\n", "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/proxy/quant_proxy.py:82\u001b[0m, in \u001b[0;36mQuantProxyFromInjector.__init__\u001b[0;34m(self, quant_layer, quant_injector)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[39m# Use a normal list and not a ModuleList since this is a pointer to parent modules\u001b[39;00m\n\u001b[1;32m 81\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtracked_module_list \u001b[39m=\u001b[39m []\n\u001b[0;32m---> 82\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49madd_tracked_module(quant_layer)\n\u001b[1;32m 83\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdisable_quant \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n", "File \u001b[0;32m~/Documents/git/brevitas/src/brevitas/proxy/quant_proxy.py:120\u001b[0m, in \u001b[0;36mQuantProxyFromInjector.add_tracked_module\u001b[0;34m(self, module)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtracked_module_list\u001b[39m.\u001b[39mappend(module)\n\u001b[1;32m 119\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mupdate_tracked_modules()\n\u001b[0;32m--> 120\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_tensor_quant()\n\u001b[1;32m 121\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 122\u001b[0m \u001b[39mraise\u001b[39;00m 
\u001b[39mRuntimeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mTrying to add None as a parent module.\u001b[39m\u001b[39m\"\u001b[39m)\n", @@ -1419,10 +1395,10 @@ { "data": { "text/plain": [ - "QuantTensor(value=tensor([[-0.0100, 0.0100, 0.0100, 0.0100],\n", - " [-0.0100, 0.0100, -0.0100, 0.0100],\n", - " [-0.0100, -0.0100, -0.0100, -0.0100],\n", - " [ 0.0100, 0.0100, 0.0100, -0.0100]], grad_fn=), scale=tensor([[0.0100],\n", + "QuantTensor(value=tensor([[-0.0100, 0.0100, 0.0100, -0.0100],\n", + " [ 0.0100, -0.0100, -0.0100, -0.0100],\n", + " [ 0.0100, -0.0100, 0.0100, -0.0100],\n", + " [ 0.0100, -0.0100, -0.0100, 0.0100]], grad_fn=), scale=tensor([[0.0100],\n", " [0.0100],\n", " [0.0100],\n", " [0.0100]], grad_fn=), zero_point=tensor(0.), bit_width=tensor(1.), signed_t=tensor(True), training_t=tensor(True))" @@ -1446,6 +1422,11 @@ "source": [ "We have seen how powerful dependency injection is. In a way, it's even too expressive. For users that are not interesting in building completely custom quantizers, it can be hard to make sense of how the various components available under `brevitas.core` can be assembled together according to best practices." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": {