From baf755049a1d8d9569e14a231ae0ade76ec82085 Mon Sep 17 00:00:00 2001 From: Guillaume Claret Date: Wed, 5 Jun 2024 22:28:34 +0200 Subject: [PATCH] Deploy website - based on 456e19e7ee350938c28098f4d090b396a3fd14f5 --- 404.html | 2 +- assets/js/{6b1d1fec.7125dbed.js => 6b1d1fec.f368363f.js} | 2 +- assets/js/b2f554cd.b4593d13.js | 1 - assets/js/b2f554cd.e8f027dd.js | 1 + .../js/{runtime~main.9f651554.js => runtime~main.8a70afab.js} | 2 +- blog.html | 2 +- blog/2021/10/10/welcome.html | 2 +- blog/2021/10/27/verification-data-encoding.html | 2 +- blog/2021/11/12/new-blog-posts-and-meetup-talk.html | 2 +- blog/2022/02/02/make-tezos-a-formally-verified-crypto.html | 2 +- blog/2022/06/15/status update-tezos.html | 2 +- blog/2022/06/23/upgrade-coq-of-ocaml-4.14.html | 2 +- blog/2022/12/13/latest-blog-posts-on-tezos.html | 2 +- blog/2023/01/24/current-verification-efforts.html | 2 +- blog/2023/04/26/representation-of-rust-methods-in-coq.html | 2 +- blog/2023/05/28/monad-for-side-effects-in-rust.html | 2 +- blog/2023/08/25/trait-representation-in-coq.html | 2 +- blog/2023/11/08/rust-thir-and-bundled-traits.html | 2 +- blog/2023/11/26/rust-function-body.html | 2 +- blog/2023/12/13/rust-verify-erc-20-smart-contract.html | 2 +- blog/2024/01/04/rust-translating-match.html | 2 +- blog/2024/01/18/update-coq-of-rust.html | 2 +- blog/2024/02/02/formal-verification-for-aleph-zero.html | 2 +- blog/2024/02/14/experiment-coq-of-hs.html | 2 +- blog/2024/02/22/journey-coq-of-go.html | 2 +- blog/2024/02/29/improvements-rust-translation.html | 2 +- blog/2024/03/08/improvements-rust-translation-part-2.html | 2 +- blog/2024/03/22/improvements-rust-translation-part-3.html | 2 +- blog/2024/04/03/monadic-notation-for-rust-translation.html | 2 +- blog/2024/04/26/translation-core-alloc-crates.html | 2 +- blog/2024/05/10/translation-of-python-code.html | 2 +- blog/2024/05/14/translation-of-python-code-simulations.html | 2 +- .../22/translation-of-python-code-simulations-from-trace.html | 2 +- 
.../06/05/formal-verification-for-software-correctness.html | 4 ++-- .../06/05/software-correctness-from-first-principles.html | 2 +- blog/archive.html | 2 +- blog/atom.xml | 2 +- blog/page/2.html | 2 +- blog/page/3.html | 2 +- blog/rss.xml | 2 +- blog/tags.html | 2 +- blog/tags/4-14.html | 2 +- blog/tags/aleph-zero.html | 2 +- blog/tags/alloc.html | 2 +- blog/tags/coq-of-go.html | 2 +- blog/tags/coq-of-hs.html | 2 +- blog/tags/coq-of-ocaml.html | 2 +- blog/tags/coq-of-python.html | 2 +- blog/tags/coq-of-rust.html | 2 +- blog/tags/coq-of-rust/page/2.html | 2 +- blog/tags/coq-tezos-of-ocaml.html | 2 +- blog/tags/coq.html | 2 +- blog/tags/coq/page/2.html | 2 +- blog/tags/core.html | 2 +- blog/tags/data-encoding.html | 2 +- blog/tags/erc-20.html | 2 +- blog/tags/ethereum.html | 2 +- blog/tags/example.html | 2 +- blog/tags/first-principles.html | 2 +- blog/tags/formal-verification.html | 2 +- blog/tags/go.html | 2 +- blog/tags/haskell.html | 2 +- blog/tags/hir.html | 2 +- blog/tags/ink.html | 2 +- blog/tags/meetup.html | 2 +- blog/tags/mi-cho-coq.html | 2 +- blog/tags/monad.html | 2 +- blog/tags/o-caml.html | 2 +- blog/tags/ocaml.html | 2 +- blog/tags/python.html | 2 +- blog/tags/rust.html | 2 +- blog/tags/rust/page/2.html | 2 +- blog/tags/side-effects.html | 2 +- blog/tags/simulation.html | 2 +- blog/tags/software-correctness.html | 2 +- blog/tags/solidity.html | 2 +- blog/tags/tezos.html | 2 +- blog/tags/thir.html | 2 +- blog/tags/trace.html | 2 +- blog/tags/trait.html | 2 +- blog/tags/translation.html | 2 +- blog/tags/type-script.html | 2 +- blog/tags/welcome.html | 2 +- docs/company/about.html | 2 +- docs/company/careers.html | 2 +- docs/company/claims.html | 2 +- docs/company/intro.html | 2 +- docs/company/press.html | 2 +- docs/coq-of-ocaml/attributes.html | 2 +- docs/coq-of-ocaml/configuration.html | 2 +- docs/coq-of-ocaml/cookbook.html | 2 +- docs/coq-of-ocaml/examples.html | 2 +- docs/coq-of-ocaml/faq.html | 2 +- docs/coq-of-ocaml/gadts.html | 2 +- 
docs/coq-of-ocaml/install.html | 2 +- docs/coq-of-ocaml/introduction.html | 2 +- docs/coq-of-ocaml/module-system.html | 2 +- docs/coq-of-ocaml/ocaml-core.html | 2 +- docs/coq-of-ocaml/run.html | 2 +- docs/coq-of-ocaml/type-definitions.html | 2 +- docs/coq-of-rust/introduction.html | 2 +- docs/services/ocaml-development.html | 2 +- docs/services/rust-development.html | 2 +- docs/services/solidity-development.html | 2 +- docs/services/typescript-development.html | 2 +- docs/verification/ocaml.html | 2 +- docs/verification/rust.html | 2 +- docs/verification/solidity.html | 2 +- docs/verification/typescript.html | 2 +- index.html | 2 +- 110 files changed, 110 insertions(+), 110 deletions(-) rename assets/js/{6b1d1fec.7125dbed.js => 6b1d1fec.f368363f.js} (96%) delete mode 100644 assets/js/b2f554cd.b4593d13.js create mode 100644 assets/js/b2f554cd.e8f027dd.js rename assets/js/{runtime~main.9f651554.js => runtime~main.8a70afab.js} (99%) diff --git a/404.html b/404.html index 2c7a1a06..8938ae82 100644 --- a/404.html +++ b/404.html @@ -13,7 +13,7 @@ - + diff --git a/assets/js/6b1d1fec.7125dbed.js b/assets/js/6b1d1fec.f368363f.js similarity index 96% rename from assets/js/6b1d1fec.7125dbed.js rename to assets/js/6b1d1fec.f368363f.js index 41080b17..97eca40c 100644 --- a/assets/js/6b1d1fec.7125dbed.js +++ b/assets/js/6b1d1fec.f368363f.js @@ -1 +1 @@ -"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5993],{3331:(e,n,i)=>{i.r(n),i.d(n,{assets:()=>l,contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>a,toc:()=>c});var t=i(4848),s=i(8453);const o={title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},r=void 0,a={permalink:"/blog/2024/06/05/formal-verification-for-software-correctness",source:"@site/blog/2024-06-05-formal-verification-for-software-correctness.md",title:"\ud83e\udd84 Software correctness from first 
principles",description:"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.",date:"2024-06-05T00:00:00.000Z",formattedDate:"June 5, 2024",tags:[{label:"formal verification",permalink:"/blog/tags/formal-verification"},{label:"software correctness",permalink:"/blog/tags/software-correctness"},{label:"first principles",permalink:"/blog/tags/first-principles"},{label:"example",permalink:"/blog/tags/example"},{label:"Python",permalink:"/blog/tags/python"}],readingTime:7.265,hasTruncateMarker:!0,authors:[],frontMatter:{title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},unlisted:!1,nextItem:{title:"\ud83e\udd84 Software correctness from first principles",permalink:"/blog/2024/06/05/software-correctness-from-first-principles"}},l={authorsImageUrls:[]},c=[{value:"Use of formal verification",id:"use-of-formal-verification",level:2},{value:"Definition of programming languages",id:"definition-of-programming-languages",level:2},{value:"Example to verify",id:"example-to-verify",level:2},{value:"Formal verification",id:"formal-verification",level:2},{value:"Completing the property",id:"completing-the-property",level:2},{value:"Conclusion",id:"conclusion",level:2}];function h(e){const n={a:"a",admonition:"admonition",blockquote:"blockquote",code:"code",em:"em",h2:"h2",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,s.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(n.p,{children:[(0,t.jsx)(n.strong,{children:"Formal verification"})," is a technique to verify the absence of bugs in a program 
by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves."]}),"\n",(0,t.jsx)(n.p,{children:"We will present this idea in detail and illustrate how it works for a very simple example."}),"\n",(0,t.jsx)(n.h2,{id:"use-of-formal-verification",children:"Use of formal verification"}),"\n",(0,t.jsx)(n.p,{children:"We typically use formal verification for critical applications, where either:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"life is at stake, like in the case of trains, airplanes, medical devices, or"}),"\n",(0,t.jsx)(n.li,{children:"money is at stake, like in the case of financial applications."}),"\n"]}),"\n",(0,t.jsxs)(n.p,{children:["With formal verification, in theory, ",(0,t.jsx)(n.strong,{children:"we can guarantee that the software will never fail"}),", as we can check ",(0,t.jsx)(n.strong,{children:"all possible cases"})," for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions."]}),"\n",(0,t.jsxs)(n.p,{children:["In this research paper ",(0,t.jsx)(n.a,{href:"https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf",children:"Finding and Understanding Bugs in C Compilers"}),", no bugs were found in the middle-end of the formally verified ",(0,t.jsx)(n.a,{href:"https://en.wikipedia.org/wiki/CompCert",children:"CompCert"})," C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. 
This illustrates that formal verification can be an effective way to make complex software with zero bugs!"]}),"\n",(0,t.jsx)(n.h2,{id:"definition-of-programming-languages",children:"Definition of programming languages"}),"\n",(0,t.jsxs)(n.p,{children:["To be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. For example, in Python, the ",(0,t.jsx)(n.code,{children:"if"})," statement is ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/compound_stmts.html#if",children:"defined in the reference manual"})," by:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:'if_stmt ::= "if" assignment_expression ":" suite\n ("elif" assignment_expression ":" suite)*\n ["else" ":" suite]\n'})}),"\n",(0,t.jsxs)(n.blockquote,{children:["\n",(0,t.jsx)(n.p,{children:"It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"This means that the Python code:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if condition:\n a\nelse:\n b\n"})}),"\n",(0,t.jsxs)(n.p,{children:["will execute ",(0,t.jsx)(n.code,{children:"a"})," when the ",(0,t.jsx)(n.code,{children:"condition"})," is true, and ",(0,t.jsx)(n.code,{children:"b"})," otherwise. 
There are similar rules for all other program constructs (loops, function definitions, classes, ...)."]}),"\n",(0,t.jsx)(n.p,{children:"To make these rules more manageable, we generally split them into two parts:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/grammar.html",children:"grammar"}),"."]}),"\n",(0,t.jsxs)(n.li,{children:["The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the ",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:'In formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating "syntax errors" in case of ill-formed programs.'}),"\n",(0,t.jsx)(n.h2,{id:"example-to-verify",children:"Example to verify"}),"\n",(0,t.jsx)(n.p,{children:"We consider this short Python example of a function returning the maximum number in a list:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n for x in l:\n if x > m:\n m = x\n return m\n"})}),"\n",(0,t.jsxs)(n.p,{children:["We assume that the list ",(0,t.jsx)(n.code,{children:"l"})," is not empty and only contains integers. If we run it on a few examples:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"my_max([1, 2, 3]) # => 3\nmy_max([3, 2, 1]) # => 3\nmy_max([1, 3, 2]) # => 3\n"})}),"\n",(0,t.jsxs)(n.p,{children:["it always returns\xa0",(0,t.jsx)(n.code,{children:"3"}),", the biggest number in the list! 
But can we make sure this is always the case?"]}),"\n",(0,t.jsxs)(n.p,{children:["We can certainly not run\xa0",(0,t.jsx)(n.code,{children:"my_max"})," on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning."]}),"\n",(0,t.jsx)(n.h2,{id:"formal-verification",children:"Formal verification"}),"\n",(0,t.jsxs)(n.p,{children:["Here is a general specification that we give of the\xa0",(0,t.jsx)(n.code,{children:"my_max"})," function above:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (index : int) (l : list[int]),\n 0 \u2264 index < len(l) \u21d2\n l[index] \u2264 my_max(l)\n"})}),"\n",(0,t.jsxs)(n.p,{children:["It says that for all integer ",(0,t.jsx)(n.code,{children:"index"})," and list of integers ",(0,t.jsx)(n.code,{children:"l"}),", if the index is valid (between\xa0",(0,t.jsx)(n.code,{children:"0"})," and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute."]}),"\n",(0,t.jsxs)(n.p,{children:["To verify this property for all possible list\xa0",(0,t.jsx)(n.code,{children:"l"}),", we reason by induction. A non-empty list is either:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"a list with one element, where the maximum is the only element, or"}),"\n",(0,t.jsx)(n.li,{children:"a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"At the start of the code, we will always have:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n"})}),"\n",(0,t.jsxs)(n.p,{children:["with ",(0,t.jsx)(n.code,{children:"m"})," being equal to the first item of the list. 
Then:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["If the list has only one element, we iterate only once in the ",(0,t.jsx)(n.code,{children:"for"})," loop, with ",(0,t.jsx)(n.code,{children:"x"})," equal to ",(0,t.jsx)(n.code,{children:"l[0]"}),". The condition:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n"})}),"\n","is then equivalent to:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if l[0] > l[0]:\n"})}),"\n","and is always false. We then return ",(0,t.jsx)(n.code,{children:"m = l[0]"}),", which is the only element of the list, and it verifies our property as:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[0] \u2264 l[0]\n"})}),"\n"]}),"\n",(0,t.jsxs)(n.li,{children:["If the list has at least two elements, we unroll the code execution of the ",(0,t.jsx)(n.code,{children:"for"})," loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[index] \u2264 m\n"})}),"\n","for all ",(0,t.jsx)(n.code,{children:"index"})," between ",(0,t.jsx)(n.code,{children:"0"})," and ",(0,t.jsx)(n.code,{children:"len(l) - 2"}),". When we reach the last element, we have:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n m = x\n"})}),"\n","with ",(0,t.jsx)(n.code,{children:"x"})," being ",(0,t.jsx)(n.code,{children:"l[len(l) - 1]"}),". There are two possibilities. 
Either ",(0,t.jsx)(n.em,{children:"(i)"})," ",(0,t.jsx)(n.code,{children:"x"})," is less than or equal to ",(0,t.jsx)(n.code,{children:"m"}),", and we do not update ",(0,t.jsx)(n.code,{children:"m"}),", or ",(0,t.jsx)(n.em,{children:"(ii)"})," ",(0,t.jsx)(n.code,{children:"x"})," is greater than ",(0,t.jsx)(n.code,{children:"m"}),", and we update ",(0,t.jsx)(n.code,{children:"m"})," to ",(0,t.jsx)(n.code,{children:"x"}),". In both cases, the property is verified for the last element of the list, as:","\n",(0,t.jsxs)(n.ol,{children:["\n",(0,t.jsxs)(n.li,{children:["In the first case, ",(0,t.jsx)(n.code,{children:"m"})," stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last\xa0",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n",(0,t.jsxs)(n.li,{children:["In the second case, ",(0,t.jsx)(n.code,{children:"m"})," is updated to ",(0,t.jsx)(n.code,{children:"x"}),", which is the last element of the list and a greater value than the original\xa0",(0,t.jsx)(n.code,{children:"m"}),". Then it means that ",(0,t.jsx)(n.code,{children:"m"})," is still larger or equal to all the elements of the list except the last one, being larger that the original\xa0",(0,t.jsx)(n.code,{children:"m"}),", and larger or equal to the last one as it is in fact equals to the last one."]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"We have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code."}),"\n",(0,t.jsxs)(n.p,{children:["In practice, with formal verification, the reasoning above is done in a proof assistance such as ",(0,t.jsx)(n.a,{href:"https://coq.inria.fr/",children:"Coq"})," to help making sure that we did not forget any case, and automatically solve simple cases for us. 
Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details."]}),"\n",(0,t.jsx)(n.h2,{id:"completing-the-property",children:"Completing the property"}),"\n",(0,t.jsx)(n.p,{children:"An additional property that we did not verify is:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (l : list[int]),\n exists (index : int),\n 0 \u2264 index < len(l) and\n l[index] = my_max(l)\n"})}),"\n",(0,t.jsx)(n.p,{children:"It says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise."}),"\n",(0,t.jsx)(n.admonition,{title:"For more",type:"info",children:(0,t.jsxs)(n.p,{children:["If you want to go into more details for the formal verification of Python programs, you can look at our ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-python",children:"coq-of-python"})," project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-rust",children:"Rust"})," and other languages like ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-ocaml",children:"OCaml"}),". Contact us at\xa0",(0,t.jsx)(n.a,{href:"mailto:contact@formal.land",children:"contact@formal.land"})," to discuss!"]})}),"\n",(0,t.jsx)(n.h2,{id:"conclusion",children:"Conclusion"}),"\n",(0,t.jsxs)(n.p,{children:["We have presented here the idea of ",(0,t.jsx)(n.strong,{children:"formal verification"}),", a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". 
We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct ",(0,t.jsx)(n.strong,{children:"for all possible lists of integers"}),"."]}),"\n",(0,t.jsx)(n.p,{children:"We will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!"})]})}function d(e={}){const{wrapper:n}={...(0,s.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(h,{...e})}):h(e)}},8453:(e,n,i)=>{i.d(n,{R:()=>r,x:()=>a});var t=i(6540);const s={},o=t.createContext(s);function r(e){const n=t.useContext(o);return t.useMemo((function(){return"function"==typeof e?e(n):{...n,...e}}),[n,e])}function a(e){let n;return n=e.disableParentContext?"function"==typeof e.components?e.components(s):e.components||s:r(e.components),t.createElement(o.Provider,{value:n},e.children)}}}]); \ No newline at end of file +"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5993],{3331:(e,n,i)=>{i.r(n),i.d(n,{assets:()=>l,contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>a,toc:()=>c});var t=i(4848),s=i(8453);const o={title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},r=void 0,a={permalink:"/blog/2024/06/05/formal-verification-for-software-correctness",source:"@site/blog/2024-06-05-formal-verification-for-software-correctness.md",title:"\ud83e\udd84 Software correctness from first principles",description:"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. 
It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.",date:"2024-06-05T00:00:00.000Z",formattedDate:"June 5, 2024",tags:[{label:"formal verification",permalink:"/blog/tags/formal-verification"},{label:"software correctness",permalink:"/blog/tags/software-correctness"},{label:"first principles",permalink:"/blog/tags/first-principles"},{label:"example",permalink:"/blog/tags/example"},{label:"Python",permalink:"/blog/tags/python"}],readingTime:7.265,hasTruncateMarker:!0,authors:[],frontMatter:{title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},unlisted:!1,nextItem:{title:"\ud83e\udd84 Software correctness from first principles",permalink:"/blog/2024/06/05/software-correctness-from-first-principles"}},l={authorsImageUrls:[]},c=[{value:"Use of formal verification",id:"use-of-formal-verification",level:2},{value:"Definition of programming languages",id:"definition-of-programming-languages",level:2},{value:"Example to verify",id:"example-to-verify",level:2},{value:"Formal verification",id:"formal-verification",level:2},{value:"Completing the property",id:"completing-the-property",level:2},{value:"Conclusion",id:"conclusion",level:2}];function h(e){const n={a:"a",admonition:"admonition",blockquote:"blockquote",code:"code",em:"em",h2:"h2",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,s.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(n.p,{children:[(0,t.jsx)(n.strong,{children:"Formal verification"})," is a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. 
It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves."]}),"\n",(0,t.jsx)(n.p,{children:"We will present this idea in detail and illustrate how it works for a very simple example."}),"\n",(0,t.jsx)(n.h2,{id:"use-of-formal-verification",children:"Use of formal verification"}),"\n",(0,t.jsx)(n.p,{children:"We typically use formal verification for critical applications, where either:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"life is at stake, like in the case of trains, airplanes, medical devices, or"}),"\n",(0,t.jsx)(n.li,{children:"money is at stake, like in the case of financial applications."}),"\n"]}),"\n",(0,t.jsxs)(n.p,{children:["With formal verification, in theory, ",(0,t.jsx)(n.strong,{children:"we can guarantee that the software will never fail"}),", as we can check ",(0,t.jsx)(n.strong,{children:"all possible cases"})," for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions."]}),"\n",(0,t.jsxs)(n.p,{children:["In this research paper ",(0,t.jsx)(n.a,{href:"https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf",children:"Finding and Understanding Bugs in C Compilers"}),", no bugs were found in the middle-end of the formally verified ",(0,t.jsx)(n.a,{href:"https://en.wikipedia.org/wiki/CompCert",children:"CompCert"})," C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!"]}),"\n",(0,t.jsx)(n.h2,{id:"definition-of-programming-languages",children:"Definition of programming languages"}),"\n",(0,t.jsxs)(n.p,{children:["To be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. 
For example, in Python, the ",(0,t.jsx)(n.code,{children:"if"})," statement is ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/compound_stmts.html#if",children:"defined in the reference manual"})," by:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:'if_stmt ::= "if" assignment_expression ":" suite\n ("elif" assignment_expression ":" suite)*\n ["else" ":" suite]\n'})}),"\n",(0,t.jsxs)(n.blockquote,{children:["\n",(0,t.jsx)(n.p,{children:"It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"This means that the Python code:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if condition:\n a\nelse:\n b\n"})}),"\n",(0,t.jsxs)(n.p,{children:["will execute ",(0,t.jsx)(n.code,{children:"a"})," when the ",(0,t.jsx)(n.code,{children:"condition"})," is true, and ",(0,t.jsx)(n.code,{children:"b"})," otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...)."]}),"\n",(0,t.jsx)(n.p,{children:"To make these rules more manageable, we generally split them into two parts:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/grammar.html",children:"grammar"}),"."]}),"\n",(0,t.jsxs)(n.li,{children:["The semantics part, that defines what a program does. 
This is what we have seen above with the description of the behavior of the ",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:'In formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating "syntax errors" in case of ill-formed programs.'}),"\n",(0,t.jsx)(n.h2,{id:"example-to-verify",children:"Example to verify"}),"\n",(0,t.jsx)(n.p,{children:"We consider this short Python example of a function returning the maximum number in a list:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n for x in l:\n if x > m:\n m = x\n return m\n"})}),"\n",(0,t.jsxs)(n.p,{children:["We assume that the list ",(0,t.jsx)(n.code,{children:"l"})," is not empty and only contains integers. If we run it on a few examples:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"my_max([1, 2, 3]) # => 3\nmy_max([3, 2, 1]) # => 3\nmy_max([1, 3, 2]) # => 3\n"})}),"\n",(0,t.jsxs)(n.p,{children:["it always returns\xa0",(0,t.jsx)(n.code,{children:"3"}),", the biggest number in the list! But can we make sure this is always the case?"]}),"\n",(0,t.jsxs)(n.p,{children:["We can certainly not run\xa0",(0,t.jsx)(n.code,{children:"my_max"})," on all possible lists of integers, as there are infinitely many of them. 
We need to reason from the definition of the Python language, which is what we call formal verification reasoning."]}),"\n",(0,t.jsx)(n.h2,{id:"formal-verification",children:"Formal verification"}),"\n",(0,t.jsxs)(n.p,{children:["Here is a general specification that we give of the\xa0",(0,t.jsx)(n.code,{children:"my_max"})," function above:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (index : int) (l : list[int]),\n 0 \u2264 index < len(l) \u21d2\n l[index] \u2264 my_max(l)\n"})}),"\n",(0,t.jsxs)(n.p,{children:["It says that for all integer ",(0,t.jsx)(n.code,{children:"index"})," and list of integers ",(0,t.jsx)(n.code,{children:"l"}),", if the index is valid (between\xa0",(0,t.jsx)(n.code,{children:"0"})," and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute."]}),"\n",(0,t.jsxs)(n.p,{children:["To verify this property for all possible list\xa0",(0,t.jsx)(n.code,{children:"l"}),", we reason by induction. A non-empty list is either:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"a list with one element, where the maximum is the only element, or"}),"\n",(0,t.jsx)(n.li,{children:"a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"At the start of the code, we will always have:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n"})}),"\n",(0,t.jsxs)(n.p,{children:["with ",(0,t.jsx)(n.code,{children:"m"})," being equal to the first item of the list. Then:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["If the list has only one element, we iterate only once in the ",(0,t.jsx)(n.code,{children:"for"})," loop, with ",(0,t.jsx)(n.code,{children:"x"})," equal to ",(0,t.jsx)(n.code,{children:"l[0]"}),". 
The condition:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n"})}),"\n","is then equivalent to:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if l[0] > l[0]:\n"})}),"\n","and is always false. We then return ",(0,t.jsx)(n.code,{children:"m = l[0]"}),", which is the only element of the list, and it verifies our property as:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[0] \u2264 l[0]\n"})}),"\n"]}),"\n",(0,t.jsxs)(n.li,{children:["If the list has at least two elements, we unroll the code execution of the ",(0,t.jsx)(n.code,{children:"for"})," loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[index] \u2264 m\n"})}),"\n","for all ",(0,t.jsx)(n.code,{children:"index"})," between ",(0,t.jsx)(n.code,{children:"0"})," and ",(0,t.jsx)(n.code,{children:"len(l) - 2"}),". When we reach the last element, we have:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n m = x\n"})}),"\n","with ",(0,t.jsx)(n.code,{children:"x"})," being ",(0,t.jsx)(n.code,{children:"l[len(l) - 1]"}),". There are two possibilities. Either ",(0,t.jsx)(n.em,{children:"(i)"})," ",(0,t.jsx)(n.code,{children:"x"})," is less than or equal to ",(0,t.jsx)(n.code,{children:"m"}),", and we do not update ",(0,t.jsx)(n.code,{children:"m"}),", or ",(0,t.jsx)(n.em,{children:"(ii)"})," ",(0,t.jsx)(n.code,{children:"x"})," is greater than ",(0,t.jsx)(n.code,{children:"m"}),", and we update ",(0,t.jsx)(n.code,{children:"m"})," to ",(0,t.jsx)(n.code,{children:"x"}),". 
In both cases, the property is verified for the last element of the list, as:","\n",(0,t.jsxs)(n.ol,{children:["\n",(0,t.jsxs)(n.li,{children:["In the first case, ",(0,t.jsx)(n.code,{children:"m"})," stays the same, so it is still larger than or equal to all the elements of the list except the last one, as well as larger than or equal to the last one according to this last\xa0",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n",(0,t.jsxs)(n.li,{children:["In the second case, ",(0,t.jsx)(n.code,{children:"m"})," is updated to ",(0,t.jsx)(n.code,{children:"x"}),", which is the last element of the list and a greater value than the original\xa0",(0,t.jsx)(n.code,{children:"m"}),". Then it means that ",(0,t.jsx)(n.code,{children:"m"})," is still larger than or equal to all the elements of the list except the last one, being larger than the original\xa0",(0,t.jsx)(n.code,{children:"m"}),", and larger than or equal to the last one as it is in fact equal to the last one."]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"We have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code."}),"\n",(0,t.jsxs)(n.p,{children:["In practice, with formal verification, the reasoning above is done in a proof assistant such as ",(0,t.jsx)(n.a,{href:"https://coq.inria.fr/",children:"Coq"})," to help make sure that we did not forget any case, and automatically solve simple cases for us. 
Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details."]}),"\n",(0,t.jsx)(n.h2,{id:"completing-the-property",children:"Completing the property"}),"\n",(0,t.jsx)(n.p,{children:"An additional property that we did not verify is:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (l : list[int]),\n exists (index : int),\n 0 \u2264 index < len(l) and\n l[index] = my_max(l)\n"})}),"\n",(0,t.jsx)(n.p,{children:"It says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise."}),"\n",(0,t.jsx)(n.admonition,{title:"For more",type:"info",children:(0,t.jsxs)(n.p,{children:["If you want to go into more details for the formal verification of Python programs, you can look at our ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-python",children:"coq-of-python"})," project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-rust",children:"Rust"})," and other languages like ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-ocaml",children:"OCaml"}),". Contact us at\xa0",(0,t.jsx)(n.a,{href:"mailto:contact@formal.land",children:"contact@formal.land"})," to discuss!"]})}),"\n",(0,t.jsx)(n.h2,{id:"conclusion",children:"Conclusion"}),"\n",(0,t.jsxs)(n.p,{children:["We have presented here the idea of ",(0,t.jsx)(n.strong,{children:"formal verification"}),", a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". 
We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct ",(0,t.jsx)(n.strong,{children:"for all possible lists of integers"}),"."]}),"\n",(0,t.jsx)(n.p,{children:"We will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and to tell us what subjects you want to see covered!"})]})}function d(e={}){const{wrapper:n}={...(0,s.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(h,{...e})}):h(e)}},8453:(e,n,i)=>{i.d(n,{R:()=>r,x:()=>a});var t=i(6540);const s={},o=t.createContext(s);function r(e){const n=t.useContext(o);return t.useMemo((function(){return"function"==typeof e?e(n):{...n,...e}}),[n,e])}function a(e){let n;return n=e.disableParentContext?"function"==typeof e.components?e.components(s):e.components||s:r(e.components),t.createElement(o.Provider,{value:n},e.children)}}}]); \ No newline at end of file diff --git a/assets/js/b2f554cd.b4593d13.js b/assets/js/b2f554cd.b4593d13.js deleted file mode 100644 index 0a3253ab..00000000 --- a/assets/js/b2f554cd.b4593d13.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5894],{6042:e=>{e.exports=JSON.parse('{"blogPosts":[{"id":"/2024/06/05/formal-verification-for-software-correctness","metadata":{"permalink":"/blog/2024/06/05/formal-verification-for-software-correctness","source":"@site/blog/2024-06-05-formal-verification-for-software-correctness.md","title":"\ud83e\udd84 Software correctness from first principles","description":"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. 
It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.","date":"2024-06-05T00:00:00.000Z","formattedDate":"June 5, 2024","tags":[{"label":"formal verification","permalink":"/blog/tags/formal-verification"},{"label":"software correctness","permalink":"/blog/tags/software-correctness"},{"label":"first principles","permalink":"/blog/tags/first-principles"},{"label":"example","permalink":"/blog/tags/example"},{"label":"Python","permalink":"/blog/tags/python"}],"readingTime":7.265,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd84 Software correctness from first principles","tags":["formal verification","software correctness","first principles","example","Python"],"authors":[]},"unlisted":false,"nextItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/software-correctness-from-first-principles"}},"content":"**Formal verification** is a technique to verify the absence of bugs in a program by reasoning from **first principles**. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.\\n\\nWe will present this idea in detail and illustrate how it works for a very simple example.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Use of formal verification\\n\\nWe typically use formal verification for critical applications, where either:\\n\\n- life is at stake, like in the case of trains, airplanes, medical devices, or\\n- money is at stake, like in the case of financial applications.\\n\\nWith formal verification, in theory, **we can guarantee that the software will never fail**, as we can check **all possible cases** for a given property. 
A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions.\\n\\nIn this research paper [Finding and Understanding Bugs in C Compilers](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf), no bugs were found in the middle-end of the formally verified [CompCert](https://en.wikipedia.org/wiki/CompCert) C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!\\n\\n## Definition of programming languages\\n\\nTo be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. For example, in Python, the `if` statement is [defined in the reference manual](https://docs.python.org/3/reference/compound_stmts.html#if) by:\\n\\n```python\\nif_stmt ::= \\"if\\" assignment_expression \\":\\" suite\\n (\\"elif\\" assignment_expression \\":\\" suite)*\\n [\\"else\\" \\":\\" suite]\\n```\\n> It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed.\\n\\nThis means that the Python code:\\n\\n```python\\nif condition:\\n a\\nelse:\\n b\\n```\\n\\nwill execute `a` when the `condition` is true, and `b` otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...).\\n\\nTo make these rules more manageable, we generally split them into two parts:\\n\\n- The syntax part, that defines what is a valid program in the language. 
For example, in Python, the syntax is defined by the [grammar](https://docs.python.org/3/reference/grammar.html).\\n- The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the `if` statement.\\n\\nIn formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating \\"syntax errors\\" in case of ill-formed programs.\\n\\n## Example to verify\\n\\nWe consider this short Python example of a function returning the maximum number in a list:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n for x in l:\\n if x > m:\\n m = x\\n return m\\n```\\n\\nWe assume that the list `l` is not empty and only contains integers. If we run it on a few examples:\\n\\n```python\\nmy_max([1, 2, 3]) # => 3\\nmy_max([3, 2, 1]) # => 3\\nmy_max([1, 3, 2]) # => 3\\n```\\n\\nit always returns `3`, the biggest number in the list! But can we make sure this is always the case?\\n\\nWe can certainly not run `my_max` on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning.\\n\\n## Formal verification\\n\\nHere is a general specification that we give of the `my_max` function above:\\n\\n```python\\nforall (index : int) (l : list[int]),\\n 0 \u2264 index < len(l) \u21d2\\n l[index] \u2264 my_max(l)\\n```\\n\\nIt says that for all integer `index` and list of integers `l`, if the index is valid (between `0` and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute.\\n\\nTo verify this property for all possible list `l`, we reason by induction. 
A non-empty list is either:\\n\\n- a list with one element, where the maximum is the only element, or\\n- a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list.\\n\\nAt the start of the code, we will always have:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n```\\n\\nwith `m` being equal to the first item of the list. Then:\\n\\n- If the list has only one element, we iterate only once in the `for` loop, with `x` equal to `l[0]`. The condition:\\n ```python\\n if x > m:\\n ```\\n is then equivalent to:\\n ```python\\n if l[0] > l[0]:\\n ```\\n and is always false. We then return `m = l[0]`, which is the only element of the list, and it verifies our property as:\\n ```python\\n l[0] \u2264 l[0]\\n ```\\n- If the list has at least two elements, we unroll the code execution of the `for` loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:\\n ```python\\n l[index] \u2264 m\\n ```\\n for all `index` between `0` and `len(l) - 2`. When we reach the last element, we have:\\n ```python\\n if x > m:\\n m = x\\n ```\\n with `x` being `l[len(l) - 1]`. There are two possibilities. Either *(i)* `x` is less than or equal to `m`, and we do not update `m`, or *(ii)* `x` is greater than `m`, and we update `m` to `x`. In both cases, the property is verified for the last element of the list, as:\\n 1. In the first case, `m` stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last `if` statement.\\n 2. In the second case, `m` is updated to `x`, which is the last element of the list and a greater value than the original `m`. 
Then it means that `m` is still larger or equal to all the elements of the list except the last one, being larger that the original `m`, and larger or equal to the last one as it is in fact equals to the last one.\\n\\nWe have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code.\\n\\nIn practice, with formal verification, the reasoning above is done in a proof assistance such as [Coq](https://coq.inria.fr/) to help making sure that we did not forget any case, and automatically solve simple cases for us. Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details.\\n\\n## Completing the property\\n\\nAn additional property that we did not verify is:\\n\\n```python\\nforall (l : list[int]),\\n exists (index : int),\\n 0 \u2264 index < len(l) and\\n l[index] = my_max(l)\\n```\\n\\nIt says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise.\\n\\n:::info For more\\n\\nIf you want to go into more details for the formal verification of Python programs, you can look at our [coq-of-python](https://github.com/formal-land/coq-of-python) project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for [Rust](https://github.com/formal-land/coq-of-rust) and other languages like [OCaml](https://github.com/formal-land/coq-of-ocaml). 
Contact us at [contact@formal.land](mailto:contact@formal.land) to discuss!\\n\\n:::\\n\\n## Conclusion\\n\\nWe have presented here the idea of **formal verification**, a technique to verify the absence of bugs in a program by reasoning from **first principles**. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct **for all possible lists of integers**.\\n\\nWe will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!"},{"id":"/2024/06/05/software-correctness-from-first-principles","metadata":{"permalink":"/blog/2024/06/05/software-correctness-from-first-principles","source":"@site/blog/2024-06-05-software-correctness-from-first-principles.md","title":"\ud83e\udd84 Software correctness from first principles","description":"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. 
It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.","date":"2024-06-05T00:00:00.000Z","formattedDate":"June 5, 2024","tags":[{"label":"formal verification","permalink":"/blog/tags/formal-verification"},{"label":"software correctness","permalink":"/blog/tags/software-correctness"},{"label":"first principles","permalink":"/blog/tags/first-principles"},{"label":"example","permalink":"/blog/tags/example"},{"label":"Python","permalink":"/blog/tags/python"}],"readingTime":7.26,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd84 Software correctness from first principles","tags":["formal verification","software correctness","first principles","example","Python"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/formal-verification-for-software-correctness"},"nextItem":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace"}},"content":"**Formal verification** is a technique to verify the absence of bugs in a program by reasoning from **first principles**. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. 
It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.\\n\\nWe will present this idea in detail and illustrate how it works for a very simple example.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Use of formal verification\\n\\nWe typically use formal verification for critical applications, where either:\\n\\n- life is at stake, like in the case of trains, airplanes, medical devices, or\\n- money is at stake, like in the case of financial applications.\\n\\nWith formal verification, in theory, **we can guarantee that the software will never fail**, as we can check **all possible cases** for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions.\\n\\nIn this research paper [Finding and Understanding Bugs in C Compilers](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf), no bugs were found in the middle-end of the formally verified [CompCert](https://en.wikipedia.org/wiki/CompCert) C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!\\n\\n## Definition of programming languages\\n\\nTo be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. 
For example, in Python, the `if` statement is [defined in the reference manual](https://docs.python.org/3/reference/compound_stmts.html#if) by:\\n\\n```python\\nif_stmt ::= \\"if\\" assignment_expression \\":\\" suite\\n (\\"elif\\" assignment_expression \\":\\" suite)*\\n [\\"else\\" \\":\\" suite]\\n```\\n> It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed.\\n\\nThis means that the Python code:\\n\\n```python\\nif condition:\\n a\\nelse:\\n b\\n```\\n\\nwill execute `a` when the `condition` is true, and `b` otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...).\\n\\nTo make these rules more manageable, we generally split them into two parts:\\n\\n- The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the [grammar](https://docs.python.org/3/reference/grammar.html).\\n- The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the `if` statement.\\n\\nIn formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating \\"syntax errors\\" in case of ill-formed programs.\\n\\n## Example to verify\\n\\nWe consider this short Python example of a function returning the maximum number in a list:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n for x in l:\\n if x > m:\\n m = x\\n return m\\n```\\n\\nWe assume that the list `l` is not empty and only contains integers. 
If we run it on a few examples:\\n\\n```python\\nmy_max([1, 2, 3]) # => 3\\nmy_max([3, 2, 1]) # => 3\\nmy_max([1, 3, 2]) # => 3\\n```\\n\\nit always returns `3`, the biggest number in the list! But can we make sure this is always the case?\\n\\nWe can certainly not run `my_max` on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning.\\n\\n## Formal verification\\n\\nHere is a general specification that we give of the `my_max` function above:\\n\\n```python\\nforall (index : int) (l : list[int]),\\n 0 \u2264 index < len(l) \u21d2\\n l[index] \u2264 my_max(l)\\n```\\n\\nIt says that for all integer `index` and list of integers `l`, if the index is valid (between `0` and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute.\\n\\nTo verify this property for all possible list `l`, we reason by induction. A non-empty list is either:\\n\\n- a list with one element, where the maximum is the only element, or\\n- a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list.\\n\\nAt the start of the code, we will always have:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n```\\n\\nwith `m` being equal to the first item of the list. Then:\\n\\n- If the list has only one element, we iterate only once in the `for` loop, with `x` equal to `l[0]`. The condition:\\n ```python\\n if x > m:\\n ```\\n is then equivalent to:\\n ```python\\n if l[0] > l[0]:\\n ```\\n and is always false. We then return `m = l[0]`, which is the only element of the list, and it verifies our property as:\\n ```python\\n l[0] \u2264 l[0]\\n ```\\n- If the list has at least two elements, we unroll the code execution of the `for` loop and iterate over all the elements until the last one. 
Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:\\n ```python\\n l[index] \u2264 m\\n ```\\n for all `index` between `0` and `len(l) - 2`. When we reach the last element, we have:\\n ```python\\n if x > m:\\n m = x\\n ```\\n with `x` being `l[len(l) - 1]`. There are two possibilities. Either *(i)* `x` is less than or equal to `m`, and we do not update `m`, or *(ii)* `x` is greater than `m`, and we update `m` to `x`. In both cases, the property is verified for the last element of the list, as:\\n 1. In the first case, `m` stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last `if` statement.\\n 2. In the second case, `m` is updated to `x`, which is the last element of the list and a greater value than the original `m`. Then it means that `m` is still larger or equal to all the elements of the list except the last one, being larger that the original `m`, and larger or equal to the last one as it is in fact equals to the last one.\\n\\nWe have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code.\\n\\nIn practice, with formal verification, the reasoning above is done in a proof assistance such as [Coq](https://coq.inria.fr/) to help making sure that we did not forget any case and add automation for most simple cases. 
Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, or some third-party person to check it without reading all the details.\\n\\n## Completing the property\\n\\nAn additional property that we did not verify is:\\n\\n```python\\nforall (l : list[int]),\\n exists (index : int),\\n 0 \u2264 index < len(l) and\\n l[index] = my_max(l)\\n```\\n\\nIt says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise.\\n\\n:::info Contact\\n\\nIf you want to go into more details for the formal verification of Python programs, you can look at our [coq-of-python](https://github.com/formal-land/coq-of-python) project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for [Rust](https://github.com/formal-land/coq-of-rust) and other languages like [OCaml](https://github.com/formal-land/coq-of-ocaml). Contact us at [contact@formal.land](mailto:contact@formal.land) to discuss!\\n\\n:::\\n\\n## Conclusion\\n\\nWe have presented here the idea of **formal verification**, a technique to verify the absence of bugs in a program by reasoning from **first principles**. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct **for all possible lists of integers**.\\n\\nWe will continue with more blog posts explaining what we can do with formal verification and why it matters. 
Feel free to share this post and tell us what subjects you would like to see covered!"},{"id":"/2024/05/22/translation-of-python-code-simulations-from-trace","metadata":{"permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace","source":"@site/blog/2024-05-22-translation-of-python-code-simulations-from-trace.md","title":"\ud83d\udc0d Simulation of Python code from traces in Coq","description":"In order to formally verify Python code in Coq our approach is the following:","date":"2024-05-22T00:00:00.000Z","formattedDate":"May 22, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"},{"label":"simulation","permalink":"/blog/tags/simulation"},{"label":"trace","permalink":"/blog/tags/trace"}],"readingTime":8.59,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum","simulation","trace"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/software-correctness-from-first-principles"},"nextItem":{"title":"\ud83d\udc0d Simulation of Python code in Coq","permalink":"/blog/2024/05/14/translation-of-python-code-simulations"}},"content":"In order to formally verify Python code in Coq our approach is the following:\\n\\n1. Import Python code in Coq by running [coq-of-python](https://github.com/formal-land/coq-of-python).\\n2. Write a purely functional simulation in Coq of the code.\\n3. Show that this simulation is equivalent to the translation.\\n4. Verify the simulation.\\n\\nWe will show in this article how we can merge the steps 2. and 3. to save time in the verification process. 
We do so by relying on the proof mode of Coq and unification.\\n\\nOur mid-term goal is to formally specify the [Ethereum Virtual Machine](https://ethereum.org/en/developers/docs/evm/) (EVM) and prove that this specification is correct according to [reference implementation of the EVM](https://github.com/ethereum/execution-specs) in Python. This would ensure that it is always up-to-date and exhaustive. The code of this project is open-source and available on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python).\\n\\n\x3c!-- truncate --\x3e\\n\\n
\\n ![Python at work](2024-05-22/python.webp)\\n
\\n\\n## Our Python\'s monad \ud83d\udc0d\\n\\nWe put the Python code that we import in Coq in a monad `M` to represent all the features that are hard to express in Coq, mainly the side effects. This monad is a combination of two levels:\\n\\n- `LowM` for the side effects except the control flow.\\n- `M` that adds an error monad on top of `LowM` to handle the control flow (exceptions, `break` instruction, ...).\\n\\n### LowM\\n\\nHere is the definition of the `LowM` monad in [CoqOfPython.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/CoqOfPython.v):\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set -> Set :=\\n | StateAlloc (object : Object.t Value.t) : t (Pointer.t Value.t)\\n | StateRead (mutable : Pointer.Mutable.t Value.t) : t (Object.t Value.t)\\n | StateWrite (mutable : Pointer.Mutable.t Value.t) (update : Object.t Value.t) : t unit\\n | GetInGlobals (globals : Globals.t) (name : string) : t Value.t.\\nEnd Primitive.\\n\\nModule LowM.\\n Inductive t (A : Set) : Set :=\\n | Pure (a : A)\\n | CallPrimitive {B : Set} (primitive : Primitive.t B) (k : B -> t A)\\n | CallClosure {B : Set} (closure : Data.t Value.t) (args kwargs : Value.t) (k : B -> t A)\\n | Impossible.\\n Arguments Pure {_}.\\n Arguments CallPrimitive {_ _}.\\n Arguments CallClosure {_ _}.\\n Arguments Impossible {_}.\\n\\n Fixpoint bind {A B : Set} (e1 : t A) (e2 : A -> t B) : t B :=\\n match e1 with\\n | Pure a => e2 a\\n | CallPrimitive primitive k => CallPrimitive primitive (fun v => bind (k v) e2)\\n | CallClosure closure args kwargs k => CallClosure closure args kwargs (fun a => bind (k a) e2)\\n | Impossible => Impossible\\n end.\\nEnd LowM.\\n```\\n\\nThis is a monad defined by continuation (the variable `k`):\\n\\n- We terminate a computation with the primitive `Pure` and some result `a`, that can be any purely functional expression.\\n- We can call some primitives grouped in `Primitive.t` that are side effects:\\n - `StateAlloc` to allocate a new object in the 
memory,\\n - `StateRead` to read an object from the memory,\\n - `StateWrite` to write an object in the memory,\\n - `GetInGlobals` to read a global variable, doing name resolution. This is a side effects as function definitions in Python do not need to be ordered.\\n- We can call a closure (an anonymous function) with `CallClosure`. This is required for termination, as we cannot define an eval function on the type of Python values since some do not terminate like the [\u03a9 expression](https://medium.com/@dkeout/why-you-must-actually-understand-the-%CF%89-and-y-combinators-c9204241da7a). See our previous post [Translation of Python code to Coq](/blog/2024/05/10/translation-of-python-code) for our definition of Python values. The combinator `CallClosure` is also very convenient to modularize our proofs: we reason on each closure independently.\\n- We can mark a code path as unreachable with `Impossible`.\\n\\n### M\\n\\nThe final monad `M` is defined as:\\n\\n```coq\\nDefinition M : Set :=\\n LowM.t (Value.t + Exception.t).\\n```\\n\\nIt has no parameters as Python is untyped, so all expressions have the same result type:\\n\\n- either a success value of type `Value.t`,\\n- or an exception of type `Exception.t`, with some special cases to represent a `return`, a `break`, or a `continue` instruction.\\n\\nWe define the monadic bind of `M` like for the error monad:\\n\\n```coq\\nDefinition bind (e1 : M) (e2 : Value.t -> M) : M :=\\n LowM.bind e1 (fun v => match v with\\n | inl v => e2 v\\n | inr e => LowM.Pure (inr e)\\n end).\\n```\\n\\n## Traces \ud83d\udc3e\\n\\nWe define our semantics of a computation `e` of type `M` in [simulations/proofs/CoqOfPython.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/simulations/proofs/CoqOfPython.v) with the predicate:\\n\\n```coq\\n{{ stack, heap | e \u21d3 to_value | P_stack, P_heap }}\\n```\\n\\nthat we call a _run_ or a _trace_, saying that:\\n\\n- starting from the initial state `stack`, `heap`,\\n- the 
computation `e` terminates with a value,\\n- that is in the image of the function `to_value`,\\n- and with a final stack and heap that satisfy the predicates `P_stack` and `P_heap`.\\n\\nNote that we do not make explicit the resulting value and memory state of a computation in this predicate. We only say that it exists and verifies a few properties, that are here for compositionality. We have a purely functional function `evaluate` that can derive the result of a run of a computation:\\n\\n```coq\\nevaluate :\\n forall `{Heap.Trait} {A B : Set}\\n {stack : Stack.t} {heap : Heap} {e : LowM.t B}\\n {to_value : A -> B} {P_stack : Stack.t -> Prop} {P_heap : Heap -> Prop}\\n (run : {{ stack, heap | e \u21d3 to_value | P_stack, P_heap }}),\\n A * { stack : Stack.t | P_stack stack } * { heap : Heap | P_heap heap }\\n```\\n\\nThe function `evaluate` is defined in Coq by a `Fixpoint`. Its result is what we call a _simulation_, which is a purely functional definition equivalent to the original computation `e` from Python. 
It is equivalent by construction.\\n\\n## Building a trace \ud83d\udd28\\n\\nA trace is an inductive in `Set` that we can build with the following constructors:\\n\\n```coq\\nInductive t `{Heap.Trait} {A B : Set}\\n (stack : Stack.t) (heap : Heap)\\n (to_value : A -> B) (P_stack : Stack.t -> Prop) (P_heap : Heap -> Prop) :\\n LowM.t B -> Set :=\\n(* [Pure] primitive *)\\n| Pure\\n (result : A)\\n (result\' : B) :\\n result\' = to_value result ->\\n P_stack stack ->\\n P_heap heap ->\\n {{ stack, heap |\\n LowM.Pure result\' \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* [StateRead] primitive *)\\n| CallPrimitiveStateRead\\n (mutable : Pointer.Mutable.t Value.t)\\n (object : Object.t Value.t)\\n (k : Object.t Value.t -> LowM.t B) :\\n IsRead.t stack heap mutable object ->\\n {{ stack, heap |\\n k object \u21d3\\n to_value\\n | P_stack, P_heap }} ->\\n {{ stack, heap |\\n LowM.CallPrimitive (Primitive.StateRead mutable) k \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* [CallClosure] primitive *)\\n| CallClosure {C : Set}\\n (f : Value.t -> Value.t -> M)\\n (args kwargs : Value.t)\\n (to_value_inter : C -> Value.t + Exception.t)\\n (P_stack_inter : Stack.t -> Prop) (P_heap_inter : Heap -> Prop)\\n (k : Value.t + Exception.t -> LowM.t B) :\\n let closure := Data.Closure f in\\n {{ stack, heap |\\n f args kwargs \u21d3\\n to_value_inter\\n | P_stack_inter, P_heap_inter }} ->\\n (* We quantify over every possible values as we cannot compute the result of the closure here.\\n We only know that it exists and respects some constraints in this inductive definition. *)\\n (forall value_inter stack_inter heap_inter,\\n P_stack_inter stack_inter ->\\n P_heap_inter heap_inter ->\\n {{ stack_inter, heap_inter |\\n k (to_value_inter value_inter) \u21d3\\n to_value\\n | P_stack, P_heap }}\\n ) ->\\n {{ stack, heap |\\n LowM.CallClosure closure args kwargs k \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* ...cases for the other primitives of the monad... 
*)\\n```\\n\\n### Pure\\n\\nIn the `Pure` case we return the final result of the computation. We check the state fulfills the predicate `P_stack` and `P_heap`, and that the result is the image by the function `to_value` of some `result`.\\n\\n### CallPrimitiveStateRead\\n\\nTo read a value in memory, we rely on another predicate `IsRead` that checks if the `mutable` pointer is valid in the `stack` or `heap` and that the `object` is the value at this pointer. We then call the continuation `k` with this object. We have similar rules for allocating a new object in memory and writing at a pointer.\\n\\nNote that we parameterize all our semantics by `` `{Heap.Trait}`` that provides a specific `Heap` type with read and write primitives. We can choose the implementation of the memory model that we want to use in our simulations in order to simplify the reasoning.\\n\\n### CallClosure\\n\\nTo call a closure, we first evaluate the closure with the arguments and keyword arguments. We then call the continuation `k` with the result of the closure. We quantify over all possible results of the closure, as we cannot compute it here. This would require to be able to define `Fixpoint` together with `Inductive`, which is not possible in Coq. So we only know that the result of the closure exists, and can use the constraints on its result (the function `to_value` and the predicates `P_stack_inter` and `P_heap_inter`) to build a run of the continuation.\\n\\nThe other constructors are not presented here but are similar to the above. We will also add a monadic primitive for loops with the following idea: we show that a loop terminates by building a trace, as traces are `Inductive` so must be finite. 
We have no rules for the `Impossible` case so that building the trace of a computation also shows that the `Impossible` calls are in unreachable paths.\\n\\n## Example \ud83d\udd0d\\n\\nWe have applied this technique to a small code example with allocation, memory read, and closure call primitives. We were able to show that the resulting simulation obtained by running `evaluate` on the trace is equal to a simulation written by hand. The proof was just the tactic `reflexivity`. We believe that we can automate most of the tactics used to build a run, except for the allocations where the user needs to make a choice (immediate, stack, or heap allocation, which address, ...).\\n\\nTo continue our experiments we now need to complete our semantics of Python, especially to take into account method and operator calls.\\n\\n## Conclusion\\n\\nWe have presented an alternative way to build simulations of imperative Python code in purely functional Coq code. The idea is to enable faster reasoning over Python code by removing the need to build explicit simulations. We plan to port this technique to other tools like [coq-of-rust](https://github.com/formal-land/coq-of-rust) as well.\\n\\nTo see what we can do for you talk with us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7. For our previous projects, see our [formal verification of the Tezos\' L1](https://formal-land.gitlab.io/coq-tezos-of-ocaml/)!"},{"id":"/2024/05/14/translation-of-python-code-simulations","metadata":{"permalink":"/blog/2024/05/14/translation-of-python-code-simulations","source":"@site/blog/2024-05-14-translation-of-python-code-simulations.md","title":"\ud83d\udc0d Simulation of Python code in Coq","description":"We are continuing to specify the Ethereum Virtual Machine (EVM) in the formal verification language Coq. 
We are working from the automatic translation in Coq of the reference implementation of the EVM, which is written in the language Python.","date":"2024-05-14T00:00:00.000Z","formattedDate":"May 14, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"}],"readingTime":6.63,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Simulation of Python code in Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace"},"nextItem":{"title":"\ud83d\udc0d Translation of Python code to Coq","permalink":"/blog/2024/05/10/translation-of-python-code"}},"content":"We are continuing to specify the [Ethereum Virtual Machine](https://ethereum.org/en/developers/docs/evm/) (EVM) in the formal verification language [Coq](https://coq.inria.fr/). We are working from the [automatic translation in Coq](https://github.com/formal-land/coq-of-python/tree/main/CoqOfPython/ethereum) of the [reference implementation of the EVM](https://github.com/ethereum/execution-specs), which is written in the language [Python](https://www.python.org/).\\n\\nIn this article, we will see how we specify the EVM in Coq by writing an interpreter that closely mimics the behavior of the Python code. We call that implementation a _simulation_ as it aims to reproduce the behavior of the Python code, the reference.\\n\\nIn contrast to the automatic translation from Python, the simulation is a manual translation written in idiomatic Coq. We expect it to be ten times smaller in lines compared to the automatic translation, and of about the same size as the Python code. 
This is because the automatic translation needs to encode all the Python specific features in Coq, like variable mutations and the class system.\\n\\nIn the following article, we will show how we can prove that the simulation is correct, meaning that it behaves exactly as the automatic translation.\\n\\nThe code of this project is open-source and available on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python). This work follows a call from [Vitalik Buterin](https://en.wikipedia.org/wiki/Vitalik_Buterin) for more formal verification of the Ethereum\'s code.\\n\\n\x3c!-- truncate --\x3e\\n\\n
\\n ![Python writing simulations](2024-05-14/python_simulation.webp)\\n
\\n\\n## The `add` function \ud83e\uddee\\n\\nWe focus on a simulation for the `add` function in [vm/instructions/arithmetic.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/paris/vm/instructions/arithmetic.py) that implements the addition primitive of the EVM. The Python code is:\\n\\n```python\\ndef add(evm: Evm) -> None:\\n \\"\\"\\"\\n Adds the top two elements of the stack together, and pushes the result back\\n on the stack.\\n\\n Parameters\\n ----------\\n evm :\\n The current EVM frame.\\n\\n \\"\\"\\"\\n # STACK\\n x = pop(evm.stack)\\n y = pop(evm.stack)\\n\\n # GAS\\n charge_gas(evm, GAS_VERY_LOW)\\n\\n # OPERATION\\n result = x.wrapping_add(y)\\n\\n push(evm.stack, result)\\n\\n # PROGRAM COUNTER\\n evm.pc += 1\\n```\\n\\nMost of the functions of the interpreter are written in this style. They take the global state of the interpreter, called `Evm` as input, and mutate it with the effect of the current instruction.\\n\\nThe `Evm` structure is defined as:\\n\\n```python\\n@dataclass\\nclass Evm:\\n \\"\\"\\"The internal state of the virtual machine.\\"\\"\\"\\n\\n pc: Uint\\n stack: List[U256]\\n memory: bytearray\\n code: Bytes\\n gas_left: Uint\\n env: Environment\\n valid_jump_destinations: Set[Uint]\\n logs: Tuple[Log, ...]\\n refund_counter: int\\n running: bool\\n message: Message\\n output: Bytes\\n accounts_to_delete: Set[Address]\\n touched_accounts: Set[Address]\\n return_data: Bytes\\n error: Optional[Exception]\\n accessed_addresses: Set[Address]\\n accessed_storage_keys: Set[Tuple[Address, Bytes32]]\\n```\\n\\nIt contains the current instruction pointer `pc`, the stack of the EVM, the memory, the code, the gas left, ...\\n\\nAs the EVM is a stack-based machine, the addition function does the following:\\n\\n1. It pops the two top elements of the stack `x` and `y`,\\n2. It charges a very low amount of gas,\\n3. It computes the result of the addition `result = x + y`,\\n4. It pushes the result back on the stack,\\n5. 
It increments the program counter `pc`.\\n\\nNote that all these operations might fail and raise an exception, for example, if the stack is empty when we pop `x` and `y` at the beginning.\\n\\n## Monad for the simulations \ud83e\uddea\\n\\nThe main side-effects that we want to integrate into the Coq simulations are:\\n\\n- the mutation of the global state `Evm`,\\n- the raising of exceptions.\\n\\nFor that, we use a state and error monad `MS?`:\\n\\n```coq\\nModule StateError.\\n Definition t (State Error A : Set) : Set :=\\n State -> (A + Error) * State.\\n\\n Definition return_ {State Error A : Set}\\n (value : A) :\\n t State Error A :=\\n fun state => (inl value, state).\\n\\n Definition bind {State Error A B : Set}\\n (value : t State Error A)\\n (f : A -> t State Error B) :\\n t State Error B :=\\n fun state =>\\n let (value, state) := value state in\\n match value with\\n | inl value => f value state\\n | inr error => (inr error, state)\\n end.\\nEnd StateError.\\n\\nNotation \\"MS?\\" := StateError.t.\\n```\\n\\nWe parametrize it by an equivalent definition in Coq of the type `Evm` and the type of exceptions that we might raise.\\n\\nIn Python the exceptions are a class that is extended as needed to add new kinds of exceptions. We use a closed sum type in Coq to represent all the possible exceptions that might happen in the EVM interpreter.\\n\\nFor the `Evm` state, some functions might actually only modify a part of it. For example, the `pop` function only modifies the `stack` field. We use a mechanism of [lens](https://medium.com/javascript-scene/lenses-b85976cb0534) to specialize the state monad to only modify a part of the state. For example, the `pop` function has the type:\\n\\n```coq\\npop : MS? (list U256.t) Exception.t U256.t\\n```\\n\\nwhere `list U256.t` is the type of the stack, while the `add` function has type:\\n\\n```coq\\nadd : MS? 
Evm.t Exception.t unit\\n```\\n\\nWe define a lens for the stack in the `Evm` type with:\\n\\n```coq\\nModule Lens.\\n Record t (Big_A A : Set) : Set := {\\n read : Big_A -> A;\\n write : Big_A -> A -> Big_A\\n }.\\nEnd Lens.\\n\\nModule Evm.\\n Module Lens.\\n Definition stack : Lens.t Evm.t (list U256.t) := {|\\n Lens.read := (* ... *);\\n Lens.write := (* ... *);\\n |}.\\n```\\n\\nWe can then lift the `pop` function to be used in a context where the `Evm` state is modified with:\\n\\n```coq\\nletS? x := StateError.lift_lens Evm.Lens.stack pop in\\n```\\n\\n## Typing discipline \ud83d\udc6e\\n\\nWe keep in Coq all the type names from the Python source code. When a new class is created we create a new Coq type. When the class inherits from another one, we add a field in the Coq type to represent the parent class. Thus we work by composition rather than inheritance.\\n\\nHere is an example of the primitive types defined in [base_types.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/base_types.py):\\n\\n```python\\nclass FixedUint(int):\\n MAX_VALUE: ClassVar[\\"FixedUint\\"]\\n\\n # ...\\n\\n def __add__(self: T, right: int) -> T:\\n # ...\\n\\nclass U256(FixedUint):\\n MAX_VALUE = 2**256 - 1\\n\\n # ...\\n```\\n\\nWe simulate it by:\\n\\n```coq\\nModule FixedUint.\\n Record t : Set := {\\n MAX_VALUE : Z;\\n value : Z;\\n }.\\n\\n Definition __add__ (self right_ : t) : M? Exception.t t :=\\n (* ... *).\\nEnd FixedUint.\\n\\nModule U256.\\n Inductive t : Set :=\\n | Make (value : FixedUint.t).\\n\\n Definition of_Z (value : Z) : t :=\\n Make {|\\n FixedUint.MAX_VALUE := 2^256 - 1;\\n FixedUint.value := value;\\n |}.\\n\\n (* ... 
*)\\nEnd U256.\\n```\\n\\nFor the imports, that are generally written with an explicit list of names:\\n\\n```python\\nfrom ethereum.base_types import U255_CEIL_VALUE, U256, U256_CEIL_VALUE, Uint\\n```\\n\\nwe follow the same pattern in Coq:\\n\\n```coq\\nRequire ethereum.simulations.base_types.\\nDefinition U255_CEIL_VALUE := base_types.U255_CEIL_VALUE.\\nModule U256 := base_types.U256.\\nDefinition U256_CEIL_VALUE := base_types.U256_CEIL_VALUE.\\nModule Uint := base_types.Uint.\\n```\\n\\nThis is a bit more verbose than the usual way in Coq to import a module, but it makes the translation more straightforward.\\n\\n## Final simulation \ud83e\udeb6\\n\\nFinally, our Coq simulation of the `add` function is the following:\\n\\n```coq\\nDefinition add : MS? Evm.t Exception.t unit :=\\n (* STACK *)\\n letS? x := StateError.lift_lens Evm.Lens.stack pop in\\n letS? y := StateError.lift_lens Evm.Lens.stack pop in\\n\\n (* GAS *)\\n letS? _ := charge_gas GAS_VERY_LOW in\\n\\n (* OPERATION *)\\n let result := U256.wrapping_add x y in\\n\\n letS? _ := StateError.lift_lens Evm.Lens.stack (push result) in\\n\\n (* PROGRAM COUNTER *)\\n letS? _ := StateError.lift_lens Evm.Lens.pc (fun pc =>\\n (inl tt, Uint.__add__ pc (Uint.Make 1))) in\\n\\n returnS? tt.\\n```\\n\\nWe believe that it has a size and readability close to the original Python code. You can look at this definition in [vm/instructions/simulations/arithmetic.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/ethereum/paris/vm/instructions/simulations/arithmetic.v). As a reference, the automatic translation is 65 lines long and in [vm/instructions/arithmetic.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/ethereum/paris/vm/instructions/arithmetic.v).\\n\\n## Conclusion\\n\\nWe have seen how to write a simulation for one example of a Python function. We now need to do it for the rest of the code of the interpreter. 
We will also see in a following article how to prove that the simulation behaves as the automatic translation of the Python code in Coq.\\n\\nFor our formal verification services, reach us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7! To know more about what we have done, see [our previous project](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the verification of the L1 of Tezos."},{"id":"/2024/05/10/translation-of-python-code","metadata":{"permalink":"/blog/2024/05/10/translation-of-python-code","source":"@site/blog/2024-05-10-translation-of-python-code.md","title":"\ud83d\udc0d Translation of Python code to Coq","description":"We are starting to work on a new product, coq-of-python. The idea of this tool is, as you can guess, to translate Python code to the proof system Coq.","date":"2024-05-10T00:00:00.000Z","formattedDate":"May 10, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"}],"readingTime":10.445,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Translation of Python code to Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Simulation of Python code in Coq","permalink":"/blog/2024/05/14/translation-of-python-code-simulations"},"nextItem":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","permalink":"/blog/2024/04/26/translation-core-alloc-crates"}},"content":"We are starting to work on a new product, [coq-of-python](https://github.com/formal-land/coq-of-python). 
The idea of this tool is, as you can guess, to translate Python code to the [proof system Coq](https://coq.inria.fr/).\\n\\nWe want to import specifications written in Python to a formal system like Coq. In particular, we are interested in the [reference specification](https://github.com/ethereum/execution-specs) of [Ethereum](https://ethereum.org/), which describes how [EVM smart contracts](https://ethereum.org/en/developers/docs/evm/) run. Then, we will be able to use this specification to either formally verify the various implementations of the EVM or smart contracts.\\n\\nAll this effort follows [a Tweet](https://twitter.com/VitalikButerin/status/1759369749887332577) from [Vitalik Buterin](https://en.wikipedia.org/wiki/Vitalik_Buterin) hoping for more formal verification of the Ethereum\'s code:\\n\\n> One application of AI that I am excited about is AI-assisted formal verification of code and bug finding.\\n>\\n> Right now ethereum\'s biggest technical risk probably is bugs in code, and anything that could significantly change the game on that would be amazing.\\n>\\n> — Vitalik Buterin\\n\\nWe will now describe the technical development of `coq-of-python`. For the curious, all the code is on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python).\\n\\n\x3c!-- truncate --\x3e\\n\\n
\\n ![Python with a rooster](2024-05-10/python_rooster.webp)\\n \x3c!--
A python with a rooster
--\x3e\\n
\\n\\n## Reading Python code \ud83d\udcd6\\n\\nA first step we need to do to translate Python code is to read it in a programmatic way. For simplicity and better integration, we chose to write `coq-of-python` in Python.\\n\\nWe use the [ast](https://docs.python.org/3/library/ast.html) module to parse the code and get an abstract syntax tree (AST) of the code. This is a tree representation of the code that we can manipulate in Python. We could have used other representations, such as the Python bytecode, but it seemed too low-level to be understandable by a human.\\n\\nGiven the path to a Python file, we get its AST with the following code:\\n\\n```python\\nimport ast\\n\\ndef read_python_file(path: str) -> ast.Module:\\n with open(path, \\"r\\") as file:\\n return ast.parse(file.read())\\n```\\n\\nThis code is very short, and we benefit from the general elegance of Python. There is no typing or advanced data types in Python, keeping the AST rather small. Here is an extract of it:\\n\\n```\\nexpr = BoolOp(boolop op, expr* values)\\n | NamedExpr(expr target, expr value)\\n | BinOp(expr left, operator op, expr right)\\n | UnaryOp(unaryop op, expr operand)\\n | Lambda(arguments args, expr body)\\n | IfExp(expr test, expr body, expr orelse)\\n | Dict(expr* keys, expr* values)\\n | Set(expr* elts)\\n | ListComp(expr elt, comprehension* generators)\\n | SetComp(expr elt, comprehension* generators)\\n | ... more cases ...\\n```\\n\\nAn expression is described as being of one of several kinds. For example, the application of a binary operator such as:\\n\\n```python\\n1 + 2\\n```\\n\\ncorresponds to the case `BinOp` with `1` as the `left` expression, `+` as the `op` operator, and `2` as the `right` expression.\\n\\n## Outputting Coq code \ud83d\udcdd\\n\\nWe translate each element of the Python\'s AST into a string of Coq code. We keep track of the current indentation level in order to present a nice output. 
Here is the code to translate the binary operator expressions:\\n\\n```python\\ndef generate_expr(indent, is_with_paren, node: ast.expr):\\n if isinstance(node, ast.BoolOp):\\n ...\\n elif isinstance(node, ast.BinOp):\\n return paren(\\n is_with_paren,\\n generate_operator(node.op) + \\" (|\\\\n\\" +\\n generate_indent(indent + 1) +\\n generate_expr(indent + 1, False, node.left) + \\",\\\\n\\" +\\n generate_indent(indent + 1) +\\n generate_expr(indent + 1, False, node.right) + \\"\\\\n\\" +\\n generate_indent(indent) + \\"|)\\"\\n )\\n elif ...\\n```\\n\\nWe have the current number of indentation levels in the `indent` variable. We use the flag `is_with_paren` to know whether we should add parenthesis around the current expression if it is the sub-expression of another one.\\n\\nWe apply the `node.op` operator on the two parameters `node.left` and `node.right`. For example, the translation of the Python code `1 + 2` will be:\\n\\n```coq\\nBinOp.add (|\\n Constant.int 1,\\n Constant.int 2\\n|)\\n```\\n\\nWe use a special notation `f (| x1, ..., xn |)` to represent a function application in a monadic context. In the next section, we explain why we need this notation.\\n\\n## Monad and values \ud83d\udd2e\\n\\nOne of the difficulties in translating some code to a language such as Coq is that Coq is purely functional. This means that a function can never modify a variable or raise an exception. The non-purely functional actions are called side-effects.\\n\\nTo solve this issue, we represent the side-effects of the Python code in a [monad]() in Coq. A monad is a special data structure representing the side-effects of a computation. 
We can chain monadic actions together to represent a sequence of side-effects.\\n\\nWe thus have two Coq types:\\n\\n- `Value.t` for the Python values (there is only one type for all values, as Python is a dynamically typed language),\\n- `M` for the monadic expressions.\\n\\nNote that we do not need to parametrize the monad by the type of the values, as we only have one type of value.\\n\\n### Values\\n\\nAccording to the reference manual of Python on the [data model](https://docs.python.org/3/reference/datamodel.html):\\n\\n> All data in a Python program is represented by objects or by relations between objects.\\n\\n> Every object has an identity, a type and a value. An object\u2019s identity never changes once it has been created; you may think of it as the object\u2019s address in memory.\\n\\n> Like its identity, an object\u2019s type is also unchangeable.\\n\\n> The value of some objects can change. Objects whose value can change are said to be mutable; objects whose value is unchangeable once they are created are called immutable.\\n\\nBy following this description, we propose this formalization for the values:\\n\\n```coq\\nModule Data.\\n Inductive t (Value : Set) : Set :=\\n | Ellipsis\\n | Bool (b : bool)\\n | Integer (z : Z)\\n | Tuple (items : list Value)\\n (* ... various other primitive types like lists, ... 
*)\\n | Closure {Value M : Set} (f : Value -> Value -> M)\\n | Klass {Value M : Set}\\n (bases : list (string * string))\\n (class_methods : list (string * (Value -> Value -> M)))\\n (methods : list (string * (Value -> Value -> M))).\\nEnd Data.\\n\\nModule Object.\\n Record t {Value : Set} : Set := {\\n internal : option (Data.t Value);\\n fields : list (string * Value);\\n }.\\nEnd Object.\\n\\nModule Pointer.\\n Inductive t (Value : Set) : Set :=\\n | Imm (data : Object.t Value)\\n | Mutable {Address A : Set}\\n (address : Address)\\n (to_object : A -> Object.t Value).\\nEnd Pointer.\\n\\nModule Value.\\n Inductive t : Set :=\\n | Make (globals : string) (klass : string) (value : Pointer.t t).\\nEnd Value.\\n```\\n\\nWe describe a `Value.t` by:\\n\\n- its type, given by a class name `klass` and a module name `globals` from which the class is defined,\\n- its value, given by a pointer to an object.\\n\\nA `Pointer.t` is either an immutable object `Imm` or a mutable object `Mutable` with an address and a function to get the object from what is stored in the memory. This function `to_object` is required as we plan to allow the user to provide its own custom memory model.\\n\\nAn `Object.t` has a list of named fields that we can populate in the `__init__` method of a class. It also has a special `internal` field that we can use to store special kinds of data, like primitive values.\\n\\nIn `Data.t`, we list the various primitive values that we use to define the primitive types of the Python language. 
We have:\\n\\n- atomic values such as booleans, integers, strings, ...\\n- composite values such as tuples, lists, dictionaries, ...\\n- closures with a function that takes the two arguments `*args` and `**kwargs` and returns a monadic value,\\n- classes with their bases, class methods, and instance methods.\\n\\n### Monad\\n\\nFor now, we axiomatize the monad `M`:\\n\\n```coq\\nParameter M : Set.\\n```\\n\\nWe will see later how to define it, probably by taking some inspiration from our monad from our similar project [coq-of-rust](https://github.com/formal-land/coq-of-rust).\\n\\nTo make the monadic code less heavy, we use a notation inspired by the `async/await` notation of many languages. We believe it to be less heavy than the monadic notation of languages like [Haskell](https://www.haskell.org/). We note:\\n\\n```coq\\nf (| x1, ..., xn |)\\n```\\n\\nto call a function `f` of type:\\n\\n```coq\\nValue.t -> ... -> Value.t -> M\\n```\\n\\nwith the arguments `x1`, ..., `xn` of type `Value.t` and binds its result to the current continuation in the context of the tactic `ltac:(M.monadic ...)`. See our blog post [Monadic notation for the Rust translation](/blog/2024/04/03/monadic-notation-for-rust-translation) for more information.\\n\\nIn summary:\\n\\n- `f (| x1, ..., xn |)` is like `await`,\\n- `ltac:(M.monadic ...)` is like `async`.\\n\\n## Handling of the names \ud83c\udff7\ufe0f\\n\\nNow we talk about how we handle the variable names and link them to their definitions. In the reference manual of Python, the part [Execution model](https://docs.python.org/3/reference/executionmodel.html) gives some information.\\n\\nFor now, we distinguish between two scopes, the global one (top-level definitions) and the local one for variables defined in a function. We might introduce a stack of local scopes to handle nested functions.\\n\\nWe name the global scope with a string, that is the path of the current file. 
Having absolute names helps us translate each file independently. The only file that a translated file requires is `CoqOfPython.CoqOfPython`, to have the definition of the values and the monad.\\n\\nTo translate `import` statements, we use assertions:\\n\\n```coq\\nAxiom ethereum_crypto_imports_elliptic_curve :\\n IsImported globals \\"ethereum.crypto\\" \\"elliptic_curve\\".\\nAxiom ethereum_crypto_imports_finite_field :\\n IsImported globals \\"ethereum.crypto\\" \\"finite_field\\".\\n```\\n\\nThis represents:\\n\\n```python\\nfrom . import elliptic_curve, finite_field\\n```\\n\\nIt means that in the current global scope `globals` we can use the name `\\"elliptic_curve\\"` from the other global scope `\\"ethereum.crypto\\"`.\\n\\nWe set the local scope at the entry of a function with the call:\\n\\n```coq\\nM.set_locals (| args, kwargs, [ \\"x1\\"; ...; \\"xn\\" ] |)\\n```\\n\\nfor a function whose parameter names are `x1`, ..., `xn`. For uniformity, we always group the function\'s parameters as `*args` and `**kwargs`. We do not yet handle the default values.\\n\\nWhen a user creates or updates a local variable `x` with a value `value`, we run:\\n\\n```coq\\nM.assign_local \\"x\\" value : M\\n```\\n\\nTo read a variable, we have a primitive:\\n\\n```coq\\nM.get_name : string -> string -> M\\n```\\n\\nIt takes as a parameter the name of the current global scope and the name of the variable we are reading. The local scope should be accessible from the monad. For now all these primitives are axiomatized.\\n\\n## Some numbers \ud83d\udcca\\n\\nThe code base that we analyze, the Python specification of Ethereum, contains _28,455 lines_ of Python, excluding comments. When we translate it to Coq we obtain _299,484 lines_. This is a roughly ten times increase.\\n\\nThe generated code completely compiles. For now, we avoid some complex Python expressions, like list comprehension, by generating a dummy expression instead. 
Having all the code that compiles will allow us to iterate and add support for more Python features with a simple check: making sure that all the code still compiles.\\n\\nAs an example, we translate the following function:\\n\\n```python\\ndef bnf2_to_bnf12(x: BNF2) -> BNF12:\\n \\"\\"\\"\\n Lift a field element in `BNF2` to `BNF12`.\\n \\"\\"\\"\\n return BNF12.from_int(x[0]) + BNF12.from_int(x[1]) * (\\n BNF12.i_plus_9 - BNF12.from_int(9)\\n )\\n```\\n\\nto the Coq code:\\n\\n```coq\\nDefinition bnf2_to_bnf12 : Value.t -> Value.t -> M :=\\n fun (args kwargs : Value.t) => ltac:(M.monadic (\\n let _ := M.set_locals (| args, kwargs, [ \\"x\\" ] |) in\\n let _ := Constant.str \\"\\n Lift a field element in `BNF2` to `BNF12`.\\n \\" in\\n let _ := M.return_ (|\\n BinOp.add (|\\n M.call (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"from_int\\" |),\\n make_list [\\n M.get_subscript (|\\n M.get_name (| globals, \\"x\\" |),\\n Constant.int 0\\n |)\\n ],\\n make_dict []\\n |),\\n BinOp.mult (|\\n M.call (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"from_int\\" |),\\n make_list [\\n M.get_subscript (|\\n M.get_name (| globals, \\"x\\" |),\\n Constant.int 1\\n |)\\n ],\\n make_dict []\\n |),\\n BinOp.sub (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"i_plus_9\\" |),\\n M.call (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"from_int\\" |),\\n make_list [\\n Constant.int 9\\n ],\\n make_dict []\\n |)\\n |)\\n |)\\n |)\\n |) in\\n M.pure Constant.None_)).\\n```\\n\\n## Conclusion\\n\\nWe continue working on the translation from Python to Coq, especially to now add a semantics to the translation. Our next goal is to have a version, written in idiomatic Coq, of the file [src/ethereum/paris/vm/instructions/arithmetic.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/paris/vm/instructions/arithmetic.py), and proven equal to the original code. 
This will open the door to making a Coq specification of the EVM that is always synchronized to the Python\'s version.\\n\\nFor our services, reach us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7! We want to ensure the blockchain\'s L1 and L2 are bug-free, thanks to a mathematical analysis of the code. See [our previous project](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the L1 of Tezos."},{"id":"/2024/04/26/translation-core-alloc-crates","metadata":{"permalink":"/blog/2024/04/26/translation-core-alloc-crates","source":"@site/blog/2024-04-26-translation-core-alloc-crates.md","title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","description":"We continue our work on formal verification of Rust programs with our tool coq-of-rust, to translate Rust code to the formal proof system Coq. One of the limitation we had was the handling of primitive constructs from the standard library of Rust, like Option::unwrapordefault or all other primitive functions. For each of these functions, we had to make a Coq definition to represent its behavior. 
This is both tedious and error prone.","date":"2024-04-26T00:00:00.000Z","formattedDate":"April 26, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"core","permalink":"/blog/tags/core"},{"label":"alloc","permalink":"/blog/tags/alloc"}],"readingTime":5.365,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","tags":["coq-of-rust","Rust","Coq","translation","core","alloc"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Translation of Python code to Coq","permalink":"/blog/2024/05/10/translation-of-python-code"},"nextItem":{"title":"\ud83e\udd80 Monadic notation for the Rust translation","permalink":"/blog/2024/04/03/monadic-notation-for-rust-translation"}},"content":"We continue our work on formal verification of [Rust](https://www.rust-lang.org/) programs with our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust), to translate Rust code to the formal proof system [Coq](https://coq.inria.fr/). One of the limitation we had was the handling of primitive constructs from the standard library of Rust, like [Option::unwrap_or_default](https://doc.rust-lang.org/core/option/enum.Option.html#method.unwrap_or_default) or all other primitive functions. For each of these functions, we had to make a Coq definition to represent its behavior. This is both tedious and error prone.\\n\\nTo solve this issue, we worked on the translation of the [core](https://doc.rust-lang.org/core/) and [alloc](https://doc.rust-lang.org/alloc/) crates of Rust using `coq-of-rust`. These are very large code bases, with a lot of unsafe or advanced Rust code. We present what we did to have a \\"best effort\\" translation of these crates. 
The resulting translation is in the following folders:\\n\\n- [CoqOfRust/alloc](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/alloc)\\n- [CoqOfRust/core](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/core)\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat \u260e\ufe0f!\\n\\n:::\\n\\n
<figure>\\n  ![Crab with a pen](2024-04-26/crab-in-library.webp)\\n  <figcaption>A crab in a library</figcaption>\\n</figure>
\\n\\n## Initial run \ud83d\udc25\\n\\nAn initial run of `coq-of-rust` on the `alloc` and `core` crates of Rust gave us two files of a few hundred thousand lines of Coq corresponding to the whole translation of these crates. This is a first piece of good news, as it means the tool runs on these large code bases. However, the generated Coq code does not compile, even if the errors are very rare (one every few thousand lines).\\n\\nTo get an idea, here is the size of the input Rust code as given by the `cloc` command:\\n\\n- `alloc`: 26,299 lines of Rust code\\n- `core`: 54,192 lines of Rust code\\n\\nGiven that this code uses macros that we expand in our translation, the actual size that we have to translate is even bigger.\\n\\n## Splitting the generated code \ud83e\ude93\\n\\nThe main change we made was to split the output generated by `coq-of-rust` with one file for each input Rust file. This is possible because our translation is insensitive to the order of definitions and context-free. So, even if there are typically cyclic dependencies between the files in Rust, something that is forbidden in Coq, we can still split them.\\n\\nWe get the following sizes as output:\\n\\n- `alloc`: 54 Coq files, 171,783 lines of Coq code\\n- `core`: 190 Coq files, 592,065 lines of Coq code\\n\\nThe advantages of having the code split are:\\n\\n- it is easier to read and navigate in the generated code\\n- it is easier to compile as we can parallelize the compilation\\n- it is easier to debug as we can focus on one file at a time\\n- it is easier to ignore files that do not compile\\n- it will be easier to maintain, as it is easier to follow the diff of a single file\\n\\n## Fixing some bugs \ud83d\udc1e\\n\\nWe had some bugs related to the collisions between module names. These can occur when we choose a name for the module for an `impl` block. We fixed these by adding more information in the module names to make them more unique, like the `where` clauses that were missing. 
For example, for the implementation of the `Default` trait for the `Mapping` type:\\n\\n```rust\\n#[derive(Default)]\\nstruct Mapping {\\n // ...\\n}\\n```\\n\\nwe were generating the following Coq code:\\n\\n```coq\\nModule Impl_core_default_Default_for_dns_Mapping_K_V.\\n (* ...trait implementation ... *)\\nEnd Impl_core_default_Default_for_dns_Mapping_K_V.\\n```\\n\\nWe now generate:\\n\\n```coq\\nModule Impl_core_default_Default_where_core_default_Default_K_where_core_default_Default_V_for_dns_Mapping_K_V.\\n (* ... *)\\n```\\n\\nwith a module name that includes the `where` clauses of the `impl` block, stating that both `K` and `V` should implement the `Default` trait.\\n\\nHere is the list of files that do not compile in Coq, as of today:\\n\\n- `alloc/boxed.v`\\n- `core/any.v`\\n- `core/array/mod.v`\\n- `core/cmp/bytewise.v`\\n- `core/error.v`\\n- `core/escape.v`\\n- `core/iter/adapters/flatten.v`\\n- `core/net/ip_addr.v`\\n\\nThis represents 4% of the files. Note that in the files that compile there are some unhandled Rust constructs that are axiomatized, so this does not give the whole picture of what we do not support.\\n\\n## Example \ud83d\udd0e\\n\\nHere is the source code of the `unwrap_or_default` method for the `Option` type:\\n\\n```rust\\npub fn unwrap_or_default(self) -> T\\nwhere\\n T: Default,\\n{\\n match self {\\n Some(x) => x,\\n None => T::default(),\\n }\\n}\\n```\\n\\nWe translate it to:\\n\\n```coq\\nDefinition unwrap_or_default (T : Ty.t) (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n let Self : Ty.t := Self T in\\n match \u03c4, \u03b1 with\\n | [], [ self ] =>\\n ltac:(M.monadic\\n (let self := M.alloc (| self |) in\\n M.read (|\\n M.match_operator (|\\n self,\\n [\\n fun \u03b3 =>\\n ltac:(M.monadic\\n (let \u03b30_0 :=\\n M.get_struct_tuple_field_or_break_match (|\\n \u03b3,\\n \\"core::option::Option::Some\\",\\n 0\\n |) in\\n let x := M.copy (| \u03b30_0 |) in\\n x));\\n fun \u03b3 =>\\n ltac:(M.monadic\\n (M.alloc (|\\n 
M.call_closure (|\\n M.get_trait_method (| \\"core::default::Default\\", T, [], \\"default\\", [] |),\\n []\\n |)\\n |)))\\n ]\\n |)\\n |)))\\n | _, _ => M.impossible\\n end.\\n```\\n\\nWe prove that it is equivalent to the simpler functional code:\\n\\n```coq\\nDefinition unwrap_or_default {T : Set}\\n {_ : core.simulations.default.Default.Trait T}\\n (self : Self T) :\\n T :=\\n match self with\\n | None => core.simulations.default.Default.default (Self := T)\\n | Some x => x\\n end.\\n```\\n\\nThis simpler definition is what we use when verifying code. The proof of equivalence is in [CoqOfRust/core/proofs/option.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/core/proofs/option.v). In case the original source code changes, we are sure to capture these changes thanks to our proof. Because the translation of the `core` library was done automatically, we trust the generated definitions more than definitions that would be done by hand. However, there can still be mistakes or incompleteness in `coq-of-rust`, so we still need to check at proof time that the code makes sense.\\n\\n## Conclusion\\n\\nWe can now work on the verification of Rust programs with more trust in our formalization of the standard library. Our next target is to simplify our proof process, which is still tedious. In particular, showing that simulations are equivalent to the original Rust code requires doing the name resolution, introduction of high-level types, and removal of the side-effects. We would like to split these steps.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) \ud83d\udc8c! 
Formal verification provides the highest level of safety for critical applications, with a mathematical guarantee of the absence of bugs for a given specification."},{"id":"/2024/04/03/monadic-notation-for-rust-translation","metadata":{"permalink":"/blog/2024/04/03/monadic-notation-for-rust-translation","source":"@site/blog/2024-04-03-monadic-notation-for-rust-translation.md","title":"\ud83e\udd80 Monadic notation for the Rust translation","description":"At Formal Land our mission is to reduce the cost of finding bugs in software. We use formal verification, that is to say mathematical reasoning on code, to make sure we find more bugs than with testing. As part of this effort, we are working on a tool coq-of-rust to translate Rust code to Coq, a proof assistant, to analyze Rust programs. Here we present a technical improvement we made in this tool.","date":"2024-04-03T00:00:00.000Z","formattedDate":"April 3, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"monad","permalink":"/blog/tags/monad"}],"readingTime":5.2,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Monadic notation for the Rust translation","tags":["coq-of-rust","Rust","Coq","translation","monad"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","permalink":"/blog/2024/04/26/translation-core-alloc-crates"},"nextItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","permalink":"/blog/2024/03/22/improvements-rust-translation-part-3"}},"content":"At Formal Land our mission is to reduce the cost of finding bugs in software. 
We use [formal verification](https://runtimeverification.com/blog/formal-verification-lore), that is to say mathematical reasoning on code, to make sure we find more bugs than with testing. As part of this effort, we are working on a tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) to translate Rust code to Coq, a proof assistant, to analyze Rust programs. Here we present a technical improvement we made in this tool.\\n\\nOne of the challenges of our translation from Rust to Coq is that the generated code is very verbose. The size increase is about tenfold in our examples. One reason is that we use a monad to represent side effects in Coq, so we need to name each intermediate result and apply the `bind` operator. Here, we will present a monadic notation that avoids naming intermediate results to make the code more readable.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat \u260e\ufe0f!\\n\\n:::\\n\\n
<figure>\\n  ![Crab with a pen](2024-04-03/crab-writing.webp)\\n  <figcaption>A crab writing</figcaption>\\n</figure>
\\n\\n## Example \ud83d\udd0e\\n\\nHere is the Rust source code that we consider:\\n\\n```rust\\nfn add(a: i32, b: i32) -> i32 {\\n a + b\\n}\\n```\\n\\nBefore, we were generating the following Coq code, with `let*` as the notation for the bind:\\n\\n```coq\\nDefinition add (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \u03c4, \u03b1 with\\n | [], [ a; b ] =>\\n let* a := M.alloc a in\\n let* b := M.alloc b in\\n let* \u03b10 := M.read a in\\n let* \u03b11 := M.read b in\\n BinOp.Panic.add \u03b10 \u03b11\\n | _, _ => M.impossible\\n end.\\n```\\n\\nNow, with the new monadic notation, we generate:\\n\\n```coq\\nDefinition add (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \u03c4, \u03b1 with\\n | [], [ a; b ] =>\\n ltac:(M.monadic\\n (let a := M.alloc (| a |) in\\n let b := M.alloc (| b |) in\\n BinOp.Panic.add (| M.read (| a |), M.read (| b |) |)))\\n | _, _ => M.impossible\\n end.\\n```\\n\\nThe main change is that we do not need to introduce intermediate `let*` expressions with generated names. The code structure is more similar to the original Rust code, with additional calls to memory primitives such as `M.alloc` and `M.read`.\\n\\nThe notation `f (| x1, ..., xn |)` represents the call to the function `f` with the arguments `x1`, ..., `xn` returning a monadic result. We bind the result with the current continuation that goes up to the wrapping `ltac:(M.monadic ...)` tactic. We automatically transform the `let` into a `let*` with the `M.monadic` tactic when needed.\\n\\n## Where do we use this notation? \ud83e\udd14\\n\\nWe use this notation in all the function bodies that we generate, that are all in a monad to represent side effects. We call the `ltac:(M.monadic ...)` tactic at the start of the functions, as well as at the start of closure bodies that are defined inside functions. 
This also applies to the translation of `if`, `match`, and `loop` expressions, as we represent their bodies as functions.\\n\\nHere is an example of code with a `match` expression:\\n\\n```rust\\nfn add(a: i32, b: i32) -> i32 {\\n match a - b {\\n 0 => a + b,\\n _ => a - b,\\n }\\n}\\n```\\n\\nWe translate it to:\\n\\n```coq\\nDefinition add (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \u03c4, \u03b1 with\\n | [], [ a; b ] =>\\n ltac:(M.monadic\\n (let a := M.alloc (| a |) in\\n let b := M.alloc (| b |) in\\n M.read (|\\n M.match_operator (|\\n M.alloc (| BinOp.Panic.sub (| M.read (| a |), M.read (| b |) |) |),\\n [\\n fun \u03b3 =>\\n ltac:(M.monadic\\n (let _ :=\\n M.is_constant_or_break_match (|\\n M.read (| \u03b3 |),\\n Value.Integer Integer.I32 0\\n |) in\\n M.alloc (|\\n BinOp.Panic.add (| M.read (| a |), M.read (| b |) |)\\n |)));\\n fun \u03b3 =>\\n ltac:(M.monadic (\\n M.alloc (|\\n BinOp.Panic.sub (| M.read (| a |), M.read (| b |) |)\\n |)\\n ))\\n ]\\n |)\\n |)))\\n | _, _ => M.impossible\\n end.\\n```\\n\\nWe see that we call the tactic `M.monadic` for each branch of the `match` expression.\\n\\n## How does it work? \ud83d\udee0\ufe0f\\n\\nThe `M.monadic` tactic is defined in [M.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/M.v). The main part is:\\n\\n```coq showLineNumbers\\nLtac monadic e :=\\n lazymatch e with\\n (* ... *)\\n | context ctxt [M.run ?x] =>\\n lazymatch context ctxt [M.run x] with\\n | M.run x => monadic x\\n | _ =>\\n refine (M.bind _ _);\\n [ monadic x\\n | let v := fresh \\"v\\" in\\n intro v;\\n let y := context ctxt [v] in\\n monadic y\\n ]\\n end\\n (* ... *)\\n end.\\n```\\n\\nIn our translation of Rust, all of the values have the common type `Value.t`. The monadic bind is of type `M -> (Value.t -> M) -> M` where `M` is the type of the monad. The `M.run` function is an axiom that we use as a marker to know where we need to apply `M.bind`. 
The type of `M.run` is:\\n\\n```coq\\nAxiom run : M -> Value.t.\\n```\\n\\nThe notation for monadic function calls is defined using the `M.run` axiom with:\\n\\n```coq\\nNotation \\"e (| e1 , .. , en |)\\" := (M.run ((.. (e e1) ..) en)).\\n```\\n\\nWhen we encounter a `M.run` (line 4) we apply the `M.bind` (line 8) to the monadic expression `x` (line 9) and its continuation `ctxt` that we obtain thanks to the `context` keyword (line 4) of the matching of expressions in Ltac.\\n\\nThere is another case in the `M.monadic` tactic to handle the `let` expressions, that is not shown here.\\n\\n## Conclusion\\n\\nThanks to this new monadic notation, the generated Coq code is more readable and closer to the original Rust code. This should simplify our work in writing proofs on the generated code, as well as debugging the translation.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) \ud83d\udc8c! Formal verification provides the highest level of safety for critical applications, with a mathematical guarantee of the absence of bugs for a given specification."},{"id":"/2024/03/22/improvements-rust-translation-part-3","metadata":{"permalink":"/blog/2024/03/22/improvements-rust-translation-part-3","source":"@site/blog/2024-03-22-improvements-rust-translation-part-3.md","title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","description":"We explained how we started updating our translation tool coq-of-rust in our previous blog post, to support more of the Rust language. Our goal is to provide formal verification for the Rust \ud83e\udd80 language, relying on the proof system Coq \ud83d\udc13. 
We will see in this post how we continue implementing changes in coq-of-rust to:","date":"2024-03-22T00:00:00.000Z","formattedDate":"March 22, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":10.105,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","tags":["coq-of-rust","Rust","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Monadic notation for the Rust translation","permalink":"/blog/2024/04/03/monadic-notation-for-rust-translation"},"nextItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","permalink":"/blog/2024/03/08/improvements-rust-translation-part-2"}},"content":"We explained how we started updating our translation tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) in our [previous blog post](/blog/2024/03/08/improvements-rust-translation-part-2), to support more of the Rust language. Our goal is to provide formal verification for the Rust \ud83e\udd80 language, relying on the proof system Coq \ud83d\udc13. We will see in this post how we continue implementing changes in `coq-of-rust` to:\\n\\n1. remove the types from the translation,\\n2. be independent of the ordering of the definitions.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::info\\n\\n- Previous post: [Improvements in the Rust translation to Coq, part 2](/blog/2024/03/08/improvements-rust-translation-part-2)\\n\\n:::\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. 
We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat \u260e\ufe0f!\\n\\n:::\\n\\n## Translating the `dns` example \ud83d\ude80\\n\\nWe continue with our previous example [dns.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/dns.rs), which is composed of around 200 lines of Rust code.\\n\\n### Borrow and dereference\\n\\nThe next error that we encounter when type-checking the Coq translation of `dns.rs` is:\\n\\n```\\nFile \\"./examples/default/examples/ink_contracts/dns.v\\", line 233, characters 22-27:\\nError: The reference deref was not found in the current environment.\\n```\\n\\nIn Rust, we can either take the address of a value with `&`, or dereference a reference with `*`. In our translation, we do not distinguish between the four following pointer types:\\n\\n- `&`\\n- `&mut`\\n- `*const`\\n- `*mut`\\n\\nWe let the user handle these in different ways if it can simplify their proofs, especially regarding the distinction between mutable and non-mutable pointers. It simplifies the definition of our borrowing and dereferencing operators, as we need only two to cover all cases. We even go further: we remove these two operators in the translation, as they are the identity in our case!\\n\\nTo better understand why they are the identity, we need to see that there are two kinds of Rust values in our representation:\\n\\n- the value itself and\\n- the value with its address.\\n\\nThe value itself is useful to compute over the values. For example, we use it to define the primitive addition over integers. The value with its address corresponds to the final Rust expression. Indeed, we can take the address of any sub-expression in Rust with the `&` operator, so each sub-expression should come with its address. 
When we take the address of an expression, we:\\n\\n- start from a value with its address and go to\\n- a value that is an address to the value above, which we will need to allocate to have an address for it also.\\n\\nThus, the `&` operator behaves as the identity function followed by an allocation. Similarly, the `*` is a memory read followed by the identity function. Since we already use the alloc and read operations to go from a value to a value with its address and the other way around, we do not need to define the `*` and `&` operators in our translation and remove them.\\n\\n### Primitive operators\\n\\nWe now need to distinguish between the function calls, that use the primitive:\\n\\n```coq\\nM.get_function : string -> M\\n```\\n\\nto find the right function to call when defining the semantics of the program (even if the function is defined later), and the calls to primitive operators (`+`, `*`, `!`, ...) that we define in our base library for Rust in Coq. The full list of primitive operators is given by:\\n\\n- [rustc_middle::mir::syntax::BinOp](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/mir/syntax/enum.BinOp.html)\\n- [rustc_middle::thir::LogicalOp](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/thir/enum.LogicalOp.html) (with lazy evaluation of the parameters)\\n- [rustc_middle::mir::syntax::UnOp](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/mir/syntax/enum.UnOp.html)\\n\\nWe adapted the handling of primitive operators from the code we had before and added a few other fixes so that now the `dns.rs` example type-checks in Coq \ud83c\udf8a! We will now focus on fixing the other examples.\\n\\n## Cleaning the code \ud83e\uddfc\\n\\nBut let us first clean the code a bit. 
All the expressions in the internal [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of `coq-of-rust` are in a wrapper with the current type of the expression:\\n\\n```rust\\npub(crate) struct Expr {\\n pub(crate) kind: Rc<ExprKind>,\\n pub(crate) ty: Option<Rc<CoqType>>,\\n}\\n\\npub(crate) enum ExprKind {\\n Pure(Rc<Expr>),\\n LocalVar(String),\\n Var(Path),\\n Constructor(Path),\\n // ... all the cases\\n```\\n\\nHaving access to the type of each sub-expression was useful before annotating the `let` expressions. This is not required anymore, as all the values have the type `Value.t`. Thus, we remove the wrapper `Expr` and rename `ExprKind` into `Expr`. The resulting code is easier to read, as wrapping everything with a type was verbose sometimes.\\n\\nWe also cleaned some translated types that were not used anymore in the code, removed unused `Derive` traits, and removed the monadic translation on the types.\\n\\n
<figure>\\n  ![Crab in space](2024-03-22/crab-in-space.webp)\\n  <figcaption>A crab safely walking in space thanks to formal verification.</figcaption>\\n</figure>
\\n\\n## Handling the remaining examples\\n\\nTo handle the remaining examples of our test suite (extracted from the snippets of the [Rust by Example](https://doc.rust-lang.org/rust-by-example/) book), we mainly needed to re-implement the pattern matching on the new untyped values. Here is an example of Rust code with matching:\\n\\n```rust\\nfn matching(tuple: (i32, i32)) -> i32 {\\n match tuple {\\n (0, 0) => 0,\\n (_, _) => 1,\\n }\\n}\\n```\\n\\nwith its translation in Coq:\\n\\n```coq showLineNumbers\\nDefinition matching (\ud835\udf0f : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \ud835\udf0f, \u03b1 with\\n | [], [ tuple ] =>\\n let* tuple := M.alloc tuple in\\n let* \u03b10 :=\\n match_operator\\n tuple\\n [\\n fun \u03b3 =>\\n let* \u03b30_0 := M.get_tuple_field \u03b3 0 in\\n let* \u03b30_1 := M.get_tuple_field \u03b3 1 in\\n let* _ :=\\n let* \u03b10 := M.read \u03b30_0 in\\n M.is_constant_or_break_match \u03b10 (Value.Integer Integer.I32 0) in\\n let* _ :=\\n let* \u03b10 := M.read \u03b30_1 in\\n M.is_constant_or_break_match \u03b10 (Value.Integer Integer.I32 0) in\\n M.alloc (Value.Integer Integer.I32 0);\\n fun \u03b3 =>\\n let* \u03b30_0 := M.get_tuple_field \u03b3 0 in\\n let* \u03b30_1 := M.get_tuple_field \u03b3 1 in\\n M.alloc (Value.Integer Integer.I32 1)\\n ] in\\n M.read \u03b10\\n | _, _ => M.impossible\\n end.\\n```\\n\\nHere is a breakdown of how it works:\\n\\n- On line 6 we call the `match_operator` primitive that takes a value to match on, `tuple`, and a list of functions that try to match the value with a pattern and execute some code in case of success. We execute the matching functions successively until one succeeds and we stop. There should be at least one succeeding function as pattern-match in Rust is exhaustive.\\n- On line 10 we get the first element of the tuple. 
Note that, more precisely, what we get is the address of the first element of `\u03b3` that is the address of the tuple `tuple` given as parameter to the function. Having the address might be required for some operations, like doing subsequent matching by reference or using the `&` operator in the `match`\'s body.\\n- On line 11 we do the same with the second element of the tuple. The indices for `\u03b3` are generated to avoid name clashes. They correspond to the depth of the sub-pattern being considered, followed by the index of the current item in this sub-pattern.\\n- On line 14, we check that the first element of the tuple is `0`. We use the `M.is_constant_or_break_match` primitive that checks if the value is a constant and if it is equal to the expected value. If it is not the case, it exits the current matching function, and the `match_operator` primitive will evaluate the next one, going to line 19.\\n- On line 24 we return the final result. Note that we always do a `M.alloc` followed by `M.read` to return the result. This could be simplified, as immediately reading an allocated value is like running the identity function.\\n\\nBy implementing the new version of the pattern-matching, as well as a few other smaller fixes, we were able to make all the examples type-check again! We now need to fix the proofs we had on the [erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/erc20.v) example, as the generated code changed a lot.\\n\\n## Updating the proofs \ud83d\udc69\u200d\ud83d\ude80\\n\\nUnfortunately, all these changes in the generated code are breaking our proofs. We still want to write our specifications and proofs by first showing a simulation of the Rust code with a simpler and functional definition. 
Before, with our simulations, we were:\\n\\n- replacing the management of pointers by either stateless functions or functions in a state monad;\\n- simplifying the error handling, especially for code that cannot panic.\\n\\nNow we also have to:\\n\\n- define the types;\\n- add the typing information;\\n- add the trait constraints and resolve the trait instances;\\n- resolve the function or associated function calls.\\n\\nWe have not finished updating the proofs but still merged our work in `main` with the pull request [#472](https://github.com/formal-land/coq-of-rust/pull/472) as this was taking too long. The proof that we want to update is in the file [proofs/erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/proofs/erc20.v) and is about the smart contract [erc20.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/erc20.rs).\\n\\n### Phi operators \ud83c\udfa0\\n\\nOur basic strategy for the proof, in order to handle the untyped Rust values of the new translation, is to define various `\u03c6` operators going from a user-defined Coq type to a Rust value of type `Value.t`. These translate the data types that we define to represent the Rust types of the original program. Note that we previously had trouble translating the Rust types in the general case, especially for mutually recursive types or types involving a lot of trait manipulations.\\n\\nMore formally, we introduce the Coq typeclass:\\n\\n```coq\\nClass ToValue (A : Set) : Set := {\\n \u03a6 : Ty.t;\\n \u03c6 : A -> Value.t;\\n}.\\nArguments \u03a6 _ {_}.\\n```\\n\\nThis describes how to go from a user-defined type in Coq to the equivalent representation in `Value.t`. In addition to the `\u03c6` operator, we also define the `\u03a6` operator that gives the Rust type of the Coq type. We need to provide this type for polymorphic definitions.\\n\\nWe always go from user-defined types to `Value.t`. 
We write our simulation statements like this:\\n\\n```coq\\n{{env, state |\\n code.example.get_at_index [] [\u03c6 vector; \u03c6 index] \u21d3\\n inl (\u03c6 (simulations.example.get_at_index vector index))\\n| state\'}}\\n```\\n\\nwhere:\\n\\n```coq\\n{{env, state | rust_program \u21d3 simulation_result | state\'}}\\n```\\n\\nis our predicate to state an evaluation of a Rust program to a simulation result. We apply the `\u03c6` operator to the arguments of the Rust program and to the result of the simulation. In some proofs, we set this operator as `Opaque` in order to keep track of it and avoid unwanted reductions.\\n\\n### Traits\\n\\nThe trait definitions, as well as trait constraints, are absent from the generated Coq code. For now, we add them back as follows, for the example of the `Default` trait:\\n\\n1. We define a `Default` typeclass in Coq:\\n\\n ```coq\\n Module Default.\\n Class Trait (Self : Set) : Set := {\\n default : Self;\\n }.\\n End Default.\\n ```\\n\\n2. We define what it means to implement the `Default` trait and have a corresponding simulation:\\n\\n ```coq\\n Module Default.\\n Record TraitHasRun (Self : Set)\\n `{ToValue Self}\\n `{core.simulations.default.Default.Trait Self} :\\n Prop := {\\n default :\\n exists default,\\n IsTraitMethod\\n \\"core::default::Default\\" (\u03a6 Self) []\\n \\"default\\" default /\\\\\\n Run.pure\\n (default [] [])\\n (inl (\u03c6 core.simulations.default.Default.default));\\n }.\\n End Default.\\n ```\\n\\n where `Run.pure` is our simulation predicate for the case where the `state` does not change.\\n\\n3. Finally, we use the `TraitHasRun` predicate as an additional hypothesis for simulation proofs on functions that depend on the `Default` trait in Rust:\\n\\n ```coq\\n (** Simulation proof for `unwrap_or_default` on the type `Option`. 
*)\\n Lemma run_unwrap_or_default {T : Set}\\n {_ : ToValue T}\\n {_ : core.simulations.default.Default.Trait T}\\n (self : option T) :\\n core.proofs.default.Default.TraitHasRun T ->\\n Run.pure\\n (core.option.Impl_Option_T.unwrap_or_default (\u03a6 T) [] [\u03c6 self])\\n (inl (\u03c6 (core.simulations.option.Impl_Option_T.unwrap_or_default self))).\\n Proof.\\n (* ... *)\\n Qed.\\n ```\\n\\n## Conclusion \u270d\ufe0f\\n\\nWe still have a lot to do, especially in finding the right approach to verify the newly generated Rust code. But we have finalized our new translation mode without types and ordering, which helps to successfully translate many more Rust examples. We also do not need to translate the dependencies of a project anymore before compiling it.\\n\\nOur next target is to translate the whole of Rust\'s standard library (with the help of some axioms for the expressions which we do not handle yet), in order to have a faithful definition of the Rust primitives, such as functions of the [option](https://doc.rust-lang.org/core/option/) and [vec](https://doc.rust-lang.org/alloc/vec/) modules.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) \ud83d\udc8c! Formal verification provides the highest level of safety for critical applications, with a mathematical guarantee of the absence of bugs for a given specification."},{"id":"/2024/03/08/improvements-rust-translation-part-2","metadata":{"permalink":"/blog/2024/03/08/improvements-rust-translation-part-2","source":"@site/blog/2024-03-08-improvements-rust-translation-part-2.md","title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","description":"In our previous blog post, we stated our plan to improve our translation of Rust \ud83e\udd80 to Coq \ud83d\udc13 with coq-of-rust. 
We also provided a new definition for our Rust monad in Coq, and the definition of a unified type to represent any Rust values. We will now see how we modify the Rust implementation of coq-of-rust to make the generated code use these new definitions.","date":"2024-03-08T00:00:00.000Z","formattedDate":"March 8, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":9.055,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","tags":["coq-of-rust","Rust","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","permalink":"/blog/2024/03/22/improvements-rust-translation-part-3"},"nextItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","permalink":"/blog/2024/02/29/improvements-rust-translation"}},"content":"In our [previous blog post](/blog/2024/02/29/improvements-rust-translation), we stated our plan to improve our translation of Rust \ud83e\udd80 to Coq \ud83d\udc13 with [coq-of-rust](https://github.com/formal-land/coq-of-rust). We also provided a new definition for our Rust monad in Coq, and the definition of a unified type to represent any Rust values. We will now see how we modify the Rust implementation of `coq-of-rust` to make the generated code use these new definitions.\\n\\nWith this new translation strategy, to support more Rust code, we want:\\n\\n1. to remove the types from the translation,\\n2. 
to avoid the need to order the definitions in the generated Coq code.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::info\\n\\n- Next post: [Improvements in the Rust translation to Coq, part 3](/blog/2024/03/22/improvements-rust-translation-part-3)\\n- Previous post: [Improvements in the Rust translation to Coq, part 1](/blog/2024/02/29/improvements-rust-translation)\\n\\n:::\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat!\\n\\n:::\\n\\n## Implementation of the monad\\n\\nWe implemented the new monad and the type `Value.t` holding any kind of Rust values as described in the previous blog post. For now, we have removed the definitions related to the standard library of Rust (everything except the base definitions such as the integer types). This should not be an issue to type-check the generated Coq code, as the new code should be independent of the ordering of definitions: in particular, it should type-check even if the needed definitions are not yet there.\\n\\nWe added some definitions for the primitive unary and binary operators. 
These include some operations on the integers such as arithmetic operations (with or without overflow, depending on the compilation mode), as well as comparisons (equality, less than or equal to, ...).\\n\\nNow that the main library file [CoqOfRust/CoqOfRust.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/CoqOfRust.v) compiles in Coq, we can start to test the translation on our examples.\\n\\n## Generating the tests\\n\\nWe generate new snapshots for our translations with:\\n\\n```sh\\ncargo build && time python run_tests.py\\n```\\n\\nThis builds the project `coq-of-rust` (with a lot of warnings about unused code for now) and re-generates our snapshots: for each Rust file in the [examples](https://github.com/formal-land/coq-of-rust/tree/main/examples) directory, we generate a Coq file with the same name but the extension `.v`. We generate two versions:\\n\\n- one in axiom mode, where all definitions are axiomatized, to translate libraries, for example, and\\n- one in full definition mode, where we also translate the bodies of the function definitions.\\n\\n## Axiom mode\\n\\nWe first try to type-check and fix the code generated in axiom mode.\\n\\n### Type aliases\\n\\nWe have a first error for type aliases that we do not translate properly. We need access to the fully qualified name of the alias. 
We do that by combining calls to the functions:\\n\\n- [crate_name](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.crate_name) to get the name of the current crate and\\n- [def_path](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.def_path) to get the whole definition path without the crate name.\\n\\nAs a result, for the file [examples/ink_contracts/basic_contract_caller.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/basic_contract_caller.rs), we translate the type alias:\\n\\n```rust\\ntype Hash = [u8; 32];\\n```\\n\\ninto the Coq code:\\n\\n```coq\\nAxiom Hash :\\n (Ty.path \\"basic_contract_caller::Hash\\") =\\n (Ty.apply (Ty.path \\"array\\") [Ty.path \\"u8\\"]).\\n```\\n\\nThen, during the proofs, we will be able to substitute the type `Hash` by its definition when it appears. Note that we now translate types by values of the type `Ty.t`, so there should be no difficulties in rewriting types.\\n\\nWe should add the length of the array in the type. This is not done yet.\\n\\n### Traits\\n\\nIn axiom mode, we remove most of the trait definitions. Instead, with our new translation model, the traits are mostly unique names (the absolute path of the trait definition). The main use of traits is to distinguish them from other traits, to know which trait implementation to use when calling a trait\'s method. We still translate the provided methods (that are default methods in the trait definition) to axioms and add a predicate stating that they are associated with the current trait. 
For example, we translate the following Rust trait:\\n\\n```rust\\n// crate `my_crate`\\n\\ntrait Animal {\\n fn new(name: &\'static str) -> Self;\\n\\n fn name(&self) -> &\'static str;\\n fn noise(&self) -> &\'static str;\\n\\n fn talk(&self) {\\n println!(\\"{} says {}\\", self.name(), self.noise());\\n }\\n}\\n```\\n\\nto the Coq code:\\n\\n```coq\\n(* Trait *)\\nModule Animal.\\n Parameter talk : (list Ty.t) -> (list Value.t) -> M.\\n\\n Axiom ProvidedMethod_talk : M.IsProvidedMethod \\"my_crate::Animal\\" talk.\\nEnd Animal.\\n```\\n\\nWe realize with this example that the translation in axiom mode generates very few errors, as we remove all the type definitions and all the function axioms have the same signature:\\n\\n```coq\\n(* A list of types that can be empty for non-polymorphic functions,\\n a list of parameters, and a return value in the monad `M`. *)\\nlist Ty.t -> list Value.t -> M\\n```\\n\\nso the type-checking of these axioms never fails. We thus jump to the full definition mode as this is where our new approach might fail.\\n\\n## Definition mode\\n\\nWe now try to type-check the generated Coq code in full definition mode. We start with the [dns.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/dns.rs) smart contract example.\\n\\n### Polymorphic trait implementation\\n\\nThis example is interesting, as it contains polymorphic implementations, such as for the [mock](https://en.wikipedia.org/wiki/Mock_object) type `Mapping`:\\n\\n```rust\\n#[derive(Default)]\\nstruct Mapping {\\n _key: core::marker::PhantomData,\\n _value: core::marker::PhantomData,\\n}\\n```\\n\\nthat implements the [Default](https://doc.rust-lang.org/core/default/trait.Default.html) trait on the type `Mapping` for two type parameters `K` and `V`. 
We translate it to:\\n\\n```coq showLineNumbers\\n(* Struct Mapping *)\\n\\nModule Impl_core_default_Default_for_dns_Mapping_K_V.\\n (*\\n Default\\n *)\\n Definition default (\ud835\udf0f : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \ud835\udf0f, \u03b1 with\\n | [ Self; K; V ], [] =>\\n let* \u03b10 :=\\n M.get_method\\n \\"core::default::Default\\"\\n \\"default\\"\\n [ (* Self *) Ty.apply (Ty.path \\"core::marker::PhantomData\\") [ K ] ] in\\n let* \u03b11 := M.call \u03b10 [] in\\n let* \u03b12 :=\\n M.get_method\\n \\"core::default::Default\\"\\n \\"default\\"\\n [ (* Self *) Ty.apply (Ty.path \\"core::marker::PhantomData\\") [ V ] ] in\\n let* \u03b13 := M.call \u03b12 [] in\\n M.pure\\n (Value.StructRecord \\"dns::Mapping\\" [ (\\"_key\\", \u03b11); (\\"_value\\", \u03b13) ])\\n | _, _ => M.impossible\\n end.\\n\\n Axiom Implements :\\n forall (K V : Ty.t),\\n M.IsTraitInstance\\n \\"core::default::Default\\"\\n (* Self *) (Ty.apply (Ty.path \\"dns::Mapping\\") [ K; V ])\\n []\\n [ (\\"default\\", InstanceField.Method default) ]\\n [ K; V ].\\nEnd Impl_core_default_Default_for_dns_Mapping_K_V.\\n```\\n\\nHere are the interesting bits of this code:\\n\\n- On line 1, we translate the `Mapping` type into a single comment, as the types disappear in our translation and become just markers. The marker for `Mapping` is its absolute name `Ty.path \\"dns::Mapping\\"`.\\n- On line 7, the function `default` takes a list of types `\ud835\udf0f` as a parameter in case it is polymorphic. Here, this method is not polymorphic, but we still add the `\ud835\udf0f` parameter for uniformity. We also take three additional type parameters:\\n\\n - `Self`\\n - `K`\\n - `V`\\n\\n that represent the `Self` type on which the trait is implemented, and the two type parameters of the `Mapping` type. 
These will be provided when calling the `default` method.\\n\\n- On line 11, we use the primitive `M.get_method` (axiomatized for now) to get the method `default` of the trait `core::default::Default` for the type `core::marker::PhantomData`. Here, we see that having access to the type `K` in the body of the `default` function is useful, as it helps us to disambiguate between the various implementations of the `Default` trait instances that we call. Here, we provide the `Self` type of the trait in a list of a single element. If the `Default` trait or the `default` method were polymorphic, we would also append these type parameters in this list.\\n- On line 15, we call the `default` method instance that we found with an empty list of arguments.\\n- On line 23, we build a value of type `Mapping` with the two fields `_key` and `_value` initialized with the results of the two calls to the `default` method. We use the `Value.StructRecord` constructor to build the value, and its result is of type `Value.t` like all other Rust values.\\n- On line 24, we eliminate a case with a wrong number of type and value arguments. This should never happen as the arity of all the function calls is checked by the Rust type-checker.\\n- On line 27, we state that we have a new instance of the `Default` trait for the `Mapping` type, with the `default` method implemented by the `default` function. 
This is true for any values of the types `K` and `V`.\\n- On line 34, we specify that `[K, V]` are the type parameters of this implementation that should be given as extra parameters when calling the `default` method of this instance, together with the `Self` type.\\n\\n### Polymorphic implementation\\n\\nNext, we have a polymorphic implementation of mock associated functions for the `Mapping` type:\\n\\n```rust\\nimpl Mapping {\\n fn contains(&self, _key: &K) -> bool {\\n unimplemented!()\\n }\\n\\n // ...\\n```\\n\\nWe translate it to:\\n\\n```coq showLineNumbers\\nModule Impl_dns_Mapping_K_V.\\n Definition Self (K V : Ty.t) : Ty.t :=\\n Ty.apply (Ty.path \\"dns::Mapping\\") [ K; V ].\\n\\n (*\\n fn contains(&self, _key: &K) -> bool {\\n unimplemented!()\\n }\\n *)\\n Definition contains (\ud835\udf0f : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \ud835\udf0f, \u03b1 with\\n | [ Self; K; V ], [ self; _key ] =>\\n let* self := M.alloc self in\\n let* _key := M.alloc _key in\\n let* \u03b10 := M.var \\"core::panicking::panic\\" in\\n let* \u03b11 := M.read (mk_str \\"not implemented\\") in\\n let* \u03b12 := M.call \u03b10 [ \u03b11 ] in\\n never_to_any \u03b12\\n | _, _ => M.impossible\\n end.\\n\\n Axiom AssociatedFunction_contains :\\n forall (K V : Ty.t),\\n M.IsAssociatedFunction (Self K V) \\"contains\\" contains [ K; V ].\\n\\n (* ... *)\\n```\\n\\nWe follow a similar approach as for the translation of trait implementations, especially regarding the handling of polymorphic type variables. Here are some differences:\\n\\n- On line 2, we define a `Self` type as a function of the type parameters `K` and `V`. This is useful for avoiding repeating the same type expression later.\\n- On line 22, we use the predicate `M.IsAssociatedFunction` to state that we have a new associated function `contains` for the `Mapping` type, with the `contains` method implemented by the `contains` function. This is true for any values of the types `K` and `V`. 
As for the trait implementations, we make explicit the list `[K, V]` that will be given as an extra parameter to the function `contains`.\\n\\n## Conclusion\\n\\nIn the next blog post, we will see how we continue to translate the examples in full definition mode. There is still a lot to do to get to the same level of Rust support as before, but we are hopeful that our new approach will be more robust and easier to maintain.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land)! Formal verification provides the highest level of safety for critical applications. See the [White House report on secure software development](https://www.whitehouse.gov/wp-content/uploads/2024/02/Final-ONCD-Technical-Report.pdf) for more on the importance of formal verification."},{"id":"/2024/02/29/improvements-rust-translation","metadata":{"permalink":"/blog/2024/02/29/improvements-rust-translation","source":"@site/blog/2024-02-29-improvements-rust-translation.md","title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","description":"Our tool coq-of-rust is translating Rust \ud83e\udd80 programs to the proof system Coq \ud83d\udc13 to do formal verification on Rust programs. 
Even if we are able to verify realistic code, such as an ERC-20 smart contract, coq-of-rust still has some limitations:","date":"2024-02-29T00:00:00.000Z","formattedDate":"February 29, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":12.655,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","tags":["coq-of-rust","Rust","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","permalink":"/blog/2024/03/08/improvements-rust-translation-part-2"},"nextItem":{"title":"\ud83e\uddab Translating Go to Coq, part 1","permalink":"/blog/2024/02/22/journey-coq-of-go"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) is translating Rust \ud83e\udd80 programs to the proof system Coq \ud83d\udc13 to do formal verification on Rust programs. Even if we are able to verify realistic code, such as an [ERC-20 smart contract](/blog/2023/12/13/rust-verify-erc-20-smart-contract), `coq-of-rust` still has some limitations:\\n\\n- fragile trait handling\\n- difficulties in ordering the definitions, in their order of dependencies as required by Coq\\n\\nWe will present how we plan to improve our tool to address these limitations.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::info\\n\\n- Next post: [Improvements in the Rust translation to Coq, part 2](/blog/2024/03/08/improvements-rust-translation-part-2)\\n\\n:::\\n\\n## Introduction\\n\\nAs emphasized in the [recent report from the White House](https://www.whitehouse.gov/wp-content/uploads/2024/02/Final-ONCD-Technical-Report.pdf), memory safety and formal verification are keys to ensure secure and correct software. 
Rust provides memory safety and we provide formal verification on top of it with `coq-of-rust`.\\n\\nWe will take the Rust [serde](https://github.com/serde-rs/serde) serialization library to have an example of code to translate in Coq. This is a popular Rust library that is used in almost all projects, either as a direct or transitive dependency. Serialization has a simple specification (being a bijection between the data and its serialized form) and is a good candidate for formal verification. We might verify this library afterwards if there is a need.\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency in order to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase totally bug-free. Contact us at [contact@formal.land](mailto:contact@formal.land) to chat! We offer a free audit to assess the feasibility of formal verification on your case.\\n\\n:::\\n\\n:::note Goal\\n\\nOur company goal is to make formal verification accessible to all projects, reducing its cost to 20% of the development cost. There should be no reason to have bugs in end-user products!\\n\\n:::\\n\\n## Warnings\\n\\nWe start by running the command:\\n\\n```sh\\ncargo coq-of-rust\\n```\\n\\nin the `serde` directory. We get a lot of warnings, but the translation does not panic as it tries to always produce something for debugging purposes. We have two kinds of warnings.\\n\\n### Constants in patterns\\n\\nThe warning is the following:\\n\\n```\\nwarning: Constants in patterns are not yet supported.\\n --\x3e serde/src/de/mod.rs:2277:13\\n |\\n2277 | 0 => panic!(), // special case elsewhere\\n | ^\\n```\\n\\nThe reason why we did not handle constants in patterns is that they are represented in a special format in the Rust compiler that was not obvious to handle. 
The definition of [rustc_middle::mir::consts::Const](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/mir/consts/enum.Const.html) representing the constants in patterns is:\\n\\n```rust\\npub enum Const<\'tcx> {\\n Ty(Const<\'tcx>),\\n Unevaluated(UnevaluatedConst<\'tcx>, Ty<\'tcx>),\\n Val(ConstValue<\'tcx>, Ty<\'tcx>),\\n}\\n```\\n\\nThere are three cases, and each contains several more cases. To fix this issue, we added the code to handle the signed and unsigned integers, which are enough for our `serde` example. We will need to add other cases later, especially for the strings. This allowed us to discover and fix a bug in our handling of patterns for tuples with elision `..`, like in the example:\\n\\n```rust\\nfn main() {\\n let triple = (0, -2, 3);\\n\\n match triple {\\n (0, y, z) => println!(\\"First is `0`, `y` is {:?}, and `z` is {:?}\\", y, z),\\n (1, ..) => println!(\\"First is `1` and the rest doesn\'t matter\\"),\\n (.., 2) => println!(\\"last is `2` and the rest doesn\'t matter\\"),\\n (3, .., 4) => println!(\\"First is `3`, last is `4`, and the rest doesn\'t matter\\"),\\n _ => println!(\\"It doesn\'t matter what they are\\"),\\n }\\n}\\n```\\n\\nThese changes are in the pull-request [coq-of-rust#470](https://github.com/formal-land/coq-of-rust/pull/470).\\n\\n### Unimplemented `parent_kind`\\n\\nWe get a second form of warning:\\n\\n```\\nunimplemented parent_kind: Struct\\nexpression: Expr {\\n kind: ZstLiteral {\\n user_ty: None,\\n },\\n ty: FnDef(\\n DefId(2:31137 ~ core[10bc]::cmp::Reverse::{constructor#0}),\\n [\\n T/#1,\\n ],\\n ),\\n temp_lifetime: Some(\\n Node(14),\\n ),\\n span: serde/src/de/impls.rs:778:22: 778:29 (#0),\\n}\\n```\\n\\nThis is for some cases of expressions [rustc_middle::thir::ExprKind::ZstLiteral](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/thir/enum.ExprKind.html#variant.ZstLiteral) in the Rust\'s [THIR representation](https://rustc-dev-guide.rust-lang.org/thir.html) that we do not handle. 
If we look at the `span` field, we see that it appears in the source in the file `serde/src/de/impls.rs` at line 778:\\n\\n```rust\\nforwarded_impl! {\\n (T), Reverse, Reverse // Here is the error\\n}\\n```\\n\\nThis is not very informative as this code is generated by a macro. Another similar kind of expression appears later:\\n\\n```rust\\nimpl<\'de, T> Deserialize<\'de> for Wrapping\\nwhere\\n T: Deserialize<\'de>,\\n{\\n fn deserialize(deserializer: D) -> Result\\n where\\n D: Deserializer<\'de>,\\n {\\n Deserialize::deserialize(deserializer).map(\\n // Here is the error:\\n Wrapping\\n )\\n }\\n}\\n```\\n\\nThe `Wrapping` term is the constructor of a structure, used as a function. We add the support of this case in the pull-request [coq-of-rust#471](https://github.com/formal-land/coq-of-rust/pull/471).\\n\\n## Coq errors\\n\\nWhen we type-check the generated Coq code, we quickly get an error:\\n\\n```coq\\n(* Generated by coq-of-rust *)\\nRequire Import CoqOfRust.CoqOfRust.\\n\\nModule lib.\\n Module core.\\n\\n End core.\\nEnd lib.\\n\\nModule macros.\\n\\nEnd macros.\\n\\nModule integer128.\\n\\nEnd integer128.\\n\\nModule de.\\n Module value.\\n Module Error.\\n Section Error.\\n Record t : Set := {\\n (* Here is the error: *)\\n err : ltac:(serde.de.value.ErrorImpl);\\n }.\\n\\n (* 180.000 more lines! *)\\n```\\n\\nThe reason is that `serde.de.value.ErrorImpl` is not yet defined here. In Coq, we must order the definitions in the order of dependencies to ensure that there are no non-terminating definitions with infinite recursive calls and to preserve the consistency of the system.\\n\\nThis issue does not seem easy to us, as in a Rust crate, everything can depend on each other:\\n\\n- types\\n- definitions\\n- traits\\n- `impl` blocks\\n\\nOur current solutions are:\\n\\n1. **To reorder the definitions in the source Rust code**, so that they appear in the right order for Coq. 
This is technically the simplest solution (no changes in `coq-of-rust`), but it is not very practical. Indeed, reordering elements in a big project generates a lot of conflicts in the version control system, especially if we cannot upstream the changes to the original project.\\n2. **To use a configuration file** to specify the order of the definitions. This works in a lot of cases, but we need to write this file manually and have it complete to compile the whole crate in Coq, even if we are interested in verifying a small part of the code. There are also some cases that are hard to untangle, in particular with traits that can depend on both types and definitions, that themselves may depend on traits.\\n\\nIn order to handle large projects, such as `serde`, we need to find a more definitive solution to handle the order of dependencies.\\n\\n## Plan for the order of definitions\\n\\nOur idea is to use a more verbose, but simpler translation, to generate Coq code that is not sensitive to the ordering of Rust. In addition, we should have a more robust mechanism for the traits, as there are still some edge cases that we do not handle well.\\n\\nOur main ingredients are:\\n\\n1. Generating an untyped code, where all Rust values become part of a single and shared `Value` type. With this approach, we can represent mutually recursive Rust types, that are generally hard to translate in a sound manner to Coq. We should also avoid a lot of errors on the Coq side related to type inference.\\n2. Adding an indirection level to all function calls, as any function call might refer to a definition that appears later in the code.\\n\\nThese ingredients have some drawbacks:\\n\\n- By removing the types, we will obtain a code that is less readable. It might contain translation errors that will be harder to spot. 
We will need to add the types back during the specification of the code.\\n- We will need to add error cases corresponding to type errors at runtime, as we will not have the type system to ensure that functions expecting a certain type of value receive it. We know from the Rust type checker that these errors should not happen, but we will need to prove it in Coq.\\n- We will have to resolve the indirections in the calls at proof time, or with other mechanisms, that will be more complex than the current translation.\\n- We will still need to have a translation of the types (as values), to guide the inference of trait instances.\\n\\n## Definition of a new monad\\n\\nWe rework our definitions of values, pointers and monad to represent the effects, taking into account the fact that we remove the types from the translation. Here are the main definitions that we are planning to use. We have not tested them yet as we need to update the translation to Coq to use them. We will do that just after.\\n\\n### Pointers\\n\\n```coq\\nModule Pointer.\\n Module Index.\\n Inductive t : Set :=\\n | Tuple (index : Z)\\n | Array (index : Z)\\n | StructRecord (constructor field : string)\\n | StructTuple (constructor : string) (index : Z).\\n End Index.\\n\\n Module Path.\\n Definition t : Set := list Index.t.\\n End Path.\\n\\n Inductive t (Value : Set) : Set :=\\n | Immediate (value : Value)\\n | Mutable {Address : Set} (address : Address) (path : Path.t).\\n Arguments Immediate {_}.\\n Arguments Mutable {_ _}.\\nEnd Pointer.\\n```\\n\\nA pointer is either:\\n\\n- a pointer to an immutable data, that is directly represented by its data;\\n- a pointer to a mutable data, that is inside a cell at a certain address in the memory. 
The exact location in the cell is given by the path.\\n\\nThe type of `Address` is not enforced yet, but we will do it when defining the semantics.\\n\\n### Values\\n\\n```coq\\nModule Value.\\n Inductive t : Set :=\\n | Bool : bool -> t\\n | Integer : Integer.t -> Z -> t\\n (** For now we do not know how to represent floats so we use a string *)\\n | Float : string -> t\\n | UnicodeChar : Z -> t\\n | String : string -> t\\n | Tuple : list t -> t\\n | Array : list t -> t\\n | StructRecord : string -> list (string * t) -> t\\n | StructTuple : string -> list t -> t\\n | Pointer : Pointer.t t -> t\\n (** The two existential types of the closure must be [Value.t] and [M]. We\\n cannot enforce this constraint there yet, but we will do when defining the\\n semantics. *)\\n | Closure : {\'(t, M) : Set * Set @ t -> M} -> t.\\nEnd Value.\\n```\\n\\nHere, this type aims to represent any Rust value. We might add a few cases later to represent the `dyn` values, for example. Most of the cases of this type are as expected:\\n\\n- The constructor `StructRecord` is for constructors of `struct` or `enum` with named fields.\\n- The constructor `StructTuple` is for constructors of `struct` or `enum` with unnamed fields.\\n- The constructor `Pointer` is for pointers to data, that could be either `&`, `&mut`, `*const`, or `*mut`.\\n- The constructor `Closure` is for closures (anonymous functions). To prevent errors with the positivity checker of Coq, we use an existential type for the type `Value.t` (as well as `M`, which will be defined later). Note that we are using impredicative `Set` in Coq, and `{A : Set @ P A}` is our notation for existential `Set` in `Set`. Without impredicative sets, we could have issues with the universe levels. 
The fact that these existential types are always `Value.t` and `M` will be enforced when defining the semantics.\\n\\n### Monad\'s primitives\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set :=\\n | StateAlloc (value : Value.t)\\n | StateRead {Address : Set} (address : Address)\\n | StateWrite {Address : Set} (address : Address) (value : Value.t)\\n | EnvRead.\\nEnd Primitive.\\n```\\n\\nHere are the IO calls to the system that the monad can make. This list might be extended later. For now, we mainly have primitives to access the memory.\\n\\n### Monad: base\\n\\n```coq\\nModule LowM.\\n Inductive t (A : Set) : Set :=\\n | Pure : A -> t A\\n | CallPrimitive : Primitive.t -> (Value.t -> t A) -> t A\\n | Loop : t A -> (A -> bool) -> (A -> t A) -> t A\\n | Impossible : t A\\n (** This constructor is not strictly necessary, but is used as a marker for\\n functions calls in the generated code, to help the tactics to recognize\\n points where we can compose about functions. *)\\n | Call : t A -> (A -> t A) -> t A.\\n Arguments Pure {_}.\\n Arguments CallPrimitive {_}.\\n Arguments Loop {_}.\\n Arguments Impossible {_}.\\n Arguments Call {_}.\\n\\n Fixpoint let_ {A : Set} (e1 : t A) (f : A -> t A) : t A :=\\n match e1 with\\n | Pure v => f v\\n | CallPrimitive primitive k =>\\n CallPrimitive primitive (fun v => let_ (k v) f)\\n | Loop body is_break k =>\\n Loop body is_break (fun v => let_ (k v) f)\\n | Impossible => Impossible\\n | Call e k =>\\n Call e (fun v => let_ (k v) f)\\n end.\\nEnd LowM.\\n```\\n\\nThis is the first layer of our monad, very similar to what we had before. We remove the cast operation, as now everything has the same type. We use a style by continuation, but we also define a `let_` function to have a \\"bind\\" operator. 
Note that we always have the same type as parameter, so this is not really a monad as the \\"bind\\" operator should have the type:\\n\\n```coq\\nforall {A B : Set}, M A -> (A -> M B) -> M B\\n```\\n\\nAlways having the same type is enough for us as we use a single type of all Rust values.\\n\\n### Monad: with exceptions\\n\\nWe have the same type as before for the exceptions, representing the panics and all the special control flow operations such as `continue`, `return`, and `break`:\\n\\n```coq\\nModule Exception.\\n Inductive t : Set :=\\n (** exceptions for Rust\'s `return` *)\\n | Return : Value.t -> t\\n (** exceptions for Rust\'s `continue` *)\\n | Continue : t\\n (** exceptions for Rust\'s `break` *)\\n | Break : t\\n (** escape from a match branch once we know that it is not valid *)\\n | BreakMatch : t\\n | Panic : string -> t.\\nEnd Exception.\\n```\\n\\nOur final monad definition is a thin wrapper around `LowM`, to add an error monad to propagate the exceptions:\\n\\n```coq\\nDefinition M : Set :=\\n LowM.t (Value.t + Exception.t).\\n\\nDefinition let_ (e1 : M) (e2 : Value.t -> M) : M :=\\n LowM.let_ e1 (fun v1 =>\\n match v1 with\\n | inl v1 => e2 v1\\n | inr error => LowM.Pure (inr error)\\n end).\\n```\\n\\nOnce again, this is not really a monad as the type of the values that we compute is always the same, and we do not need more. 
Having a definition in two steps (`LowM` and `M`) is useful to separate the part that can be defined by computation (the `M` part) from the part whose semantics can only be given by inductive predicates (the `LowM` part).\\n\\n## Conclusion\\n\\nNext, we will see how we can use this new definition of Rust values, whether it works to translate our examples, and most importantly, how to modify `coq-of-rust` to generate terms without types.\\n\\nIf you are interested in formally verifying Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) or go to our [GitHub repository](https://github.com/formal-land/coq-of-rust) for `coq-of-rust`."},{"id":"/2024/02/22/journey-coq-of-go","metadata":{"permalink":"/blog/2024/02/22/journey-coq-of-go","source":"@site/blog/2024-02-22-journey-coq-of-go.md","title":"\ud83e\uddab Translating Go to Coq, part 1","description":"In this blog post, we present our development steps to build a tool to translate Go programs to the proof system Coq.","date":"2024-02-22T00:00:00.000Z","formattedDate":"February 22, 2024","tags":[{"label":"coq-of-go","permalink":"/blog/tags/coq-of-go"},{"label":"Go","permalink":"/blog/tags/go"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":12.03,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\uddab Translating Go to Coq, part 1","tags":["coq-of-go","Go","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","permalink":"/blog/2024/02/29/improvements-rust-translation"},"nextItem":{"title":"\u03bb Experiment on translation from Haskell to Coq","permalink":"/blog/2024/02/14/experiment-coq-of-hs"}},"content":"In this blog post, we present our development steps to build a tool to translate Go programs to the proof system Coq.\\n\\nThe goal is to formally verify Go programs to make them 
totally bug-free. It is actually possible to make a program totally bug-free, as [formal verification](https://en.wikipedia.org/wiki/Formal_verification) can cover all execution cases and kinds of properties thanks to the use of mathematical methods. This corresponds to the highest level of the [Evaluation Assurance Levels](https://en.wikipedia.org/wiki/Evaluation_Assurance_Level) used for critical applications, such as the space industry.\\n\\nAll the code of our work is available on GitHub at [github.com/formal-land/coq-of-go-experiment](https://github.com/formal-land/coq-of-go-experiment).\\n\\n\x3c!-- truncate --\x3e\\n\\n## Introduction\\n\\nWe believe that there are not yet a lot of formal verification tools for Go. We can cite [Goose](https://github.com/tchajed/goose), which is working by translation from Go to the proof system Coq. We will follow a similar approach, translating the Go language to our favorite proof system Coq. In contrast to Goose, we plan to support the whole Go language, even at the expense of the simplicity of the translation.\\n\\nFor that, we target the translation of the [SSA form of Go](https://pkg.go.dev/golang.org/x/tools/go/ssa) instead of the [Go AST](https://pkg.go.dev/go/ast). The SSA form is a more low-level representation of Go, so we hope to capture the semantics of the whole Go language more easily.
This should be at the expense of the simplicity of the generated translation, but we hope that having full language support outweighs this.\\n\\nGo is an interesting target as:\\n\\n- this is quite a popular language,\\n- it is focusing on simplicity, with a reduced set of language features,\\n- a lot of critical backend applications are written in Go, including for very large companies (Google, Netflix, Uber, Twitch, etc.).\\n\\nAmong interesting properties that we can verify are:\\n\\n- the absence of reachable `panic` in the code,\\n- the absence of race conditions or deadlocks,\\n- the backward compatibility from release to release, for parts of the code whose behavior is not supposed to change,\\n- the strict application of business rules.\\n\\n:::tip Contact\\n\\nYou can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase totally bug-free. Contact us at [contact@formal.land](mailto:contact@formal.land) to chat! We offer a free audit to assess the feasibility of formal verification on your case.\\n\\n:::\\n\\n:::note Goal\\n\\nOur company goal is to make formal verification accessible to all projects, reducing its cost to 20% of the development cost. There should be no reason to have bugs in end-user products!\\n\\n:::\\n\\n![Mole and Rooster](2024-02-22/mole_rooster.webp)\\n\\n## First target\\n\\nOur first target is to achieve the formal verification _including all the dependencies_ of the hello world program:\\n\\n```go\\npackage main\\n\\nimport \\"fmt\\"\\n\\nfunc main() {\\n\\tfmt.Println(\\"Hello, World!\\")\\n}\\n```\\n\\nWhat we want to show about this code is that it does a single and only thing: outputting the string \\"Hello, World!\\" to the standard output. 
Its only dependency is the `fmt` package, but when we look at the transitive dependencies of this package:\\n\\n```sh\\ngo list -f \'{{ .Deps }}\' fmt\\n```\\n\\nwe get around forty packages:\\n\\n```\\nerrors\\ninternal/abi\\ninternal/bytealg\\ninternal/coverage/rtcov\\ninternal/cpu\\ninternal/fmtsort\\ninternal/goarch\\ninternal/godebugs\\ninternal/goexperiment\\ninternal/goos\\ninternal/itoa\\ninternal/oserror\\ninternal/poll\\ninternal/race\\ninternal/reflectlite\\ninternal/safefilepath\\ninternal/syscall/execenv\\ninternal/syscall/unix\\ninternal/testlog\\ninternal/unsafeheader\\nio\\nio/fs\\nmath\\nmath/bits\\nos\\npath\\nreflect\\nruntime\\nruntime/internal/atomic\\nruntime/internal/math\\nruntime/internal/sys\\nruntime/internal/syscall\\nsort\\nstrconv\\nsync\\nsync/atomic\\nsyscall\\ntime\\nunicode\\nunicode/utf8\\nunsafe\\n```\\n\\nWe will need to translate all these packages to meaningful Coq code.\\n\\n## The start\\n\\nWe made the `coq-of-go` tool, with everything in a single file [main.go](https://github.com/formal-land/coq-of-go-experiment/blob/main/main.go) for now. We retrieve the SSA form of a Go package provided as a command line parameter (code without the error handling):\\n\\n```go\\nfunc main() {\\n\\tpackageToTranslate := os.Args[1]\\n\\tcfg := &packages.Config{Mode: packages.LoadSyntax}\\n\\tinitial, _ := packages.Load(cfg, packageToTranslate)\\n\\t_, pkgs := ssautil.Packages(initial, 0)\\n\\tpkgs[0].Build()\\n\\tmembers := pkgs[0].Members\\n```\\n\\n:::note SSA form\\n\\nThe [SSA form](https://en.wikipedia.org/wiki/Static_single-assignment_form) of a program is generally used internally by compilers to have a simple representation to work on. The [LLVM](https://llvm.org/) language is such an example. In SSA, each variable is assigned exactly once and the control flow is explicit, with jumps or conditional jumps to labels. 
There are no `for` loops, `if` statements, or non-primitive expressions.\\n\\n:::\\n\\nThen we iterate over all the SSA `members`, and directly print the corresponding Coq code to the standard output. We do not use an intermediate representation or make intermediate passes. We do not even do pretty-printing (splitting lines that are too long at the right place, and introducing indentation)! This should not be necessary as the SSA code cannot nest sub-expressions or statements. We still try to print a readable Coq code, as it will be used in the proofs.\\n\\nThere are four kinds of SSA members:\\n\\n- named constants,\\n- globals,\\n- types,\\n- functions.\\n\\nNamed constants and globals are similar, and are for top-level variables whose value is either known at compile-time or computed at the program\'s init. Types are for type definitions. We will focus on functions, as this is where the code is.\\n\\n## Functions\\n\\nThe SSA functions in Go are described by the type [`ssa.Function`](https://pkg.go.dev/golang.org/x/tools/go/ssa#Function):\\n\\n```go\\ntype Function struct {\\n\\tSignature *types.Signature\\n\\n\\t// source information\\n\\tSynthetic string // provenance of synthetic function; \\"\\" for true source functions\\n\\n\\tPkg *Package // enclosing package; nil for shared funcs (wrappers and error.Error)\\n\\tProg *Program // enclosing program\\n\\n\\tParams []*Parameter // function parameters; for methods, includes receiver\\n\\tFreeVars []*FreeVar // free variables whose values must be supplied by closure\\n\\tLocals []*Alloc // frame-allocated variables of this function\\n\\tBlocks []*BasicBlock // basic blocks of the function; nil => external\\n\\tRecover *BasicBlock // optional; control transfers here after recovered panic\\n\\tAnonFuncs []*Function // anonymous functions directly beneath this one\\n\\t// contains filtered or unexported fields\\n}\\n```\\n\\nThe main part of interest for us is `Blocks`. 
A block is a sequence of instructions, and the control flow is explicit. The last instruction of a block is a jump to another block, or a return. The first instructions of a block can be the special `Phi` instruction, which is used to merge control flow from different branches.\\n\\nWe decided to write a first version to see what the SSA code of Go looks like when printed in Coq, without thinking about generating a well-typed code. This looks like this:\\n\\n```coq\\nwith MakeUint64 (\u03b1 : list Val.t) : M (list Val.t) :=\\n M.Thunk (\\n match \u03b1 with\\n | [x] =>\\n M.Thunk (M.EvalBody [(0,\\n let* \\"t0\\" := Instr.BinOp x \\"<\\" (Val.Lit (Lit.Int 9223372036854775808)) in\\n Instr.If (Register.read \\"t0\\") 1 2\\n );\\n (1,\\n let* \\"t1\\" := Instr.Convert x in\\n let* \\"t2\\" := Instr.ChangeType (Register.read \\"t1\\") in\\n let* \\"t3\\" := Instr.MakeInterface (Register.read \\"t2\\") in\\n M.Return [(Register.read \\"t3\\")]\\n );\\n (2,\\n let* \\"t4\\" := Instr.Alloc (* complit *) Alloc.Local \\"*go/constant.intVal\\" in\\n let* \\"t5\\" := Instr.FieldAddr (Register.read \\"t4\\") 0 in\\n let* \\"t6\\" := Instr.Call (CallKind.Function (newInt [])) in\\n let* \\"t7\\" := Instr.Call (CallKind.Function (TODO_method [(Register.read \\"t6\\"); x])) in\\n do* Instr.Store (Register.read \\"t5\\") (Register.read \\"t7\\") in\\n let* \\"t8\\" := Instr.UnOp \\"*\\" (Register.read \\"t4\\") in\\n let* \\"t9\\" := Instr.MakeInterface (Register.read \\"t8\\") in\\n M.Return [(Register.read \\"t9\\")]\\n )])\\n | _ => M.Thunk (M.EvalBody [])\\n end)\\n```\\n\\nfor a source Go code (from the [go/constant](https://pkg.go.dev/go/constant) package):\\n\\n```go\\n// MakeUint64 returns the [Int] value for x.\\nfunc MakeUint64(x uint64) Value {\\n\\tif x < 1<<63 {\\n\\t\\treturn int64Val(int64(x))\\n\\t}\\n\\treturn intVal{newInt().SetUint64(x)}\\n}\\n```\\n\\nThere are three blocks of code, labeled with `0`, `1`, and `2`. 
The first block ends with a conditional jump `If` corresponding to the `if` statement in the Go code. The following blocks are corresponding to the two possible branches of the `if` statement. They both end with a `Return` instruction, corresponding to the `return` statement in the Go code. They run various primitive instructions that we have translated as we can.\\n\\nThe generated Coq code is still readable but more verbose than the original Go code. We will later develop proof techniques using simulations to enable the user to define equivalent but simpler versions of the translation. Being able to define simulations of an imperative program is also important for the proofs, as we can rewrite the code in functional style to make it easier to reason about.\\n\\n## Type-checking\\n\\nFrom there, a second step is to have a generated code that type-checks, forgetting about making a code with sound semantics for now. We generate the various Coq definitions that are needed in a header of the generated code, using axioms for all the definitions. For example, for the allocations we do:\\n\\n```coq\\nModule Alloc.\\n Inductive t : Set :=\\n | Heap\\n | Local.\\nEnd Alloc.\\n\\nModule Instr.\\n Parameter Alloc : Alloc.t -> string -> M Val.t.\\n```\\n\\nThe `Inductive` keyword in Coq defines a type with two constructors `Heap` and `Local`. The `Parameter` keyword defines an axiomatized definition, where we only provide the type but not the definition itself. The `Instr.Alloc` instruction takes as parameters an allocation mode `Alloc.t` and a string and returns an `M Val.t` value.\\n\\n### Representation of values\\n\\nWe make the choice to remove the types while doing the translation, as the type system of Go is probably incompatible with the one of Coq in many ways. We thus translate everything to a single type `Val.t` in Coq to represent all kinds of possible Go values. 
The downside of this approach is that it makes the generated code less readable and less safe, as types are useful to track the correct use of values.\\n\\nFor now, we define the `Val.t` type as:\\n\\n```coq\\nModule Val.\\n Inductive t : Set :=\\n | Lit (_ : Lit.t)\\n | Tuple (_ : list t).\\nEnd Val.\\n```\\n\\nwith the literals `Lit.t` as:\\n\\n```coq\\nModule Lit.\\n Inductive t : Set :=\\n | Bool (_ : bool)\\n | Int (_ : Z)\\n | Float (_ : Rational)\\n | Complex (_ _ : Rational)\\n | String (_ : string)\\n | Nil.\\nEnd Lit.\\n```\\n\\nWe plan to refine this type and add more cases as we improve `coq-of-go`. Structures, pointers, and closures are missing for now.\\n\\n### Monadic style\\n\\nIn order to represent the side-effects of the Go code, we use a [monadic style](https://en.wikipedia.org/wiki/Monad_(functional_programming)). This is a standard approach to represent side-effects like mutations, exceptions, or non-termination in a purely functional language such as Coq. We choose to use:\\n\\n- A free monad, where all the primitives are constructors of the inductive type `M` of the monad. This simplifies the manipulation of the monad by allowing to compute on it and by delegating the actual implementation of the monadic primitives for later.\\n- A co-inductive type, to allow potentially non-terminating programs.
Co-inductive types are like lazy definitions in Haskell where it is possible to make an infinite list for example, as long as only a finite number of elements are consumed.\\n\\nIn that sense, we follow the approach in the paper [Modular, Compositional, and Executable Formal Semantics for LLVM IR](https://cambium.inria.fr/~eyoon/paper/vir.pdf), that is using a co-inductive free monad (interaction tree) to formalize a reasonable subset of the LLVM language that is also an SSA representation but with more low-level instructions than Go.\\n\\nOur definition for `M` for now is:\\n\\n```coq\\nModule M.\\n CoInductive t (A : Set) : Set :=\\n | Return (_ : A)\\n | Bind {B : Set} (_ : t B) (_ : B -> t A)\\n | Thunk (_ : t A)\\n | EvalBody (_ : list (Z * t A)).\\n Arguments Return {A}.\\n Arguments Bind {A B}.\\n Arguments Thunk {A}.\\n Arguments EvalBody {A}.\\nEnd M.\\nDefinition M : Set -> Set := M.t.\\n```\\n\\nWe define all the functions that we translate as mutually recursive with the `CoFixpoint ... with ...` keyword of Coq. Thus, we do not have to preserve the ordering of definitions that is required by Coq or care for recursive or mutually recursive functions in Go.\\n\\nHowever, we did not manage to make the type-checker of Coq happy for our `CoFixpoint` as many definitions are axiomatized, and the type-checker of Coq wants their definitions to know if they produce co-inductive constructors.
So, for now, we admit this step by disabling the termination checker with this flag:\\n\\n```coq\\nLocal Unset Guard Checking.\\n```\\n\\n## Next\\n\\nWhen we translate our hello world example we get the Coq code:\\n\\n```coq\\nCoFixpoint Main (\u03b1 : list Val.t) : M (list Val.t) :=\\n M.Thunk (\\n match \u03b1 with\\n | [] =>\\n M.Thunk (M.EvalBody [(0,\\n let* \\"t0\\" := Instr.Alloc (* varargs *) Alloc.Heap \\"*[1]any\\" in\\n let* \\"t1\\" := Instr.IndexAddr (Register.read \\"t0\\") (Val.Lit (Lit.Int 0)) in\\n let* \\"t2\\" := Instr.MakeInterface (Val.Lit (Lit.String \\"Hello, World!\\")) in\\n do* Instr.Store (Register.read \\"t1\\") (Register.read \\"t2\\") in\\n let* \\"t3\\" := Instr.Slice (Register.read \\"t0\\") None None in\\n let* \\"t4\\" := Instr.Call (CallKind.Function (fmt.Println [(Register.read \\"t3\\")])) in\\n M.Return []\\n )])\\n | _ => M.Thunk (M.EvalBody [])\\n end)\\n\\nwith init (\u03b1 : list Val.t) : M (list Val.t) :=\\n M.Thunk (\\n match \u03b1 with\\n | [] =>\\n M.Thunk (M.EvalBody [(0,\\n let* \\"t0\\" := Instr.UnOp \\"*\\" (Register.read \\"init$guard\\") in\\n Instr.If (Register.read \\"t0\\") 2 1\\n );\\n (1,\\n do* Instr.Store (Register.read \\"init$guard\\") (Val.Lit (Lit.Bool true)) in\\n let* \\"t1\\" := Instr.Call (CallKind.Function (fmt.init [])) in\\n Instr.Jump 2\\n );\\n (2,\\n M.Return []\\n )])\\n | _ => M.Thunk (M.EvalBody [])\\n end).\\n```\\n\\nThe `init` function, which is automatically generated by the Go compiler to initialize global variables, does not do much here. It checks whether it was already called or not reading the `init$guard` variable, and if not, it calls the `fmt.init` function. The `Main` function is the one that we are interested in. It allocates a variable to store the string \\"Hello, World!\\", and then calls the `fmt.Println` function to print it.\\n\\nFrom there, to continue the project we have two possibilities:\\n\\n1. 
Give actual definitions to each primitive instruction that is used in this example (for now, everything is axiomatized).\\n2. Translate all the transitive dependencies of the hello world program to Coq, and make sure that we can compile everything together.\\n\\nFor the next step, we choose to follow the second possibility as we are more confident in being able to define the semantics of the instructions, which is purely done on the Coq side, than in being able to use the Go compiler\'s APIs to retrieve the definitions of all the dependencies and relate them together.\\n\\n## Conclusion\\n\\nWe have presented the beginning of our journey to translate Go programs to Coq, to build a formal verification tool for Go. The translation type-checks on the few examples we have tried but has no semantics. We will follow by handling the translation of dependencies of a package.\\n\\nIf you are interested in this project, please contact us at [contact@formal.land](mailto:contact@formal.land) or go to our [GitHub repository](https://github.com/formal-land/coq-of-go-experiment)."},{"id":"/2024/02/14/experiment-coq-of-hs","metadata":{"permalink":"/blog/2024/02/14/experiment-coq-of-hs","source":"@site/blog/2024-02-14-experiment-coq-of-hs.md","title":"\u03bb Experiment on translation from Haskell to Coq","description":"We present an experiment coq-of-hs that we have made on the translation of Haskell programs to the proof system Coq \ud83d\udc13.
The goal is to formally verify Haskell programs to make them totally bug-free.","date":"2024-02-14T00:00:00.000Z","formattedDate":"February 14, 2024","tags":[{"label":"coq-of-hs","permalink":"/blog/tags/coq-of-hs"},{"label":"Haskell","permalink":"/blog/tags/haskell"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":4.365,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\u03bb Experiment on translation from Haskell to Coq","tags":["coq-of-hs","Haskell","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\uddab Translating Go to Coq, part 1","permalink":"/blog/2024/02/22/journey-coq-of-go"},"nextItem":{"title":"The importance of formal verification","permalink":"/blog/2024/02/02/formal-verification-for-aleph-zero"}},"content":"We present an experiment [coq-of-hs](https://github.com/formal-land/coq-of-hs-experiment) that we have made on the translation of [Haskell](https://www.haskell.org/) programs to the proof system [Coq \ud83d\udc13](https://coq.inria.fr/). The goal is to formally verify Haskell programs to make them totally bug-free.\\n\\nIndeed, even with the use of a strict type system, there can still be bugs for properties that cannot be expressed with types. An example of such a property is the backward compatibility of an API endpoint for the new release of a web service when there has been code refactoring. Only formal verification can cover all execution cases and kinds of properties.\\n\\nThe code of the tool is at: [github.com/formal-land/coq-of-hs-experiment](https://github.com/formal-land/coq-of-hs-experiment) (AGPL license)\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Contact\\n\\nWe propose tools to make your codebase totally bug-free. Contact us at [contact@formal.land](mailto:contact@formal.land) for more information! 
We offer a free audit to assess the feasibility of formal verification for your case.\\n\\n:::\\n\\n:::info Info\\n\\nWe estimate that the cost of formal verification should be 20% of the development cost. There are no reasons to still have bugs today!\\n\\n:::\\n\\n![Haskell Logo](2024-02-14/haskell_logo.svg)\\n\\n## Goal of the experiment\\n\\nThere are already some tools to formally verify Haskell programs:\\n\\n- [\ud83d\udc13 hs-to-coq](https://github.com/plclub/hs-to-coq) translation from Haskell to Coq\\n- [\ud83d\udca7 Liquid Haskell](https://en.wikipedia.org/wiki/Liquid_Haskell) verification using [SMT solvers](https://en.wikipedia.org/wiki/Satisfiability_modulo_theories)\\n\\nIn this experiment, we want to check the feasibility of translation from Haskell to Coq:\\n\\n- \ud83d\udc4d covering all the language without manual configuration or code changes,\\n- \ud83d\udc4e even if this is at the cost of a more verbose and low-level translation.\\n\\n## Example\\n\\nHere is an example of a Haskell function:\\n\\n```haskell\\nfixObvious :: (a -> a) -> a\\nfixObvious f = f (fixObvious f)\\n```\\n\\nthat `coq-of-hs` translates to this valid Coq code:\\n\\n```coq\\nCoFixpoint fixObvious : Val.t :=\\n (Val.Lam (fun (f : Val.t) => (Val.App f (Val.App fixObvious f)))).\\n```\\n\\n## Infrastructure\\n\\nWe read the [Haskell Core](https://serokell.io/blog/haskell-to-core) representation of Haskell using the GHC plugin system. Thus, we read the exact same code version as the one that is compiled down to assembly code by [GHC](https://www.haskell.org/ghc/), to take into account all compilation options.\\n\\nHaskell Core is an intermediate representation of Haskell that is close to the lambda calculus and used by the Haskell compiler for various optimizations passes. 
Here are all the constructors of the `Expr` type of Haskell Core:\\n\\n```haskell\\ndata Expr b\\n = Var Id\\n | Lit Literal\\n | App (Expr b) (Arg b)\\n | Lam b (Expr b)\\n | Let (Bind b) (Expr b)\\n | Case (Expr b) b Type [Alt b]\\n | Cast (Expr b) Coercion\\n | Tick (Tickish Id) (Expr b)\\n | Type Type\\n | Coercion Coercion\\n```\\n\\nThis paper [System FC, as implemented in GHC](https://repository.brynmawr.edu/cgi/viewcontent.cgi?article=1015&context=compsci_pubs) presents it as [System F](https://en.wikipedia.org/wiki/System_F) plus coercions. We translate Haskell code to an untyped version of the lambda calculus in Coq, with co-induction to allow for infinite data structures:\\n\\n```coq\\nModule Val.\\n #[bypass_check(positivity)]\\n CoInductive t : Set :=\\n | Lit (_ : Lit.t)\\n | Con (_ : string) (_ : list t)\\n | App (_ _ : t)\\n | Lam (_ : t -> t)\\n | Case (_ : t) (_ : t -> list (Case.t t))\\n | Impossible.\\nEnd Val.\\n```\\n\\nWe make the translation by induction over the Haskell Core representation, and we translate each constructor to a corresponding constructor of the Coq representation. We pretty-print the Coq code directly without using an intermediate representation. We use the [prettyprinter](https://github.com/quchen/prettyprinter) package with the two main following primitives:\\n\\n```haskell\\nconcatNest :: [Doc ()] -> Doc ()\\nconcatNest = group . nest 2 . vsep\\n\\nconcatGroup :: [Doc ()] -> Doc ()\\nconcatGroup = group . vsep\\n```\\n\\nto display a sub-term with or without indentation when splitting lines that are too long. This translation works well on all the Haskell expressions that we have tested.\\n\\n## Missing features\\n\\n### Semantics\\n\\nWe have not yet defined a semantics. For now, the terms that we generate in Coq are purely descriptive. We will wait to have examples of things to verify to define semantics that are practical to use.\\n\\n### Type-classes\\n\\nWe have not yet translated typeclasses. 
The Haskell Core language hides most of the typeclasses-related code. For example, it represents instances as additional function parameters for functions that have typeclass constraints. But we still need to declare the functions corresponding to the members of the typeclasses, which we have not done yet.\\n\\n### Multi-file projects\\n\\nWe have not yet implemented the translation of multi-file projects. We have only tested the translation of a single-file project.\\n\\n### Standard library\\n\\nSimilarly to the handling of multi-file projects, we have not yet tested the translation of projects using external libraries or translating the base library of Haskell.\\n\\n### Strict positivity\\n\\nWe had to turn off the strict positivity condition for the definition of `Val.t` in Coq with:\\n\\n```coq\\n#[bypass_check(positivity)]\\n```\\n\\nThis is due to the case:\\n\\n```coq\\n| Lam (_ : t -> t)\\n```\\n\\nwhere `t` appears as a parameter of a function (negative position). We do not know if this causes any problem in practice, on values that correspond to well-typed Haskell programs.\\n\\n## Conclusion\\n\\nWe have presented an experiment on the translation of Haskell programs to Coq.
If you are interested in this project, please get in touch with us at [contact@formal.land](mailto:contact@formal.land) or go to the [GitHub repository](https://github.com/formal-land/coq-of-hs-experiment) of the project."},{"id":"/2024/02/02/formal-verification-for-aleph-zero","metadata":{"permalink":"/blog/2024/02/02/formal-verification-for-aleph-zero","source":"@site/blog/2024-02-02-formal-verification-for-aleph-zero.md","title":"The importance of formal verification","description":"Ensuring Flawless Software in a Flawed World","date":"2024-02-02T00:00:00.000Z","formattedDate":"February 2, 2024","tags":[],"readingTime":5.53,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"The importance of formal verification","authors":[]},"unlisted":false,"prevItem":{"title":"\u03bb Experiment on translation from Haskell to Coq","permalink":"/blog/2024/02/14/experiment-coq-of-hs"},"nextItem":{"title":"Upgrade the Rust version of coq-of-rust","permalink":"/blog/2024/01/18/update-coq-of-rust"}},"content":"> Ensuring Flawless Software in a Flawed World\\n\\nIn this blog post, we present what formal verification is and why this is such a valuable tool to improve the security of your applications.\\n\\n\x3c!-- truncate --\x3e\\n\\n![Formal verification](2024-02-02/formal_verification.png)\\n\\n:::tip Contact\\n\\nIf you want to formally verify your codebase to improve the security of your application, contact us at [contact@formal.land](mailto:contact@formal.land)! We offer a free audit of your codebase to assess the feasibility of formal verification.\\n\\n:::\\n\\n:::info Thanks\\n\\nThe current development of our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust), for the formal verification of Rust code, is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation and its [Ecosystem Funding Program](https://alephzero.org/ecosystem-funding-program). 
The aim is to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n## What is formal verification?\\n\\nFormal verification is a set of techniques to check for the complete correctness of a program, reasoning at a symbolic level rather than executing a particular instance of the code. By symbolic reasoning, we mean following the values of the variables by tracking their names and constraints, without necessarily giving them an example value. This is what we would do in our heads to understand a code where a variable `username` appears, following which functions it is given to, to know where we use the user name. The concrete user name that we consider is irrelevant, although some people prefer to think with an example.\\n\\nIn formal verification, we rely on precise mathematical reasoning to make sure that there are no mistakes or missing cases. We check this reasoning with a dedicated program ([SMT](https://en.wikipedia.org/wiki/Satisfiability_modulo_theories) solver, [Coq](https://coq.inria.fr/) proof system, ...). Indeed, as programs grow in complexity, it could be easy to forget an `if` branch or an error case.\\n\\nFor example, to say that the following Rust program is valid:\\n\\n```rust\\n/// Return the maximum of [a] and [b]\\nfn get_max(a: u128, b: u128) -> u128 {\\n if a > b {\\n a\\n } else {\\n b\\n }\\n}\\n```\\n\\nwe reason on two cases (reasoning by disjunction):\\n\\n- `a > b` where `a` is the maximum,\\n- `a <= b` where `b` is the maximum,\\n\\nwith the values of `a` and `b` being irrelevant (symbolic). In both cases, we can conclude that `get_max` returns the maximum.\\n\\nThis is in contrast with testing, where we need to execute the program with all possible instances of `a` and `b` to check that the program is correct with 100% certainty.
This is infeasible in this case as the type `u128` is too large to be tested exhaustively: there are `2^256` possible values for `a` and `b`, meaning `115792089237316195423570985008687907853269984665640564039457584007913129639936` possible values!\\n\\nA program is shown correct with respect to an expected behavior, called a _formal specification_. This is expressed in a mathematical language to be non-ambiguous. For example, we can specify the behavior of the previous program as:\\n\\n```\\nFORALL (a b : u128),\\n (get_max a b = a OR get_max a b = b) AND\\n (get_max a b >= a AND get_max a b >= b)\\n```\\n\\nstating that we indeed return the maximum of `a` and `b`.\\n\\nWhen a program is formally verified, we are mathematically sure it will always follow its specifications. This is a way to eliminate all bugs, as long as we have a complete specification of what it is supposed to do or not do. This corresponds to the highest level of Evaluation Assurance Level, [EAL7](https://en.wikipedia.org/wiki/Evaluation_Assurance_Level#EAL7:_Formally_Verified_Design_and_Tested). This is used for critical applications, such as space rocket software, where a single bug can be extremely expensive (the loss of a rocket!).\\n\\nThere are various formal verification tools, such as the proof system [Coq](https://coq.inria.fr/). The C compiler [CompCert](https://en.wikipedia.org/wiki/CompCert) is an example of large software verified in Coq. It is proven correct, in contrast to most other C compilers that contain [subtle bugs](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf). CompCert is now used by Airbus to compile C programs embedded in planes \ud83d\udeeb.\\n\\n## Why is it such a useful tool?\\n\\nFormal verification is extremely useful as it can anticipate all the bugs by exploring all possible execution cases of a program. Here is a quote from [Edsger W. 
Dijkstra](https://en.wikipedia.org/wiki/Formal_verification):\\n\\n> Program testing can be used to show the presence of bugs, but never to show their absence!\\n\\nIt offers the possibility to make software that never fails. This is often required for applications with human life at stake, such as planes or medical devices. But it can also be useful for applications where a single bug can be extremely expensive, such as financial applications.\\n\\nSmart contracts are a good example of such applications. They are programs that are executed on a blockchain and are used to manage assets worth billions of dollars. A single bug in a smart contract can lead to the loss of all the assets managed by the contract. In the first half of 2023, some estimate that attacks on web3 platforms resulted in a loss of [$655.61 million](https://www.linkedin.com/pulse/h1-2023-global-web3-security-report-aml-analysis-crypto-regulatory/), with most of these losses due to bugs in smart contracts. These bugs could be prevented using formally verified smart contracts.\\n\\nFinally, formal verification is useful to improve the quality of a program by enforcing the need to use:\\n\\n- clear programming constructs,\\n- an explicit specification of the behavior of the program.\\n\\n## Comparison of formal verification and testing\\n\\nCompared to testing, formal verification is more complex as:\\n\\n- it typically takes much more time to formally verify a program than to test it on a reasonable set of inputs,\\n- it requires a formal specification of the program, which is not always available,\\n- it requires some specific expertise to use the formal verification tools and to write the specifications.\\n\\nIn addition, formal verification assumes a certain model of the environment of the program, which is not always accurate. When actually executing the code, we also exercise all the dependencies (libraries, operating system, network, ...) 
that might cause issues at runtime.\\n\\nHowever, formal verification is the only way to have an exhaustive check of the program. It verifies all corner cases, such as integer overflows, or hard-to-reproduce issues, such as concurrency bugs. We recommend combining both approaches as they do not catch the same kinds of bugs.\\n\\nAt [Formal Land](https://formal.land/), we consider it critical to lower the cost of formal verification to apply it to a larger scope of programs and prevent more bugs and attacks. We work on the formal verification of Rust with [coq-of-rust](https://github.com/formal-land/coq-of-rust) and OCaml with [coq-of-ocaml](https://github.com/formal-land/coq-of-ocaml).\\n\\n## Conclusion\\n\\nFormal verification is a powerful tool to improve the security of your applications. It is the only way to prevent all bugs by exploring all possible executions of your programs. It complements existing testing methods. It is particularly useful for critical applications, such as smart contracts, where a single bug can be extremely expensive."},{"id":"/2024/01/18/update-coq-of-rust","metadata":{"permalink":"/blog/2024/01/18/update-coq-of-rust","source":"@site/blog/2024-01-18-update-coq-of-rust.md","title":"Upgrade the Rust version of coq-of-rust","description":"We continue our work on the coq-of-rust tool to formally verify Rust programs with the Coq proof assistant. 
We have upgraded the Rust version that we support, simplified the translation of the traits, and are adding better support for the standard library of Rust.","date":"2024-01-18T00:00:00.000Z","formattedDate":"January 18, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"Aleph-Zero","permalink":"/blog/tags/aleph-zero"}],"readingTime":3.5,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Upgrade the Rust version of coq-of-rust","tags":["coq-of-rust","Rust","Coq","Aleph-Zero"],"authors":[]},"unlisted":false,"prevItem":{"title":"The importance of formal verification","permalink":"/blog/2024/02/02/formal-verification-for-aleph-zero"},"nextItem":{"title":"Translating Rust match patterns to Coq with coq-of-rust","permalink":"/blog/2024/01/04/rust-translating-match"}},"content":"We continue our work on the [coq-of-rust](https://github.com/formal-land/coq-of-rust) tool to formally verify Rust programs with the [Coq proof assistant](https://coq.inria.fr/). We have upgraded the Rust version that we support, simplified the translation of the traits, and are adding better support for the standard library of Rust.\\n\\nOverall, we are now able to translate **about 80%** of the Rust examples from the [Rust by Example](https://doc.rust-lang.org/stable/rust-by-example/) book into valid Coq files. This means we support a large subset of the Rust language.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nTo formally verify your Rust codebase and improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)! 
Formal verification is the only way to prevent all bugs by exploring all possible executions of your programs \ud83c\udfaf.\\n\\n:::\\n\\n:::info Thanks\\n\\nThis work and the development of [coq-of-rust](https://github.com/formal-land/coq-of-rust) is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation, to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n![Rust rooster](2024-01-18/rooster.png)\\n\\n## Upgrade of the Rust version\\n\\nThe tool `coq-of-rust` is tied to a particular version of the Rust compiler that we use to parse and type-check a `cargo` project. We now support the `nightly-2023-12-15` version of Rust, up from `nightly-2023-04-30`. Most of the changes were minor, but it is good to handle these regularly to have smooth upgrades. The corresponding pull request is [coq-of-rust/pull/445](https://github.com/formal-land/coq-of-rust/pull/445). We also got more [Clippy](https://github.com/rust-lang/rust-clippy) warnings thanks to the new version of Rust.\\n\\n## Simplify the translation of traits\\n\\nThe traits of Rust are similar to the [type-classes of Coq](https://coq.inria.fr/refman/addendum/type-classes.html). This is how we translate traits to Coq.\\n\\nBut there are a lot of subtle differences between the two languages. The type-class inference mechanism of Coq does not work all the time on generated Rust code, even when adding a lot of code annotations. We think that the only reliable way to translate Rust traits would be to explicit the implementations inferred by the Rust compiler, but the Rust compiler currently throws away this information.\\n\\nInstead, our new solution is to use a Coq tactic:\\n\\n```coq\\n(** Try first to infer the trait instance, and if unsuccessful, delegate it at\\n proof time. 
*)\\nLtac get_method method :=\\n exact (M.pure (method _)) ||\\n exact (M.get_method method).\\n```\\n\\nthat first tries to infer the trait instance for a particular method, and if it fails, delegates its definition to the user at proof time. This is a bit unsafe, as a user could provide invalid instances at proof time, by giving some custom instance definitions instead of the ones generated by `coq-of-rust`. So, one should be careful to only apply generated instances to fill the hole made by this tactic in case of failure. We believe this to be a reasonable assumption that we could enforce someday if needed.\\n\\nWe are also starting to remove the trait constraints on polymorphic functions (the `where` clauses). We start by doing it in our manual definition of the standard library of Rust. The rationale is that we can provide the actual trait instances at proof time by having the right hypothesis replicating the constraints of the `where` clauses. Having fewer `where` clauses reduces the complexity of the type inference of Coq on the generated code. There are still some cases that we need to clarify, for example, the handling of [associated types](https://doc.rust-lang.org/rust-by-example/generics/assoc_items/types.html) in the absence of traits.\\n\\n## Handling more of the standard library\\n\\nWe have a definition of the standard library of Rust, mainly composed of axiomatized[^1] definitions, in these three folders:\\n\\n- [CoqOfRust/alloc](https://github.com/formal-land/coq-of-rust/tree/main/CoqOfRust/alloc)\\n- [CoqOfRust/core](https://github.com/formal-land/coq-of-rust/tree/main/CoqOfRust/core)\\n- [CoqOfRust/std](https://github.com/formal-land/coq-of-rust/tree/main/CoqOfRust/std)\\n\\nBy adding more of these axioms, as well as with some small changes to the `coq-of-rust` tool, we are now able to successfully translate around 80% of the examples of the [Rust by Example](https://doc.rust-lang.org/stable/rust-by-example/) book. 
There can still be some challenges on larger programs, but this showcases the good support of `coq-of-rust` for the Rust language.\\n\\n## Conclusion\\n\\nWe are continuing to improve our tool `coq-of-rust` to support more of the Rust language and are making good progress. If you need to improve the security of critical applications written in Rust, contact us at [contact@formal.land](mailto:contact@formal.land) to start formally verifying your code!\\n\\n[^1]: An axiom in Coq is either a theorem whose proof is admitted, or a function/constant definition left for later. This is the equivalent in Rust of the `todo!` macro."},{"id":"/2024/01/04/rust-translating-match","metadata":{"permalink":"/blog/2024/01/04/rust-translating-match","source":"@site/blog/2024-01-04-rust-translating-match.md","title":"Translating Rust match patterns to Coq with coq-of-rust","description":"Our tool coq-of-rust enables formal verification of \ud83e\udd80 Rust code to make sure that a program has no bugs. This technique checks all possible execution paths using mathematical techniques. 
This is important for example to ensure the security of smart contracts written in Rust language.","date":"2024-01-04T00:00:00.000Z","formattedDate":"January 4, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"Aleph-Zero","permalink":"/blog/tags/aleph-zero"}],"readingTime":6.005,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Translating Rust match patterns to Coq with coq-of-rust","tags":["coq-of-rust","Rust","Coq","Aleph-Zero"],"authors":[]},"unlisted":false,"prevItem":{"title":"Upgrade the Rust version of coq-of-rust","permalink":"/blog/2024/01/18/update-coq-of-rust"},"nextItem":{"title":"Verifying an ERC-20 smart contract in Rust","permalink":"/blog/2023/12/13/rust-verify-erc-20-smart-contract"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) enables [formal verification](https://en.wikipedia.org/wiki/Formal_verification) of [\ud83e\udd80 Rust](https://www.rust-lang.org/) code to make sure that a program has no bugs. This technique checks all possible execution paths using mathematical techniques. This is important for example to ensure the security of smart contracts written in Rust language.\\n\\nOur tool `coq-of-rust` works by translating Rust programs to the general proof system [\ud83d\udc13 Coq](https://coq.inria.fr/). Here we explain how we translate[ `match` patterns](https://doc.rust-lang.org/book/ch06-02-match.html) from Rust to Coq. The specificity of Rust patterns is to be able to match values either by value or reference.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nTo formally verify your Rust codebase and improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)! 
Formal verification is the only way to prevent all bugs by exploring all possible executions of your program.\\n\\n:::\\n\\n:::info Thanks\\n\\nThis work and the development of [coq-of-rust](https://github.com/formal-land/coq-of-rust) is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation, to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n![Rust rooster](2024-01-04/rust-rooster.png)\\n\\n## Rust example \ud83e\udd80\\n\\nTo illustrate the pattern matching in Rust, we will use the following example featuring a match by reference:\\n\\n```rust\\npub(crate) fn is_option_equal(\\n is_equal: fn(x: &A, y: &A) -> bool,\\n lhs: Option,\\n rhs: &A,\\n) -> bool {\\n match lhs {\\n None => false,\\n Some(ref value) => is_equal(value, rhs),\\n }\\n}\\n```\\n\\nWe take a function `is_equal` as a parameter, operating only on references to the type `A`. We apply it to compare two values `lhs` and `rhs`:\\n\\n- if `lhs` is `None`, we return `false`,\\n- if `lhs` is `Some`, we get its value by reference and apply `is_equal`.\\n\\nWhen we apply the pattern:\\n\\n```rust\\nSome(ref value) => ...\\n```\\n\\nwe do something interesting: we read the value of `lhs` to know if we are in a `Some` case but leave it in place and return `value` the reference to its content.\\n\\nTo simulate this behavior in Coq, we need to match in two steps:\\n\\n1. match the value of `lhs` to know if we are in a `Some` case or not,\\n2. 
if we are in a `Some` case, create the reference to the content of a `Some` case based on the reference to `lhs`.\\n\\n## Coq translation \ud83d\udc13\\n\\nThe Coq translation that our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) generates is the following:\\n\\n```coq\\nDefinition is_option_equal\\n {A : Set}\\n (is_equal : (ref A) -> (ref A) -> M bool.t)\\n (lhs : core.option.Option.t A)\\n (rhs : ref A)\\n : M bool.t :=\\n let* is_equal := M.alloc is_equal in\\n let* lhs := M.alloc lhs in\\n let* rhs := M.alloc rhs in\\n let* \u03b10 : M.Val bool.t :=\\n match_operator\\n lhs\\n [\\n fun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.None => M.alloc false\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t);\\n fun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.Some _ =>\\n let \u03b30_0 := \u03b3.[\\"Some.0\\"] in\\n let* value := M.alloc (borrow \u03b30_0) in\\n let* \u03b10 : (ref A) -> (ref A) -> M bool.t := M.read is_equal in\\n let* \u03b11 : ref A := M.read value in\\n let* \u03b12 : ref A := M.read rhs in\\n let* \u03b13 : bool.t := M.call (\u03b10 \u03b11 \u03b12) in\\n M.alloc \u03b13\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t)\\n ] in\\n M.read \u03b10.\\n```\\n\\nWe run the `match_operator` on `lhs` and the two branches of the `match`. This operator is of type:\\n\\n```coq\\nDefinition match_operator {A B : Set}\\n (scrutinee : A)\\n (arms : list (A -> M B)) :\\n M B :=\\n ...\\n```\\n\\nIt takes a `scrutinee` value to match as a parameter, and runs a sequence of functions `arms` on it. Each function `arms` takes the value of the `scrutinee` and returns a monadic value `M B`. This monadic value can either be a success value if the pattern matches, or a special failure value if the pattern does not match. We evaluate the branches until one succeeds.\\n\\n### `None` branch\\n\\nThe `None` branch is the simplest one. 
We read the value at the address given by `lhs` (we represent each Rust variable by its address) and match it with the `None` constructor:\\n\\n```coq\\nfun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.None => M.alloc false\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t)\\n```\\n\\nIf it matches, we return `false`. If it does not, we return the special value `M.break_match` to indicate that the pattern does not match.\\n\\n### `Some` branch\\n\\nIn the `Some` branch, we first also read the value at the address given by `lhs` and match it with the `Some` constructor:\\n\\n```coq\\nfun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.Some _ =>\\n let \u03b30_0 := \u03b3.[\\"Some.0\\"] in\\n let* value := M.alloc (borrow \u03b30_0) in\\n let* \u03b10 : (ref A) -> (ref A) -> M bool.t := M.read is_equal in\\n let* \u03b11 : ref A := M.read value in\\n let* \u03b12 : ref A := M.read rhs in\\n let* \u03b13 : bool.t := M.call (\u03b10 \u03b11 \u03b12) in\\n M.alloc \u03b13\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t)\\n```\\n\\nIf we are in that case, we create the value:\\n\\n```coq\\nlet \u03b30_0 := \u03b3.[\\"Some.0\\"] in\\n```\\n\\nwith the address of the first field of the `Some` constructor, relative to the address of `lhs` given in `\u03b3`. We define the operator `.[\\"Some.0\\"]` when we define the option type and generate such definitions for all user-defined enum types.\\n\\nWe then encapsulate the address `\u03b30_0` in a proper Rust reference:\\n\\n```coq\\nlet* value := M.alloc (borrow \u03b30_0) in\\n```\\n\\nof type `ref A` in the original Rust code. 
Finally, we call the function `is_equal` on the two references `value` and `rhs`, with some boilerplate code to read and allocate the variables.\\n\\n## General translation\\n\\nWe generalize this translation to all patterns by:\\n\\n- flattening all the or patterns `|` so that only patterns with a single choice remain,\\n- evaluating each match branch in order with the `match_operator` operator,\\n- in each branch, evaluating the inner patterns in order. This evaluation might fail at any point if the pattern does not match. In this case, we return the special value `M.break_match` and continue with the next branch.\\n\\nAt least one branch should succeed as the Rust compiler checks that all cases are covered. We still have a special value `M.impossible` in Coq for the case where no patterns match and satisfy the type checker.\\n\\nWe distinguish and handle the following kinds of patterns (and all their combinations):\\n\\n- wild patterns `_`,\\n- binding patterns `(ref) name` or `(ref) name as pattern` (the `ref` keyword is optional),\\n- struct patterns `Name { field1: pattern1, ... }` or `Name(pattern1, ...)`\\n- tuple patterns `(pattern1, ...)`,\\n- literal patterns `12`, `true`, ...,\\n- slice patterns `[first, second, tail @ ..]`,\\n- dereference patterns `&pattern`.\\n\\nThis was enough to cover all of our examples. The Rust compiler can also automatically add some `ref` patterns when matching on references. We do not need to handle this case as this is automatically done by the Rust compiler during its compilation to the intermediate [THIR](https://rustc-dev-guide.rust-lang.org/thir.html) representation, and we directly read the THIR code.\\n\\n## Conclusion\\n\\nIn this blog post, we have presented how we translate Rust patterns to the proof system Coq. 
The difficult part is handling the `ref` patterns, which we do by matching in two steps: matching on the values and then computing the addresses of the sub-fields.\\n\\nIf you have Rust smart contracts or programs to verify, feel free to email us at [contact@formal.land](mailto:contact@formal.land). We will be happy to help!"},{"id":"/2023/12/13/rust-verify-erc-20-smart-contract","metadata":{"permalink":"/blog/2023/12/13/rust-verify-erc-20-smart-contract","source":"@site/blog/2023-12-13-rust-verify-erc-20-smart-contract.md","title":"Verifying an ERC-20 smart contract in Rust","description":"Our tool coq-of-rust enables formal verification of \ud83e\udd80 Rust code to make sure that a program has no bugs given a precise specification. We work by translating Rust programs to the general proof system \ud83d\udc13 Coq.","date":"2023-12-13T00:00:00.000Z","formattedDate":"December 13, 2023","tags":[{"label":"Aleph-Zero","permalink":"/blog/tags/aleph-zero"},{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"ERC-20","permalink":"/blog/tags/erc-20"},{"label":"ink!","permalink":"/blog/tags/ink"}],"readingTime":20.115,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Verifying an ERC-20 smart contract in Rust","tags":["Aleph-Zero","coq-of-rust","Rust","Coq","ERC-20","ink!"],"authors":[]},"unlisted":false,"prevItem":{"title":"Translating Rust match patterns to Coq with coq-of-rust","permalink":"/blog/2024/01/04/rust-translating-match"},"nextItem":{"title":"Translation of function bodies from Rust to Coq","permalink":"/blog/2023/11/26/rust-function-body"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) enables formal verification of [\ud83e\udd80 Rust](https://www.rust-lang.org/) code to make sure that a program has no bugs given a precise specification. 
We work by translating Rust programs to the general proof system [\ud83d\udc13 Coq](https://coq.inria.fr/).\\n\\nHere, we show how we formally verify an [ERC-20 smart contract](https://github.com/paritytech/ink/blob/master/integration-tests/erc20/lib.rs) written in Rust for the [Aleph Zero](https://alephzero.org/) blockchain. [ERC-20](https://en.wikipedia.org/wiki/Ethereum#ERC20) smart contracts are used to create new kinds of tokens in an existing blockchain. Examples are stablecoins such as the [\ud83d\udcb2USDT](https://tether.to/).\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nTo formally verify your Rust codebase and improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)! Formal verification is the only way to prevent all bugs by exploring all possible executions of your program.\\n\\n:::\\n\\n:::info Thanks\\n\\nThis work and the development of [coq-of-rust](https://github.com/formal-land/coq-of-rust) is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation, to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n![Rooster verifying](2023-12-13/rooster-verifying.png)\\n\\n## Smart contract code \ud83e\udd80\\n\\nHere is the Rust code of the smart contract that we want to verify:\\n\\n```rust\\n#[ink::contract]\\nmod erc20 {\\n use ink::storage::Mapping;\\n\\n #[ink(storage)]\\n #[derive(Default)]\\n pub struct Erc20 {\\n total_supply: Balance,\\n balances: Mapping,\\n allowances: Mapping<(AccountId, AccountId), Balance>,\\n }\\n\\n #[ink(event)]\\n pub struct Transfer {\\n // ...\\n }\\n\\n #[ink(event)]\\n pub struct Approval {\\n // ...\\n }\\n\\n #[derive(Debug, PartialEq, Eq)]\\n #[ink::scale_derive(Encode, Decode, TypeInfo)]\\n pub enum Error {\\n // ...\\n }\\n\\n pub type Result = core::result::Result;\\n\\n impl Erc20 {\\n #[ink(constructor)]\\n pub fn new(total_supply: Balance) -> Self {\\n let mut 
balances = Mapping::default();\\n let caller = Self::env().caller();\\n balances.insert(caller, &total_supply);\\n Self::env().emit_event(Transfer {\\n from: None,\\n to: Some(caller),\\n value: total_supply,\\n });\\n Self {\\n total_supply,\\n balances,\\n allowances: Default::default(),\\n }\\n }\\n\\n #[ink(message)]\\n pub fn total_supply(&self) -> Balance {\\n self.total_supply\\n }\\n\\n #[ink(message)]\\n pub fn balance_of(&self, owner: AccountId) -> Balance {\\n self.balance_of_impl(&owner)\\n }\\n\\n #[inline]\\n fn balance_of_impl(&self, owner: &AccountId) -> Balance {\\n self.balances.get(owner).unwrap_or_default()\\n }\\n\\n #[ink(message)]\\n pub fn allowance(&self, owner: AccountId, spender: AccountId) -> Balance {\\n self.allowance_impl(&owner, &spender)\\n }\\n\\n #[inline]\\n fn allowance_impl(&self, owner: &AccountId, spender: &AccountId) -> Balance {\\n self.allowances.get((owner, spender)).unwrap_or_default()\\n }\\n\\n #[ink(message)]\\n pub fn transfer(&mut self, to: AccountId, value: Balance) -> Result<()> {\\n let from = self.env().caller();\\n self.transfer_from_to(&from, &to, value)\\n }\\n\\n #[ink(message)]\\n pub fn approve(&mut self, spender: AccountId, value: Balance) -> Result<()> {\\n let owner = self.env().caller();\\n self.allowances.insert((&owner, &spender), &value);\\n self.env().emit_event(Approval {\\n owner,\\n spender,\\n value,\\n });\\n Ok(())\\n }\\n\\n #[ink(message)]\\n pub fn transfer_from(\\n &mut self,\\n from: AccountId,\\n to: AccountId,\\n value: Balance,\\n ) -> Result<()> {\\n let caller = self.env().caller();\\n let allowance = self.allowance_impl(&from, &caller);\\n if allowance < value {\\n return Err(Error::InsufficientAllowance)\\n }\\n self.transfer_from_to(&from, &to, value)?;\\n // We checked that allowance >= value\\n #[allow(clippy::arithmetic_side_effects)]\\n self.allowances\\n .insert((&from, &caller), &(allowance - value));\\n Ok(())\\n }\\n\\n fn transfer_from_to(\\n &mut self,\\n from: 
&AccountId,\\n to: &AccountId,\\n value: Balance,\\n ) -> Result<()> {\\n let from_balance = self.balance_of_impl(from);\\n if from_balance < value {\\n return Err(Error::InsufficientBalance)\\n }\\n // We checked that from_balance >= value\\n #[allow(clippy::arithmetic_side_effects)]\\n self.balances.insert(from, &(from_balance - value));\\n let to_balance = self.balance_of_impl(to);\\n self.balances\\n .insert(to, &(to_balance.checked_add(value).unwrap()));\\n self.env().emit_event(Transfer {\\n from: Some(*from),\\n to: Some(*to),\\n value,\\n });\\n Ok(())\\n }\\n }\\n}\\n```\\n\\nThis whole code is rather short and contains no loops, which will simplify our verification process. It uses a lot of macros, such as `#[ink(message)]`, that are specific to the [ink!](https://use.ink/) language for smart contracts, built on top of Rust. To verify this smart contract, we removed all the macros and added a mock of the dependencies, such as `ink::storage::Mapping` to get a map data structure.\\n\\n## The Coq translation \ud83d\udc13\\n\\nBy running our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) we automatically obtain the corresponding Coq code for the contract [erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/erc20.v). 
Here is an extract for the `transfer` function:\\n\\n```coq\\n(*\\n fn transfer(&mut self, to: AccountId, value: Balance) -> Result<()> {\\n let from = self.env().caller();\\n self.transfer_from_to(&from, &to, value)\\n }\\n*)\\nDefinition transfer\\n (self : mut_ref ltac:(Self))\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance))\\n : M ltac:(erc20.Result unit) :=\\n let* self : M.Val (mut_ref ltac:(Self)) := M.alloc self in\\n let* to : M.Val erc20.AccountId.t := M.alloc to in\\n let* value : M.Val ltac:(erc20.Balance) := M.alloc value in\\n let* from : M.Val erc20.AccountId.t :=\\n let* \u03b10 : mut_ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : erc20.Env.t :=\\n M.call (erc20.Erc20.t::[\\"env\\"] (borrow (deref \u03b10))) in\\n let* \u03b12 : M.Val erc20.Env.t := M.alloc \u03b11 in\\n let* \u03b13 : erc20.AccountId.t :=\\n M.call (erc20.Env.t::[\\"caller\\"] (borrow \u03b12)) in\\n M.alloc \u03b13 in\\n let* \u03b10 : mut_ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : u128.t := M.read value in\\n let* \u03b12 : core.result.Result.t unit erc20.Error.t :=\\n M.call\\n (erc20.Erc20.t::[\\"transfer_from_to\\"] \u03b10 (borrow from) (borrow to) \u03b11) in\\n let* \u03b10 : M.Val (core.result.Result.t unit erc20.Error.t) := M.alloc \u03b12 in\\n M.read \u03b10.\\n```\\n\\nMore details of the translation are given in previous blog posts, but basically:\\n\\n- we make explicit all memory and implicit operations (like borrowing and dereferencing),\\n- we apply a monadic translation to chain the primitive operations with `let*`.\\n\\n## Proof strategy\\n\\n![Proof strategy](2023-12-13/proof-strategy.png)\\n\\nWe verify the code in two steps:\\n\\n1. Show that a simpler, purely functional Coq code can simulate all the smart contract code.\\n2. Show that the simulation is correct.\\n\\nThat way, we can eliminate all the memory-related operations by showing the equivalence with a simulation. 
Then, we can focus on the functional code, which is more straightforward to reason about. We can cite another project, [Aeneas](https://github.com/AeneasVerif/aeneas), which proposes to do the first step (removing memory operations) automatically.\\n\\n## Simulations\\n\\n### Simulation code\\n\\nWe will work on the example of the `transfer` function. We define the simulations in [Simulations/erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/Simulations/erc20.v). For the `transfer` function this is:\\n\\n```coq\\nDefinition transfer\\n (env : erc20.Env.t)\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n MS? State.t ltac:(erc20.Result unit) :=\\n transfer_from_to (Env.caller env) to value.\\n```\\n\\nThe function `transfer` is a wrapper around `transfer_from_to`, using the smart contract caller as the `from` account. The monad `MS?` combines the state and error effect. The state is given by the `State.t` type:\\n\\n```coq\\nModule State.\\n Definition t : Set := erc20.Erc20.t * list erc20.Event.t.\\nEnd State.\\n```\\n\\nIt combines the state of the contract (type `Self` in the Rust code) and a list of events to represent the logs. The errors of the monad include panic errors, as well as control flow primitives such as `return` or `break` that we implement with exceptions.\\n\\n### Equivalence statement\\n\\nWe write all our proofs in [Proofs/erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/Proofs/erc20.v). 
The lemma stating that the simulation is equivalent to the original code is:\\n\\n```coq\\nLemma run_transfer\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance))\\n (H_storage : Erc20.Valid.t storage)\\n (H_value : Integer.Valid.t value) :\\n let state := State.of_storage storage in\\n let self := Ref.mut_ref Address.storage in\\n let simulation :=\\n lift_simulation\\n (Simulations.erc20.transfer env to value) storage in\\n {{ Environment.of_env env, state |\\n erc20.Impl_erc20_Erc20_t_2.transfer self to value \u21d3\\n simulation.(Output.result)\\n | simulation.(Output.state) }}.\\n```\\n\\nThe main predicate is:\\n\\n```coq\\n{{ env, state | translated_code \u21d3 result | final_state }}.\\n```\\n\\nThis predicate defines our semantics, explaining how to evaluate a translated Rust code in an environment `env` and a state `state`, to obtain a result `result` and a final state `final_state`. We use an environment in addition to a state to initialize various globals and other information related to the execution context. 
For example, here, we use the environment to store the `caller` of the contract and the pointer to the list of logs.\\n\\n### Semantics\\n\\nWe define our monad for the translated code `M A` in a style by continuation:\\n\\n```coq\\nInductive t (A : Set) : Set :=\\n| Pure : A -> t A\\n| CallPrimitive {B : Set} : Primitive.t B -> (B -> t A) -> t A\\n| Cast {B1 B2 : Set} : B1 -> (B2 -> t A) -> t A\\n| Impossible : t A.\\nArguments Pure {_}.\\nArguments CallPrimitive {_ _}.\\nArguments Cast {_ _ _}.\\nArguments Impossible {_}.\\n```\\n\\nFor now, we use the primitives to access the memory and the environment:\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set -> Set :=\\n | StateAlloc {A : Set} : A -> t (Ref.t A)\\n | StateRead {Address A : Set} : Address -> t A\\n | StateWrite {Address A : Set} : Address -> A -> t unit\\n | EnvRead {A : Set} : t A.\\nEnd Primitive.\\n```\\n\\nFor each of our monad constructs, we add a case to our evaluation predicate that we will describe:\\n\\n- `Pure` The result is the value itself, and the state is unchanged:\\n ```coq\\n | Pure :\\n {{ env, state\' | LowM.Pure result \u21d3 result | state\' }}\\n ```\\n- `Cast` The evaluation is only possible when `B1` and `B2` are the same type `B`:\\n ```coq\\n | Cast {B : Set} (state : State) (v : B) (k : B -> LowM A) :\\n {{ env, state | k v \u21d3 result | state\' }} ->\\n {{ env, state | LowM.Cast v k \u21d3 result | state\' }}\\n ```\\n In this case, we return the result of the continuation `k` of the cast. We do not change the state in the cast.\\n- We read the state using the primitive `State.read`, checking that the `address` is indeed allocated (it returns `None` otherwise). Note that the type of `v` depends on its address. 
We directly allocate values with their original type, to avoid serializations/deserializations to represent the state.\\n ```coq\\n | CallPrimitiveStateRead\\n (address : Address) (v : State.get_Set address)\\n (state : State)\\n (k : State.get_Set address -> LowM A) :\\n State.read address state = Some v ->\\n {{ env, state | k v \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateRead address) k \u21d3 result\\n | state\' }}\\n ```\\n- Similarly, we write into the state with `State.alloc_write`, that only succeeds for allocated addresses:\\n ```coq\\n | CallPrimitiveStateWrite\\n (address : Address) (v : State.get_Set address)\\n (state state_inter : State)\\n (k : unit -> LowM A) :\\n State.alloc_write address state v = Some state_inter ->\\n {{ env, state_inter | k tt \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateWrite address v) k \u21d3 result\\n | state\' }}\\n ```\\n- To allocate a new value in memory, we have to make a choice depending on whether we want this value to be writable or not. For immutable values, we do not create a new address and instead say that the address is the value itself:\\n ```coq\\n | CallPrimitiveStateAllocNone {B : Set}\\n (state : State) (v : B)\\n (k : Ref B -> LowM A) :\\n {{ env, state | k (Ref.Imm v) \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateAlloc v) k \u21d3 result\\n | state\' }}\\n ```\\n If we later attempt to update this value, it will not be possible to define a semantics and we will be stuck. It is up to the user to correctly anticipate if a value will be updated or not to define the semantics. 
For values that might be updated, we use:\\n ```coq\\n | CallPrimitiveStateAllocSome\\n (address : Address) (v : State.get_Set address)\\n (state : State)\\n (k : Ref (State.get_Set address) -> LowM A) :\\n let r :=\\n Ref.MutRef (A := State.get_Set address) (B := State.get_Set address)\\n address (fun full_v => full_v) (fun v _full_v => v) in\\n State.read address state = None ->\\n State.alloc_write address state v = Some state\' ->\\n {{ env, state | k r \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateAlloc v) k \u21d3 result\\n | state\' }}\\n ```\\n We need to provide an address not already allocated: `State.read` should return `None`. At this point, we can make any choice of unallocated address in order to simplify the proofs later.\\n- Finally, we read the whole environment with:\\n ```coq\\n | CallPrimitiveEnvRead\\n (state : State) (k : Env -> LowM A) :\\n {{ env, state | k env \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive Primitive.EnvRead k \u21d3 result\\n | state\' }}\\n ```\\n\\n### Semantics remarks\\n\\nWe can make a few remarks about our semantics:\\n\\n- There are no cases for `M.Impossible` as this primitive corresponds to impossible branches in the code.\\n- The semantics is not computable, in the sense that we cannot define a function `run` to evaluate a monadic program in a certain environment and state. Indeed, the user needs to make a choice during the allocation of new values, to know if we allocate the value as immutable or mutable, and with which address. The `M.Cast` operator is also not computable, as we cannot decide if two types are equal.\\n- We can choose the type that we use for the `State`, as well as the primitives `State.read` and `State.alloc_write`, as long as they verify well-formedness properties. For example, reading after a write at the same address should return the written value. One should choose a `State` that simplifies its proofs the most. 
To verify the smart contract, we have taken a record with two fields:\\n 1. the storage of the contract (the `Self` type in Rust),\\n 2. the list of events logged by the contract.\\n- Even if the monad is in continuation-passing style, we add a primitive `M.Call` corresponding to a bind, to make explicit the points in the code where we call user-defined functions. This is not necessary but helpful to track things in the proofs. Otherwise, the monadic bind is defined as a fixpoint with:\\n ```coq\\n Fixpoint bind {A B : Set} (e1 : t A) (f : A -> t B) : t B :=\\n match e1 with\\n | Pure v => f v\\n | CallPrimitive primitive k =>\\n CallPrimitive primitive (fun v => bind (k v) f)\\n | Cast v k =>\\n Cast v (fun v\' => bind (k v\') f)\\n | Impossible => Impossible\\n end.\\n ```\\n- To handle the panic and `return`/`break` exceptions, we wrap our monad into an error monad:\\n ```coq\\n Definition M (A : Set) : Set :=\\n LowM (A + Exception.t).\\n ```\\n where `LowM` is the monad without errors as defined above and `Exception.t` is:\\n ```coq\\n Module Exception.\\n Inductive t : Set :=\\n (** exceptions for Rust\'s `return` *)\\n | Return {A : Set} : A -> t\\n (** exceptions for Rust\'s `continue` *)\\n | Continue : t\\n (** exceptions for Rust\'s `break` *)\\n | Break : t\\n | Panic : Coq.Strings.String.string -> t.\\n End Exception.\\n ```\\n\\n### Proof of equivalence\\n\\nTo prove that the equivalence between the simulation and the original code holds, we proceed by induction on the monadic code. This corresponds to symbolically evaluating the monadic code, in the proof mode of Coq, applying the primitives of the semantics predicate at each step. 
We use the following tactic to automate this work:\\n\\n```coq\\nrun_symbolic.\\n```\\n\\nWe manually handle the following cases:\\n\\n- branching (`if` or `match`),\\n- external function calls: generally, we apply an existing equivalence proof for a call to another function instead of doing the symbolic evaluation of the function,\\n- memory allocations: we need to choose the type of allocation (mutable or immutable) and the address of the allocation for mutable ones.\\n\\nHere is the proof for the `transfer` function:\\n\\n```coq\\nProof.\\n unfold erc20.Impl_erc20_Erc20_t_2.transfer,\\n Simulations.erc20.transfer,\\n lift_simulation.\\n Opaque erc20.transfer_from_to.\\n run_symbolic.\\n eapply Run.Call. {\\n apply run_env.\\n }\\n run_symbolic.\\n eapply Run.Call. {\\n apply Env.run_caller.\\n }\\n run_symbolic.\\n eapply Run.Call. {\\n now apply run_transfer_from_to.\\n }\\n unfold lift_simulation.\\n destruct erc20.transfer_from_to as [[] [?storage ?logs]]; run_symbolic.\\n Transparent erc20.transfer_from_to.\\nQed.\\n```\\n\\n## Proofs\\n\\n### Handling of integers\\n\\nWe distinguish the various types of integers used in Rust:\\n\\n- unsigned ones: `u8`, `u16`, `u32`, `u64`, `u128`, `usize`,\\n- signed ones: `i8`, `i16`, `i32`, `i64`, `i128`, `isize`.\\n\\nWe define a separate type for each of them, that is to say, a wrapper around the `Z` type of unbounded integers from Coq:\\n\\n```coq\\nModule u8.\\n Inductive t : Set := Make (z : Z) : t.\\nEnd u8.\\n```\\n\\nTo enforce the bounds, we define a validity predicate for each type:\\n\\n```coq\\nModule Valid.\\n Definition t {A : Set} `{Integer.C A} (v : A) : Prop :=\\n Integer.min <= Integer.to_Z v <= Integer.max.\\nEnd Valid.\\n```\\n\\nAll integer types are of the class `Integer.C` with a `min`, `max`, and `to_Z` functions. We do not embed this predicate with the integer type ([refinement type](https://en.wikipedia.org/wiki/Refinement_type)) to avoid mixing proofs and code. 
We pay a cost by having to handle the values and the validity proofs separately.\\n\\nDepending on the configuration mode of Rust, integer operations can overflow or panic. We have several implementations of the arithmetic operations, depending on the mode:\\n\\n```coq\\nModule BinOp.\\n (** Operators with panic, in the monad. *)\\n Module Panic.\\n Definition add {A : Set} `{Integer.C A} (v1 v2 : A) : M A :=\\n (* ... *)\\n\\n Definition sub (* ... *)\\n End Panic.\\n\\n (** Operators with overflow, outside of the monad as\\n there cannot be any errors. *)\\n Module Wrap.\\n Definition add {A : Set} `{Integer.C A} (v1 v2 : A) : A :=\\n (* ... *)\\n\\n Definition sub (* ... *)\\n End Wrap.\\nEnd BinOp.\\n```\\n\\nWe also have additional operators, useful for the definition of simulations:\\n\\n- optimistic operators, operating on `Z` without checking the bounds of the result (for cases where we can prove that the result is never out of bounds),\\n- operators returning in the option monad, to handle the case where the result is out of bounds.\\n\\nNote that the comparison operators (`=`, `<`, ...) never panic or overflow. In the context of these smart contracts, the arithmetic operators are panicking in case of overflow.\\n\\n### Definition of messages\\n\\nWe can call the smart contract with three read primitives (`total_supply`, `balance_of`, `allowance`) and three write primitives (`transfer`, `approve`, `transfer_from`). We define two message types to formalize these access points. This will later allow us to express properties over all possible read and write messages:\\n\\n```coq\\nModule ReadMessage.\\n (** The type parameter is the type of result of the call. 
*)\\n Inductive t : Set -> Set :=\\n | total_supply :\\n t ltac:(erc20.Balance)\\n | balance_of\\n (owner : erc20.AccountId.t) :\\n t ltac:(erc20.Balance)\\n | allowance\\n (owner : erc20.AccountId.t)\\n (spender : erc20.AccountId.t) :\\n t ltac:(erc20.Balance).\\nEnd ReadMessage.\\n\\nModule WriteMessage.\\n Inductive t : Set :=\\n | transfer\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n t\\n | approve\\n (spender : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n t\\n | transfer_from\\n (from : erc20.AccountId.t)\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n t.\\nEnd WriteMessage.\\n```\\n\\n### No panics on read messages\\n\\nWe show that for all possible read messages, the smart contract does not panic:\\n\\n```coq\\nLemma read_message_no_panic\\n (env : erc20.Env.t)\\n (message : ReadMessage.t ltac:(erc20.Balance))\\n (storage : erc20.Erc20.t) :\\n let state := State.of_storage storage in\\n exists result,\\n {{ Environment.of_env env, state |\\n ReadMessage.dispatch message \u21d3\\n (* [inl] means success (no panics) *)\\n inl result\\n | state }}.\\n```\\n\\nThis is done by symbolic evaluation of the simulations:\\n\\n```coq\\nProof.\\n destruct message; simpl.\\n { eexists.\\n apply run_total_supply.\\n }\\n { eexists.\\n apply run_balance_of.\\n }\\n { eexists.\\n apply run_allowance.\\n }\\nQed.\\n```\\n\\n### Invariants\\n\\nThe data structure of the storage of the smart contract is as follows:\\n\\n```rust\\npub struct Erc20 {\\n total_supply: Balance,\\n balances: Mapping,\\n allowances: Mapping<(AccountId, AccountId), Balance>,\\n}\\n```\\n\\nAn invariant is that the total supply is always equal to the sum of all the balances in the mapping `Mapping`. 
We define this invariant in Coq as:\\n\\n```coq\\nDefinition sum_of_money (storage : erc20.Erc20.t) : Z :=\\n Lib.Mapping.sum Integer.to_Z storage.(erc20.Erc20.balances).\\n\\nModule Valid.\\n Definition t (storage : erc20.Erc20.t) : Prop :=\\n Integer.to_Z storage.(erc20.Erc20.total_supply) =\\n sum_of_money storage.\\nEnd Valid.\\n```\\n\\nWe show that this invariant holds for any output of the write messages, given that it holds for the input storage:\\n\\n```coq\\nLemma write_dispatch_is_valid\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (write_message : WriteMessage.t)\\n (H_storage : Erc20.Valid.t storage)\\n (H_write_message : WriteMessage.Valid.t write_message) :\\n let state := State.of_storage storage in\\n let \'(result, (storage, _)) :=\\n WriteMessage.simulation_dispatch env write_message (storage, []) in\\n match result with\\n | inl _ => Erc20.Valid.t storage\\n | _ => True\\n end.\\n```\\n\\nWe assume that the initial storage is valid with the hypothesis:\\n\\n```coq\\n(H_storage : Erc20.Valid.t storage)\\n```\\n\\nWe show the property in the case without panics with:\\n\\n```coq\\nmatch result with\\n | inl _ => ...\\n```\\n\\nWhen the smart contract panics (integer overflow), the storage is discarded anyway, and it might actually be invalid. For example, in the `transfer_from_to` function we have:\\n\\n```rust\\nself.balances.insert(*from, from_balance - value);\\nlet to_balance = self.balance_of_impl(to);\\nself.balances.insert(*to, to_balance + value);\\n```\\n\\nSo if there is a panic during the addition `+`, like an overflow, the final storage can have the `from` account modified but not the `to` account. So here, the balance sum is no longer equal to the total supply.\\n\\n### Total supply is constant\\n\\nWe show that the total supply is also a constant, meaning that no calls to the smart contract can modify its value. 
The statement is the following:\\n\\n```coq\\nLemma write_dispatch_is_constant\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (write_message : WriteMessage.t) :\\n let state := State.of_storage storage in\\n let \'(result, (storage\', _)) :=\\n WriteMessage.simulation_dispatch env write_message (storage, []) in\\n match result with\\n | inl _ =>\\n storage.(erc20.Erc20.total_supply) =\\n storage\'.(erc20.Erc20.total_supply)\\n | _ => True\\n end.\\n```\\n\\nIt says that for any initial `storage` and `write_message` sent to the smart contract, if we return a result without panicking (`inl _`), then the total supply in the final storage `storage\'` is equal to the initial one. We verify this fact by symbolic evaluation of all the branches of the simulation. There are no difficulties in this proof as the code never modifies the `total_supply`.\\n\\n### Action from the logs\\n\\nWe infer the action of the smart contract on the storage from its logs. This characterizes exactly what modifications we can deduce on the storage from the logs. We define an action as a function from the storage to a set of possible new storages, given the knowledge of the logs of the contract:\\n\\n```coq\\nModule Action.\\n Definition t : Type := erc20.Erc20.t -> erc20.Erc20.t -> Prop.\\nEnd Action.\\n```\\n\\nThe main statement is the following:\\n\\n```coq\\nLemma retrieve_action_from_logs\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (write_message : WriteMessage.t)\\n (events : list erc20.Event.t) :\\n match\\n WriteMessage.simulation_dispatch env write_message (storage, [])\\n with\\n | (inl (result.Result.Ok tt), (storage\', events)) =>\\n action_of_events events storage storage\'\\n | _ => True\\n end.\\n```\\n\\nThis relates the final storage `storage\'` to the initial storage `storage` using the logs `events` when there are no panics. 
We define the `action_of_events` predicate as the successive application of the `action_of_event` predicate, which is defined as:\\n\\n```coq\\nDefinition action_of_event (event : erc20.Event.t) : Action.t :=\\n fun storage storage\' =>\\n match event with\\n | erc20.Event.Transfer (erc20.Transfer.Build_t\\n (option.Option.Some from)\\n (option.Option.Some to)\\n value\\n ) =>\\n (* In case of transfer event, we do not know how the allowances are\\n updated. *)\\n exists allowances\',\\n storage\' =\\n storage <|\\n erc20.Erc20.balances := balances_of_transfer storage from to value\\n |> <|\\n erc20.Erc20.allowances := allowances\'\\n |>\\n | erc20.Event.Transfer (erc20.Transfer.Build_t _ _ _) => False\\n | erc20.Event.Approval (erc20.Approval.Build_t owner spender value) =>\\n storage\' =\\n storage <|\\n erc20.Erc20.allowances :=\\n Lib.Mapping.insert (owner, spender) value\\n storage.(erc20.Erc20.allowances)\\n |>\\n end.\\n```\\n\\nWhen the `event` in the logs is of kind `erc20.Event.Transfer`, the resulting storage has:\\n\\n- the `balances` updated according to the function `balances_of_transfer`;\\n- the `allowances` updated to an unknown value `allowances\'`.\\n\\nWhen the `event` in the logs is of kind `erc20.Event.Approval`, the resulting storage has:\\n\\n- the `allowances` updated calling `Lib.Mapping.insert` on `(owner, spender)`;\\n- the `balances` unchanged.\\n\\n### Approve only on caller\\n\\nWe added one last proof to say that when the `approve` function succeeds, it only modifies the allowance of the caller:\\n\\n```coq\\nLemma approve_only_changes_owner_allowance\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (spender : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n let \'(result, (storage\', _)) :=\\n Simulations.erc20.approve env spender value (storage, []) in\\n match result with\\n | inl (result.Result.Ok tt) =>\\n forall owner spender,\\n Integer.to_Z (Simulations.erc20.allowance storage\' owner spender) <>\\n 
Integer.to_Z (Simulations.erc20.allowance storage owner spender) ->\\n owner = Simulations.erc20.Env.caller env\\n | _ => True\\n end.\\n```\\n\\nIf an allowance changes after the call to `approve`, then the owner of the allowance is the caller of the smart contract. This is done by symbolic evaluation of the simulation.\\n\\n## Conclusion\\n\\nIn this example, we have shown how we formally verify the ERC-20 smart contract written in Rust for the [Aleph Zero](https://alephzero.org/) project. Formally verifying smart contracts is extremely important as they can hold a lot of money, and a single bug can prove fatal as recent attacks continue to show: [List of crypto hacks in 2023](https://www.ccn.com/education/crypto-hacks-2023-full-list-of-scams-and-exploits-as-millions-go-missing/).\\n\\nIf you have Rust smart contracts to verify, feel free to email us at [contact@formal.land](mailto:contact@formal.land). We will be happy to help!"},{"id":"/2023/11/26/rust-function-body","metadata":{"permalink":"/blog/2023/11/26/rust-function-body","source":"@site/blog/2023-11-26-rust-function-body.md","title":"Translation of function bodies from Rust to Coq","description":"Our tool coq-of-rust enables formal verification of \ud83e\udd80 Rust code, to make sure that a program has no bugs given a precise specification. 
We work by translating Rust programs to the general proof system \ud83d\udc13 Coq.","date":"2023-11-26T00:00:00.000Z","formattedDate":"November 26, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"}],"readingTime":4.975,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Translation of function bodies from Rust to Coq","tags":["coq-of-rust","Rust","Coq"],"authors":[]},"unlisted":false,"prevItem":{"title":"Verifying an ERC-20 smart contract in Rust","permalink":"/blog/2023/12/13/rust-verify-erc-20-smart-contract"},"nextItem":{"title":"Optimizing Rust translation to Coq with THIR and bundled traits","permalink":"/blog/2023/11/08/rust-thir-and-bundled-traits"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) enables formal verification of [\ud83e\udd80 Rust](https://www.rust-lang.org/) code, to make sure that a program has no bugs given a precise specification. We work by translating Rust programs to the general proof system [\ud83d\udc13 Coq](https://coq.inria.fr/).\\n\\nHere, we present how we translate function bodies from Rust to Coq in an example. We also show some of the optimizations we made to reduce the size of the translation.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nIf you need to formally verify your Rust codebase to improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)!\\n\\n:::\\n\\n![Rust and Coq](2023-11-26/rust_and_coq.png)\\n\\n## Translating a function body\\n\\nWe take the following Rust example as input:\\n\\n```rust\\n// fn balance_of_impl(&self, owner: &AccountId) -> Balance { ... 
}\\n\\nfn balance_of(&self, owner: AccountId) -> Balance {\\n self.balance_of_impl(&owner)\\n}\\n```\\n\\nHere is the corresponding Coq code that `coq-of-rust` generates _without optimizations_:\\n\\n```coq\\nDefinition balance_of\\n (self : ref ltac:(Self))\\n (owner : erc20.AccountId.t)\\n : M ltac:(erc20.Balance) :=\\n let* self : M.Val (ref ltac:(Self)) := M.alloc self in\\n let* owner : M.Val erc20.AccountId.t := M.alloc owner in\\n let* \u03b10 : ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : M.Val erc20.Erc20.t := deref \u03b10 in\\n let* \u03b12 : ref erc20.Erc20.t := borrow \u03b11 in\\n let* \u03b13 : M.Val (ref erc20.Erc20.t) := M.alloc \u03b12 in\\n let* \u03b14 : ref erc20.Erc20.t := M.read \u03b13 in\\n let* \u03b15 : ref erc20.AccountId.t := borrow owner in\\n let* \u03b16 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b15 in\\n let* \u03b17 : ref erc20.AccountId.t := M.read \u03b16 in\\n let* \u03b18 : M.Val erc20.AccountId.t := deref \u03b17 in\\n let* \u03b19 : ref erc20.AccountId.t := borrow \u03b18 in\\n let* \u03b110 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b19 in\\n let* \u03b111 : ref erc20.AccountId.t := M.read \u03b110 in\\n let* \u03b112 : u128.t := erc20.Erc20.t::[\\"balance_of_impl\\"] \u03b14 \u03b111 in\\n let* \u03b113 : M.Val u128.t := M.alloc \u03b112 in\\n M.read \u03b113.\\n```\\n\\nThis code is much more verbose than the original Rust code as we make all pointer manipulations explicit. We will see just after how to simplify it. We start with the function declaration:\\n\\n```coq\\nDefinition balance_of\\n (self : ref ltac:(Self))\\n (owner : erc20.AccountId.t)\\n : M ltac:(erc20.Balance) :=\\n```\\n\\nthat repeats the parameters in the Rust source. Note that the final result is wrapped into the monad type `M`. This is a monad representing all the side-effects used in Rust programs (state, panic, non-termination, ...). 
Then, we allocate all the function parameters:\\n\\n```coq\\n let* self : M.Val (ref ltac:(Self)) := M.alloc self in\\n let* owner : M.Val erc20.AccountId.t := M.alloc owner in\\n```\\n\\nThis ensures that both `self` and `owner` have an address in memory, in case we borrow them later. This allocation is also fresh, so we cannot access the address of the values from the caller by mistake. We use the monadic let `let*` as allocations can modify the memory state.\\n\\nThen we start with the body of the function itself. We do all the necessary pointer manipulations to compute the parameters `self` and `&owner` of the function `balance_of_impl`. These representations are directly taken from the abstract syntax tree of the Rust compiler (using the [THIR](https://rustc-dev-guide.rust-lang.org/thir.html) version).\\n\\nFor example, for the first parameter `self`, named `\u03b14` in this translation, we do:\\n\\n```coq\\n let* \u03b10 : ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : M.Val erc20.Erc20.t := deref \u03b10 in\\n let* \u03b12 : ref erc20.Erc20.t := borrow \u03b11 in\\n let* \u03b13 : M.Val (ref erc20.Erc20.t) := M.alloc \u03b12 in\\n let* \u03b14 : ref erc20.Erc20.t := M.read \u03b13 in\\n```\\n\\nWe combine the operators:\\n\\n- `M.read`: to get a value of type `A` from a value with an address `M.Val`,\\n- `deref`: to get the value with an address `M.Val A` pointed by a reference `ref A`,\\n- `borrow`: to get the reference `ref A` to a value with an address `M.Val A`,\\n- `M.alloc`: to allocate a new value `A` in memory, returning a value with address `M.Val A`.\\n\\nWe do the same to compute the second parameter `&owner` of `balance_of_impl` with:\\n\\n```coq\\n let* \u03b15 : ref erc20.AccountId.t := borrow owner in\\n let* \u03b16 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b15 in\\n let* \u03b17 : ref erc20.AccountId.t := M.read \u03b16 in\\n let* \u03b18 : M.Val erc20.AccountId.t := deref \u03b17 in\\n let* \u03b19 : ref erc20.AccountId.t := 
borrow \u03b18 in\\n let* \u03b110 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b19 in\\n let* \u03b111 : ref erc20.AccountId.t := M.read \u03b110 in\\n```\\n\\nFinally, we call the `balance_of_impl` function and return the result:\\n\\n```coq\\n let* \u03b112 : u128.t := erc20.Erc20.t::[\\"balance_of_impl\\"] \u03b14 \u03b111 in\\n let* \u03b113 : M.Val u128.t := M.alloc \u03b112 in\\n M.read \u03b113.\\n```\\n\\nWe do not keep the address of the result, as it will be allocated again by the caller function.\\n\\n## Optimizations\\n\\nSome operations can always be removed, namely:\\n\\n- `M.read (M.alloc v) ==> v`: we do not need to allocate and give an address to a value if it will be immediately read,\\n- `deref (borrow v) ==> v` and `borrow (deref v) ==> v`: the borrowing and dereferencing operators are doing the opposite, so they cancel each other. We need to be careful of the mutability status of the borrowing and dereferencing.\\n\\nApplying these simple simplification rules, we get the following slimmed-down translation:\\n\\n```coq\\nDefinition balance_of\\n (self : ref ltac:(Self))\\n (owner : erc20.AccountId.t)\\n : M ltac:(erc20.Balance) :=\\n let* self : M.Val (ref ltac:(Self)) := M.alloc self in\\n let* owner : M.Val erc20.AccountId.t := M.alloc owner in\\n let* \u03b10 : ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : ref erc20.AccountId.t := borrow owner in\\n erc20.Erc20.t::[\\"balance_of_impl\\"] \u03b10 \u03b11.\\n```\\n\\nThis is much shorter and easier to verify!\\n\\n## Conclusion\\n\\nWe have illustrated in an example how we translate a simple function from Rust to Coq. In this example, we saw how the pointer operations are made explicit in the abstract syntax tree of Rust, and how we simplify them for the frequent cases.\\n\\nIf you have any comments or suggestions, feel free to email us at [contact@formal.land](mailto:contact@formal.land). 
In future posts, we will go into more detail about the verification process itself."},{"id":"/2023/11/08/rust-thir-and-bundled-traits","metadata":{"permalink":"/blog/2023/11/08/rust-thir-and-bundled-traits","source":"@site/blog/2023-11-08-rust-thir-and-bundled-traits.md","title":"Optimizing Rust translation to Coq with THIR and bundled traits","description":"We continued our work on coq-of-rust, a tool to formally verify Rust programs using the proof system Coq \ud83d\udc13. This tool translates Rust programs to an equivalent Coq program, which can then be verified using Coq\'s proof assistant. It opens the door to building mathematically proven bug-free Rust programs.","date":"2023-11-08T00:00:00.000Z","formattedDate":"November 8, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"trait","permalink":"/blog/tags/trait"},{"label":"THIR","permalink":"/blog/tags/thir"},{"label":"HIR","permalink":"/blog/tags/hir"}],"readingTime":5.22,"hasTruncateMarker":true,"authors":[{"name":"Guillaume Claret"}],"frontMatter":{"title":"Optimizing Rust translation to Coq with THIR and bundled traits","tags":["coq-of-rust","Rust","Coq","trait","THIR","HIR"],"author":"Guillaume Claret"},"unlisted":false,"prevItem":{"title":"Translation of function bodies from Rust to Coq","permalink":"/blog/2023/11/26/rust-function-body"},"nextItem":{"title":"Trait representation in Coq","permalink":"/blog/2023/08/25/trait-representation-in-coq"}},"content":"We continued our work on [coq-of-rust](https://github.com/formal-land/coq-of-rust), a tool to formally verify [Rust](https://www.rust-lang.org/) programs using the proof system [Coq \ud83d\udc13](https://coq.inria.fr/). This tool translates Rust programs to an equivalent Coq program, which can then be verified using Coq\'s proof assistant. 
It opens the door to building mathematically proven bug-free Rust programs.\\n\\nWe present two main improvements we made to `coq-of-rust`:\\n\\n- Using the THIR intermediate language of Rust to have more information during the translation to Coq.\\n- Bundling the type-classes representing the traits of Rust to have faster type-checking in Coq.\\n\\n\x3c!-- truncate --\x3e\\n\\n![Rust and Coq](2023-11-08/rust_and_coq.png)\\n\\n## THIR intermediate language\\n\\nTo translate Rust programs to Coq, we plug into the compiler of Rust, which operates on a series of intermediate languages:\\n\\n- source code (`.rs` files);\\n- abstract syntax tree (AST): immediately after parsing;\\n- [High-Level Intermediate Representation](https://rustc-dev-guide.rust-lang.org/hir.html) (HIR): after macro expansion, with name resolution and close to the AST;\\n- [Typed High-Level Intermediate Representation](https://rustc-dev-guide.rust-lang.org/thir.html) (THIR): after the type-checking;\\n- [Mid-level Intermediate Representation](https://rustc-dev-guide.rust-lang.org/mir/index.html) (MIR): low-level representation based on a [control-flow graph](https://en.wikipedia.org/wiki/Control-flow_graph), inlining traits and polymorphic functions, and with [borrow checking](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html);\\n- machine code (assembly, LLVM IR, ...).\\n\\nWe were previously using the HIR language to start our translation to Coq, because it is not too low-level and close to what the user has originally in the `.rs` file. This helps relate the generated Coq code to the original Rust code.\\n\\nHowever, at the level of HIR, there is still a lot of implicit information. For example, Rust has [automatic dereferencing rules](https://users.rust-lang.org/t/automatic-dereferencing/53828) that are not yet explicit in HIR. 
In order not to make any mistakes during our translation to Coq, we prefer to use the next representation, THIR, that makes explicit such rules.\\n\\nIn addition, the THIR representation shows when a method call is from a trait (and which trait) or from a standalone `impl` block. Given that we still have trouble translating the traits with [type-classes](https://coq.inria.fr/doc/V8.18.0/refman/addendum/type-classes.html) that are inferrable by Coq, this helps a lot.\\n\\nA downside of the THIR representation is that it is much more verbose. For example, here is a formatting function generated from HIR:\\n\\n```coq\\nDefinition fmt\\n `{\u210b : State.Trait}\\n (self : ref Self)\\n (f : mut_ref core.fmt.Formatter)\\n : M core.fmt.Result :=\\n let* \u03b10 := format_argument::[\\"new_display\\"] (addr_of self.[\\"radius\\"]) in\\n let* \u03b11 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"Circle of radius \\" ])\\n (addr_of [ \u03b10 ]) in\\n f.[\\"write_fmt\\"] \u03b11.\\n```\\n\\nThis is the kind of functions generated by the `#[derive(Debug)]` macro of Rust, to implement a formatting function on a type. 
Here is the version translated from THIR, with explicit borrowing and dereferencing:\\n\\n```coq\\nDefinition fmt\\n `{\u210b : State.Trait}\\n (self : ref Self)\\n (f : mut_ref core.fmt.Formatter)\\n : M ltac:(core.fmt.Result) :=\\n let* \u03b10 := deref f core.fmt.Formatter in\\n let* \u03b11 := borrow_mut \u03b10 core.fmt.Formatter in\\n let* \u03b12 := borrow [ mk_str \\"Circle of radius \\" ] (list (ref str)) in\\n let* \u03b13 := deref \u03b12 (list (ref str)) in\\n let* \u03b14 := borrow \u03b13 (list (ref str)) in\\n let* \u03b15 := pointer_coercion \\"Unsize\\" \u03b14 in\\n let* \u03b16 := deref self converting_to_string.Circle in\\n let* \u03b17 := \u03b16.[\\"radius\\"] in\\n let* \u03b18 := borrow \u03b17 i32 in\\n let* \u03b19 := deref \u03b18 i32 in\\n let* \u03b110 := borrow \u03b19 i32 in\\n let* \u03b111 := core.fmt.rt.Argument::[\\"new_display\\"] \u03b110 in\\n let* \u03b112 := borrow [ \u03b111 ] (list core.fmt.rt.Argument) in\\n let* \u03b113 := deref \u03b112 (list core.fmt.rt.Argument) in\\n let* \u03b114 := borrow \u03b113 (list core.fmt.rt.Argument) in\\n let* \u03b115 := pointer_coercion \\"Unsize\\" \u03b114 in\\n let* \u03b116 := core.fmt.Arguments::[\\"new_v1\\"] \u03b15 \u03b115 in\\n core.fmt.Formatter::[\\"write_fmt\\"] \u03b11 \u03b116.\\n```\\n\\nWe went from a function having two intermediate variables to seventeen intermediate variables. This code is much more verbose, but it is also more explicit. In particular, it details when the:\\n\\n- borrowing (going from a value of type `T` to `&T`), and the\\n- dereferencing (going from a value of type `&T` to `T`)\\n\\noccur. It also shows that the method `write_fmt` is a method from the implementation of the type `core.fmt.Formatter`, generating:\\n\\n```coq\\ncore.fmt.Formatter::[\\"write_fmt\\"] \u03b11 \u03b116\\n```\\n\\ninstead of:\\n\\n```coq\\nf.[\\"write_fmt\\"] \u03b11\\n```\\n\\n## Bundled traits\\n\\nSome Rust codebases can have a lot of traits. 
For example in [paritytech/ink/crates/env/src/types.rs](https://github.com/paritytech/ink/blob/ccb38d2c3ac27523fe3108f2bb7bffbbe908cdb7/crates/env/src/types.rs#L120) the trait `Environment` references more than forty other traits:\\n\\n```rust\\npub trait Environment: Clone {\\n const MAX_EVENT_TOPICS: usize;\\n\\n type AccountId: \'static\\n + scale::Codec\\n + CodecAsType\\n + Clone\\n + PartialEq\\n + ...;\\n\\n type Balance: \'static\\n + scale::Codec\\n + CodecAsType\\n + ...;\\n\\n ...\\n```\\n\\nWe first used an unbundled approach to represent this trait by a type-class in Coq, as it felt more natural:\\n\\n```coq\\nModule Environment.\\n Class Trait (Self : Set) `{Clone.Trait Self}\\n {AccountId : Set}\\n `{scale.Codec.Trait AccountId}\\n `{CodecAsType AccountId}\\n `{Clone AccountId}\\n `{PartialEq AccountId}\\n ...\\n```\\n\\nHowever, the backquote operator generated too many implicit arguments, and the type-checker of Coq was very slow. We then switched to a bundled approach, as advocated in this blog post: [Exponential blowup when using unbundled typeclasses to model algebraic hierarchies](https://www.ralfj.de/blog/2019/05/15/typeclasses-exponential-blowup.html). The Coq code for this trait now looks like this:\\n\\n```coq\\nModule Environment.\\n Class Trait `{\u210b : State.Trait} (Self : Set) : Type := {\\n \u210b_0 :: Clone.Trait Self;\\n MAX_EVENT_TOPICS : usize;\\n AccountId : Set;\\n \u2112_0 :: parity_scale_codec.codec.Codec.Trait AccountId;\\n \u2112_1 :: ink_env.types.CodecAsType.Trait AccountId;\\n \u2112_2 :: core.clone.Clone.Trait AccountId;\\n \u2112_3 ::\\n core.cmp.PartialEq.Trait AccountId\\n (Rhs := core.cmp.PartialEq.Default.Rhs AccountId);\\n ...;\\n Balance : Set;\\n \u2112_8 :: parity_scale_codec.codec.Codec.Trait Balance;\\n \u2112_9 :: ink_env.types.CodecAsType.Trait Balance;\\n ...;\\n\\n ...\\n```\\n\\nWe use the notation `::` for fields that are trait instances. 
With this approach, traits have types as parameters but no other traits.\\n\\nThe type-checking is now much faster, and in particular, we avoid some cases with exponential blowup or non-terminating type-checking. But this is not a perfect solution as we still have cases where the instance inference does not terminate or fails with hard-to-understand error messages.\\n\\n## Conclusion\\n\\nWe have illustrated here some improvements we recently made to our [coq-of-rust](https://github.com/formal-land/coq-of-rust) translator for two key areas:\\n\\n- the translation of traits;\\n- the translation of the implicit borrowing and dereferencing, that can occur every time we call a function.\\n\\nThese improvements will allow us to formally verify some more complex Rust codebases. In particular, we are applying `coq-of-rust` to verify smart contracts written for the [ink!](https://use.ink/) platform, that is a subset of Rust.\\n\\n:::tip Contact\\n\\nIf you have comments, similar experiences to share, or wish to formally verify your codebase to improve the security of your application, contact us at [contact@formal.land](mailto:contact@formal.land)!\\n\\n:::"},{"id":"/2023/08/25/trait-representation-in-coq","metadata":{"permalink":"/blog/2023/08/25/trait-representation-in-coq","source":"@site/blog/2023-08-25-trait-representation-in-coq.md","title":"Trait representation in Coq","description":"In our project coq-of-rust we translate programs written in Rust to equivalent programs in the language of the proof system Coq \ud83d\udc13, which will later allow us to formally verify them.","date":"2023-08-25T00:00:00.000Z","formattedDate":"August 25, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"trait","permalink":"/blog/tags/trait"}],"readingTime":7.58,"hasTruncateMarker":true,"authors":[{"name":"Bart\u0142omiej 
Kr\xf3likowski"}],"frontMatter":{"title":"Trait representation in Coq","tags":["coq-of-rust","Rust","Coq","trait"],"author":"Bart\u0142omiej Kr\xf3likowski"},"unlisted":false,"prevItem":{"title":"Optimizing Rust translation to Coq with THIR and bundled traits","permalink":"/blog/2023/11/08/rust-thir-and-bundled-traits"},"nextItem":{"title":"Monad for side effects in Rust","permalink":"/blog/2023/05/28/monad-for-side-effects-in-rust"}},"content":"In our project [coq-of-rust](https://github.com/formal-land/coq-of-rust) we translate programs written in [Rust](https://www.rust-lang.org/) to equivalent programs in the language of the proof system [Coq \ud83d\udc13](https://coq.inria.fr/), which will later allow us to formally verify them.\\nBoth Coq and Rust have many unique features, and there are many differences between them, so in the process of translation we need to treat the case of each language construction separately.\\nIn this post, we discuss how we translate the most complicated one: [traits](https://doc.rust-lang.org/book/ch10-02-traits.html).\\n\\n\x3c!-- truncate --\x3e\\n\\n## \ud83e\udd80 Traits in Rust\\n\\nTrait is the way to define a shared behaviour for a group of types in Rust.\\nTo define a trait we have to specify a list of signatures of the methods we want to be implemented for the types implementing our trait.\\nWe can also create a generic definition of a trait with the same syntax as in every Rust definition.\\nOptionally, we can add a default implementation to any method or extend the list with associated types.\\nTraits can also extend a behaviour of one or more other traits, in which case, to implement a trait for a type we would have to implement all its supertraits first.\\n\\nConsider the following example (adapted from the [Rust Book](https://doc.rust-lang.org/book/)):\\n\\n```rust\\nstruct Sheep {\\n naked: bool,\\n name: &\'static str,\\n}\\n\\ntrait Animal {\\n // Associated function signature; `Self` refers to the implementor 
type.\\n fn new(name: &\'static str) -> Self;\\n\\n // Method signatures; these will return a string.\\n fn name(&self) -> &\'static str;\\n fn noise(&self) -> &\'static str;\\n\\n // Traits can provide default method definitions.\\n fn talk(&self) {\\n println!(\\"{} says {}\\", self.name(), self.noise());\\n }\\n}\\n\\nimpl Sheep {\\n fn is_naked(&self) -> bool {\\n self.naked\\n }\\n}\\n\\n// Implement the `Animal` trait for `Sheep`.\\nimpl Animal for Sheep {\\n // `Self` is the implementor type: `Sheep`.\\n fn new(name: &\'static str) -> Sheep {\\n Sheep {\\n name: name,\\n naked: false,\\n }\\n }\\n\\n fn name(&self) -> &\'static str {\\n self.name\\n }\\n\\n fn noise(&self) -> &\'static str {\\n if self.is_naked() {\\n \\"baaaaah?\\"\\n } else {\\n \\"baaaaah!\\"\\n }\\n }\\n\\n // Default trait methods can be overridden.\\n fn talk(&self) {\\n // For example, we can add some quiet contemplation.\\n println!(\\"{} pauses briefly... {}\\", self.name, self.noise());\\n }\\n}\\n\\nimpl Sheep {\\n fn shear(&mut self) {\\n if self.is_naked() {\\n // Implementor methods can use the implementor\'s trait methods.\\n println!(\\"{} is already naked...\\", self.name());\\n } else {\\n println!(\\"{} gets a haircut!\\", self.name);\\n\\n self.naked = true;\\n }\\n }\\n}\\n\\nfn main() {\\n // Type annotation is necessary in this case.\\n let mut dolly = Animal::new(\\"Dolly\\"): Sheep;\\n\\n dolly.talk();\\n dolly.shear();\\n dolly.talk();\\n}\\n```\\n\\nWe have a type `Sheep`, a trait `Animal`, and an implementation of `Animal` for `Sheep`.\\nAs we can see in `main`, after a trait is implemented for a type, we can use the methods of the trait like normal methods of the type.\\n\\n## Our translation\\n\\nRust notion of trait is very similar to the concept of [typeclasses](https://en.wikipedia.org/wiki/Type_class) in [functional programming](https://en.wikipedia.org/wiki/Functional_programming).\\nTypeclasses are also present in Coq, so translation of this construction 
is quite straightforward.\\n\\nFor a given trait we create a typeclass with fields being just translated signatures of the methods of the trait.\\nTo allow for the use of method syntax, we also define instances of `Notation.Dot` for every method name of the trait.\\nWe also add a parameter of type `Set` for every type parameter of the trait and translate trait bounds of the types into equivalent typeclass parameters.\\n\\n## Translation of associated types\\n\\nAssociated types are a bit harder than methods to translate, because it is possible to use `::` notation to access them.\\nFor that purpose, we created another typeclass in `Notation` module:\\n\\n```coq\\nClass DoubleColonType {Kind : Type} (type : Kind) (name : string) : Type := {\\n double_colon_type : Set;\\n}.\\n```\\n\\nwith a notation:\\n\\n```coq\\nNotation \\"e1 ::type[ e2 ]\\" := (Notation.double_colon_type e1 e2)\\n (at level 0).\\n```\\n\\nFor every associated type, we create a parameter and a field of the typeclass resulting from the trait translation, and below, we create an instance of `Notation.DoubleColonType`.\\n\\n## The example in Coq\\n\\nHere is our Coq translation of the example code above:\\n\\n```coq\\n(* Generated by coq-of-rust *)\\nRequire Import CoqOfRust.CoqOfRust.\\n\\nModule Sheep.\\n Unset Primitive Projections.\\n Record t : Set := {\\n naked : bool;\\n name : ref str;\\n }.\\n Global Set Primitive Projections.\\n\\n Global Instance Get_naked : Notation.Dot \\"naked\\" := {\\n Notation.dot \'(Build_t x0 _) := x0;\\n }.\\n Global Instance Get_name : Notation.Dot \\"name\\" := {\\n Notation.dot \'(Build_t _ x1) := x1;\\n }.\\nEnd Sheep.\\nDefinition Sheep : Set := @Sheep.t.\\n\\nModule Animal.\\n Class Trait (Self : Set) : Set := {\\n new `{H : State.Trait} : (ref str) -> (M (H := H) Self);\\n name `{H : State.Trait} : (ref Self) -> (M (H := H) (ref str));\\n noise `{H : State.Trait} : (ref Self) -> (M (H := H) (ref str));\\n }.\\n\\n Global Instance Method_new `{H : 
State.Trait} `(Trait)\\n : Notation.Dot \\"new\\" := {\\n Notation.dot := new;\\n }.\\n Global Instance Method_name `{H : State.Trait} `(Trait)\\n : Notation.Dot \\"name\\" := {\\n Notation.dot := name;\\n }.\\n Global Instance Method_noise `{H : State.Trait} `(Trait)\\n : Notation.Dot \\"noise\\" := {\\n Notation.dot := noise;\\n }.\\n Global Instance Method_talk `{H : State.Trait} `(Trait)\\n : Notation.Dot \\"talk\\" := {\\n Notation.dot (self : ref Self):=\\n (let* _ :=\\n let* _ :=\\n let* \u03b10 := self.[\\"name\\"] in\\n let* \u03b11 := format_argument::[\\"new_display\\"] (addr_of \u03b10) in\\n let* \u03b12 := self.[\\"noise\\"] in\\n let* \u03b13 := format_argument::[\\"new_display\\"] (addr_of \u03b12) in\\n let* \u03b14 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" says \\"; \\"\\n\\" ])\\n (addr_of [ \u03b11; \u03b13 ]) in\\n std.io.stdio._print \u03b14 in\\n Pure tt in\\n Pure tt\\n : M (H := H) unit);\\n }.\\nEnd Animal.\\n\\nModule Impl_traits_Sheep.\\n Definition Self := traits.Sheep.\\n\\n Definition is_naked `{H : State.Trait} (self : ref Self) : M (H := H) bool :=\\n Pure self.[\\"naked\\"].\\n\\n Global Instance Method_is_naked `{H : State.Trait} :\\n Notation.Dot \\"is_naked\\" := {\\n Notation.dot := is_naked;\\n }.\\nEnd Impl_traits_Sheep.\\n\\nModule Impl_traits_Animal_for_traits_Sheep.\\n Definition Self := traits.Sheep.\\n\\n Definition new\\n `{H : State.Trait}\\n (name : ref str)\\n : M (H := H) traits.Sheep :=\\n Pure {| traits.Sheep.name := name; traits.Sheep.naked := false; |}.\\n\\n Global Instance AssociatedFunction_new `{H : State.Trait} :\\n Notation.DoubleColon Self \\"new\\" := {\\n Notation.double_colon := new;\\n }.\\n\\n Definition name `{H : State.Trait} (self : ref Self) : M (H := H) (ref str) :=\\n Pure self.[\\"name\\"].\\n\\n Global Instance Method_name `{H : State.Trait} : Notation.Dot \\"name\\" := {\\n Notation.dot := name;\\n }.\\n\\n Definition noise\\n `{H : State.Trait}\\n (self : ref Self)\\n 
: M (H := H) (ref str) :=\\n let* \u03b10 := self.[\\"is_naked\\"] in\\n if (\u03b10 : bool) then\\n Pure \\"baaaaah?\\"\\n else\\n Pure \\"baaaaah!\\".\\n\\n Global Instance Method_noise `{H : State.Trait} : Notation.Dot \\"noise\\" := {\\n Notation.dot := noise;\\n }.\\n\\n Definition talk `{H : State.Trait} (self : ref Self) : M (H := H) unit :=\\n let* _ :=\\n let* _ :=\\n let* \u03b10 := format_argument::[\\"new_display\\"] (addr_of self.[\\"name\\"]) in\\n let* \u03b11 := self.[\\"noise\\"] in\\n let* \u03b12 := format_argument::[\\"new_display\\"] (addr_of \u03b11) in\\n let* \u03b13 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" pauses briefly... \\"; \\"\\n\\" ])\\n (addr_of [ \u03b10; \u03b12 ]) in\\n std.io.stdio._print \u03b13 in\\n Pure tt in\\n Pure tt.\\n\\n Global Instance Method_talk `{H : State.Trait} : Notation.Dot \\"talk\\" := {\\n Notation.dot := talk;\\n }.\\n\\n Global Instance I : traits.Animal.Trait Self := {\\n traits.Animal.new `{H : State.Trait} := new;\\n traits.Animal.name `{H : State.Trait} := name;\\n traits.Animal.noise `{H : State.Trait} := noise;\\n }.\\nEnd Impl_traits_Animal_for_traits_Sheep.\\n\\nModule Impl_traits_Sheep_3.\\n Definition Self := traits.Sheep.\\n\\n Definition shear `{H : State.Trait} (self : mut_ref Self) : M (H := H) unit :=\\n let* \u03b10 := self.[\\"is_naked\\"] in\\n if (\u03b10 : bool) then\\n let* _ :=\\n let* _ :=\\n let* \u03b10 := self.[\\"name\\"] in\\n let* \u03b11 := format_argument::[\\"new_display\\"] (addr_of \u03b10) in\\n let* \u03b12 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" is already naked...\\n\\" ])\\n (addr_of [ \u03b11 ]) in\\n std.io.stdio._print \u03b12 in\\n Pure tt in\\n Pure tt\\n else\\n let* _ :=\\n let* _ :=\\n let* \u03b10 := format_argument::[\\"new_display\\"] (addr_of self.[\\"name\\"]) in\\n let* \u03b11 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" gets a haircut!\\n\\" ])\\n (addr_of [ \u03b10 ]) in\\n 
std.io.stdio._print \u03b11 in\\n Pure tt in\\n let* _ := assign self.[\\"naked\\"] true in\\n Pure tt.\\n\\n Global Instance Method_shear `{H : State.Trait} : Notation.Dot \\"shear\\" := {\\n Notation.dot := shear;\\n }.\\nEnd Impl_traits_Sheep_3.\\n\\n(* #[allow(dead_code)] - function was ignored by the compiler *)\\nDefinition main `{H : State.Trait} : M (H := H) unit :=\\n let* dolly :=\\n let* \u03b10 := traits.Animal.new \\"Dolly\\" in\\n Pure (\u03b10 : traits.Sheep) in\\n let* _ := dolly.[\\"talk\\"] in\\n let* _ := dolly.[\\"shear\\"] in\\n let* _ := dolly.[\\"talk\\"] in\\n Pure tt.\\n```\\n\\nAs we can see, the trait `Animal` is translated to a module `Animal`. Every time we want to refer to the trait we use the name `Trait` or `Animal.Trait`, depending on whether we do it inside or outside its module.\\n\\n## Conclusion\\n\\nTraits are similar enough to Coq classes to make the translation relatively intuitive.\\nThe only hard case is a translation of associated types, for which we need a special notation.\\n\\n:::tip Contact\\n\\nIf you have a Rust codebase that you wish to formally verify, or need advice in your work, contact us at [contact@formal.land](mailto:contact@formal.land). We will be happy to set up a call with you.\\n\\n:::"},{"id":"/2023/05/28/monad-for-side-effects-in-rust","metadata":{"permalink":"/blog/2023/05/28/monad-for-side-effects-in-rust","source":"@site/blog/2023-05-28-monad-for-side-effects-in-rust.md","title":"Monad for side effects in Rust","description":"To formally verify Rust programs, we are building coq-of-rust, a translator from Rust \ud83e\udd80 code to the proof system Coq \ud83d\udc13. We generate Coq code that is as similar as possible to the original Rust code, so that the user can easily understand the generated code and write proofs about it. 
In this blog post, we explain how we are representing side effects in Coq.","date":"2023-05-28T00:00:00.000Z","formattedDate":"May 28, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"monad","permalink":"/blog/tags/monad"},{"label":"side effects","permalink":"/blog/tags/side-effects"}],"readingTime":5.03,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Monad for side effects in Rust","tags":["coq-of-rust","Rust","Coq","monad","side effects"]},"unlisted":false,"prevItem":{"title":"Trait representation in Coq","permalink":"/blog/2023/08/25/trait-representation-in-coq"},"nextItem":{"title":"Representation of Rust methods in Coq","permalink":"/blog/2023/04/26/representation-of-rust-methods-in-coq"}},"content":"To formally verify Rust programs, we are building [coq-of-rust](https://github.com/formal-land/coq-of-rust), a translator from Rust \ud83e\udd80 code to the proof system [Coq \ud83d\udc13](https://coq.inria.fr/). We generate Coq code that is as similar as possible to the original Rust code, so that the user can easily understand the generated code and write proofs about it. In this blog post, we explain how we are representing side effects in Coq.\\n\\n\x3c!-- truncate --\x3e\\n\\n## \ud83e\udd80 Side effects in Rust\\n\\nIn programming, [side effects]() are all what is not representable by pure functions (mathematical functions, functions that always return the same output for given input parameters). 
In Rust there are various kinds of side effects:\\n\\n- errors (the [panic!](https://doc.rust-lang.org/core/macro.panic.html) macro) that propagate and do appear in the return type of functions,\\n- non-termination, with some potentially non-terminating loops (never returning a result is considered as a side-effect),\\n- control-flow, with the `break`, `continue`, `return` keywords, that can jump to a different part of the code,\\n- memory allocations and memory mutations,\\n- I/O, with for example the [println!](https://doc.rust-lang.org/std/macro.println.html) macro, that prints a message to the standard output,\\n- concurrency, with the [thread::spawn](https://doc.rust-lang.org/std/thread/fn.spawn.html) function, that creates a new thread.\\n\\n## \ud83d\udc13 Coq, a purely functional language\\n\\nLike most proof systems, Coq is a purely functional language. This means we need to find an encoding for the side effects. The reason for most proof systems to forbid side effects is to be logically consistent. Otherwise, it would be easy to write a proof of `False` by writing a term that does not terminate for example.\\n\\n## \ud83d\udd2e Monads in Coq\\n\\nMonads are a common way to represent side effects in a functional language. A monad is a type constructor `M`:\\n\\n```coq\\nDefinition M (A : Set) : Set :=\\n ...\\n```\\n\\nrepresenting computations returning values of type `A`. As an example we can take the error monad of computations that can fail with an error message, using the [Result](https://doc.rust-lang.org/std/result/enum.Result.html) type like in Rust:\\n\\n```coq\\nDefinition M (A : Set) : Set :=\\n Result A string.\\n```\\n\\nIt must have two operators, `Pure` and `Bind`.\\n\\n### The `Pure` operator\\n\\nThe `Pure` operator has type:\\n\\n```coq\\nDefinition Pure {A : Set} (v : A) : M A :=\\n ...\\n```\\n\\nIt lifts a pure value `v` into the monad. 
For our error monad, the `Pure` operator is:\\n\\n```coq\\nDefinition Pure {A : Set} (v : A) : M A :=\\n Ok v.\\n```\\n\\n### The `Bind` operator\\n\\nThe `Bind` operator has type:\\n\\n```coq\\nDefinition Bind {A B : Set} (e1 : M A) (f : A -> M B) : M B :=\\n ...\\n```\\n\\nIt sequences two computations `e1` with `f`, where `f` is a function that takes the result of `e1` as input and returns a new computation. We also note the `Bind` operator:\\n\\n```coq\\nlet* x := e1 in\\ne2\\n```\\n\\nassuming that `f` is a function that takes `x` as input and returns `e2`. Requiring this operator for all monads shows that sequencing computations is a very fundamental operation for side effects.\\n\\nFor our error monad, the `Bind` operator is:\\n\\n```coq\\nDefinition Bind {A B : Set} (e1 : M A) (f : A -> M B) : M B :=\\n match e1 with\\n | Ok v => f v\\n | Err msg => Err msg\\n end.\\n```\\n\\n## \ud83d\udea7 State, exceptions, non-termination, control-flow\\n\\nWe use a single monad to represent all the side effects that interest us in Rust. This monad is called `M` and is defined as follows:\\n\\n```coq\\nDefinition RawMonad `{State.Trait} :=\\n ...\\n\\nModule Exception.\\n Inductive t (R : Set) : Set :=\\n | Return : R -> t R\\n | Continue : t R\\n | Break : t R\\n | Panic {A : Set} : A -> t R.\\n Arguments Return {_}.\\n Arguments Continue {_}.\\n Arguments Break {_}.\\n Arguments Panic {_ _}.\\nEnd Exception.\\nDefinition Exception := Exception.t.\\n\\nDefinition Monad `{State.Trait} (R A : Set) : Set :=\\n nat -> State -> RawMonad ((A + Exception R) * State).\\n\\nDefinition M `{State.Trait} (A : Set) : Set :=\\n Monad Empty_set A.\\n```\\n\\nWe assume the definition of some `RawMonad` for memory handling that we will describe in a later post. Our monad `M` is a particular case of the monad `Monad` with `R = Empty_set`. It is a combination four monads:\\n\\n1. The `RawMonad`.\\n2. A state monad, that takes a `State` as input and a return an updated state as output. 
The trait `State.Trait` provides read/write operations on the `State` type.\\n3. An error monad with errors of type `Exception R`. These errors include the `Return`, `Continue`, `Break` and `Panic` constructors. The `Return` constructor is used to return a value from a function. The `Continue` constructor is used to continue the execution of a loop. The `Break` constructor is used to break the execution of a loop. The `Panic` constructor is used to panic with an error message. We implement all these operations as exceptions, even if only `Panic` is really an error, as they behave in the same way: interrupting the execution of the current sub-expression to bubble up to a certain level.\\n4. A fuel monad for non-termination, with the additional `nat` parameter.\\n\\nThe parameter `R` of the type constructor `Monad` is used to represent the type of values that can be returned in the body of a function. It is the same as the return type of the function. So for a function returning a value of type `A`, we define its body in `Monad A A`. Then, we wrap it in an operator:\\n\\n```coq\\nDefinition catch_return {A : Set} (e : Monad A A) : M A :=\\n ...\\n```\\n\\nthat catches the `Return` exceptions and returns the value.\\n\\n## Conclusion\\n\\nWe will see in the next post how we define the `RawMonad` to handle the Rust state of a program and memory allocation.\\n\\n:::tip Contact\\n\\nIf you have a Rust codebase that you wish to formally verify, or need advice in your work, contact us at [contact@formal.land](mailto:contact@formal.land).
We will be happy to set up a call with you.\\n\\n:::"},{"id":"/2023/04/26/representation-of-rust-methods-in-coq","metadata":{"permalink":"/blog/2023/04/26/representation-of-rust-methods-in-coq","source":"@site/blog/2023-04-26-representation-of-rust-methods-in-coq.md","title":"Representation of Rust methods in Coq","description":"With our project coq-of-rust we aim to translate high-level Rust code to similar-looking Coq code, to formally verify Rust programs. One of the important constructs in the Rust language is the method syntax. In this post, we present our technique to translate Rust methods using type-classes in Coq.","date":"2023-04-26T00:00:00.000Z","formattedDate":"April 26, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"}],"readingTime":4.57,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Representation of Rust methods in Coq","tags":["coq-of-rust","Rust","Coq"]},"unlisted":false,"prevItem":{"title":"Monad for side effects in Rust","permalink":"/blog/2023/05/28/monad-for-side-effects-in-rust"},"nextItem":{"title":"Current formal verification efforts \ud83d\udcaa","permalink":"/blog/2023/01/24/current-verification-efforts"}},"content":"With our project [coq-of-rust](https://github.com/formal-land/coq-of-rust) we aim to translate high-level Rust code to similar-looking [Coq](https://coq.inria.fr/) code, to [formally verify](https://en.wikipedia.org/wiki/Formal_verification) Rust programs. One of the important constructs in the Rust language is the [method syntax](https://doc.rust-lang.org/book/ch05-03-method-syntax.html). 
In this post, we present our technique to translate Rust methods using type-classes in Coq.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Rust Code To Translate\\n\\nConsider the following Rust example, which contains a method (adapted from the [Rust Book](https://doc.rust-lang.org/book/)):\\n\\n```rust\\nstruct Rectangle {\\n width: u32,\\n height: u32,\\n}\\n\\nimpl Rectangle {\\n // Here \\"area\\" is a method\\n fn area(&self) -> u32 {\\n self.width * self.height\\n }\\n}\\n\\nfn main() {\\n let rect1 = Rectangle {\\n width: 30,\\n height: 50,\\n };\\n\\n println!(\\n \\"The area of the rectangle is {} square pixels.\\",\\n // We are calling this method there\\n rect1.area()\\n );\\n}\\n```\\n\\nThe Rust compiler can find the implementation of the `.area()` method call because it knows that the type of `rect1` is `Rectangle`. There could be other `area` methods defined for different types, and the code would still compile calling the `area` method of `Rectangle`.\\n\\nCoq has no direct equivalent for calling a function based on its name and type.\\n\\n## Our Translation\\n\\nHere is our Coq translation of the code above:\\n\\n```coq\\n 1: (* Generated by coq-of-rust *)\\n 2: Require Import CoqOfRust.CoqOfRust.\\n 3:\\n 4: Import Root.std.prelude.rust_2015.\\n 5:\\n 6: Module Rectangle.\\n 7: Record t : Set := {\\n 8: width : u32;\\n 9: height : u32;\\n10: }.\\n11:\\n12: Global Instance Get_width : Notation.Dot \\"width\\" := {\\n13: Notation.dot \'(Build_t x0 _) := x0;\\n14: }.\\n15: Global Instance Get_height : Notation.Dot \\"height\\" := {\\n16: Notation.dot \'(Build_t _ x1) := x1;\\n17: }.\\n18: End Rectangle.\\n19: Definition Rectangle : Set := Rectangle.t.\\n20:\\n21: Module ImplRectangle.\\n22: Definition Self := Rectangle.\\n23:\\n24: Definition area (self : ref Self) : u32 :=\\n25: self.[\\"width\\"].[\\"mul\\"] self.[\\"height\\"].\\n26:\\n27: Global Instance Method_area : Notation.Dot \\"area\\" := {\\n28: Notation.dot := area;\\n29: }.\\n30: End 
ImplRectangle.\\n31:\\n32: Definition main (_ : unit) : unit :=\\n33: let rect1 := {| Rectangle.width := 30; Rectangle.height := 50; |} in\\n34: _crate.io._print\\n35: (_crate.fmt.Arguments::[\\"new_v1\\"]\\n36: [ \\"The area of the rectangle is \\"; \\" square pixels.\\\\n\\" ]\\n37: [ _crate.fmt.ArgumentV1::[\\"new_display\\"] rect1.[\\"area\\"] ]) ;;\\n38: tt ;;\\n39: tt.\\n```\\n\\nOn line `24` we define the `area` function. On line `27` we declare that `area` is a method. On line `37` we call the `area` method on `rect1` with:\\n\\n```coq\\nrect1.[\\"area\\"]\\n```\\n\\nwhich closely resembles the source Rust code:\\n\\n```rust\\nrect1.area()\\n```\\n\\nCoq can automatically find the code of the `area` method to call.\\n\\n## How It Works\\n\\nThe code:\\n\\n```coq\\nrect1.[\\"area\\"]\\n```\\n\\nis actually a notation for:\\n\\n```coq\\nNotation.dot \\"area\\" rect1\\n```\\n\\nThen we leverage the inference mechanism of type-classes in Coq to find the code of the `area` method:\\n\\n```coq\\nModule Notation.\\n (** A class to represent the notation [e1.e2]. This is mainly used to call\\n methods, or access to named or indexed fields of structures.\\n The kind is either a string or an integer. *)\\n Class Dot {Kind : Set} (name : Kind) {T : Set} : Set := {\\n dot : T;\\n }.\\n Arguments dot {Kind} name {T Dot}.\\nEnd Notation.\\n```\\n\\nThe `Dot` class has three parameters: `Kind`, `name`, and `T`. `Kind` is the type of the name of the method (generally a string but it could be an integer in rare cases), `name` is the name of the method, and `T` is the type of the method. 
The `dot` field of the class is the code of the method.\\n\\nWhen we define the class instance:\\n\\n```coq\\n27: Global Instance Method_area : Notation.Dot \\"area\\" := {\\n28: Notation.dot := area;\\n29: }.\\n```\\n\\nwe instantiate the class `Notation.Dot` with three parameters:\\n\\n- `Kind` (inferred) is `string` because the name of the method is a string,\\n- `name` is `\\"area\\"` because the name of the method is `area`,\\n- `T` (inferred) is `ref Rectangle -> u32` because the method is declared as `fn area(&self) -> u32`.\\n\\nThen we define the `dot` field of the class instance to be the `area` function.\\n\\nWhen we call:\\n\\n```coq\\nNotation.dot \\"area\\" rect1\\n```\\n\\nCoq will automatically find the class instance `Method_area` because the type of `rect1` is `Rectangle` and the name of the method is `\\"area\\"`.\\n\\n## Other Use Cases\\n\\nThe `Dot` class is also used to access to named or indexed fields of structures or traits. We use a similar mechanism for associated functions. For example, the Rust code:\\n\\n```rust\\nlet rect1 = Rectangle::square(3);\\n```\\n\\nis translated to:\\n\\n```coq\\nlet rect1 := Rectangle::[\\"square\\"] 3 in\\n```\\n\\nwith a type-class for the `type::[name]` notation as follows:\\n\\n```coq\\nModule Notation.\\n (** A class to represent associated functions (the notation [e1::e2]). The\\n kind might be [Set] for functions associated to a type,\\n or [Set -> Set] for functions associated to a trait. *)\\n Class DoubleColon {Kind : Type} (type : Kind) (name : string) {T : Set} :\\n Set := {\\n double_colon : T;\\n }.\\n Arguments double_colon {Kind} type name {T DoubleColon}.\\nEnd Notation.\\n```\\n\\n## In Conclusion\\n\\nThe type-classes mechanism of Coq appears flexible enough to represent our current use cases involving methods and associated functions. 
It remains to be seen whether this approach will suffice for future use cases.\\n\\n:::tip Contact\\n\\nIf you have a Rust codebase that you wish to formally verify, or need advice in your work, contact us at [contact@formal.land](mailto:contact@formal.land). We will be happy to set up a call with you.\\n\\n:::"},{"id":"/2023/01/24/current-verification-efforts","metadata":{"permalink":"/blog/2023/01/24/current-verification-efforts","source":"@site/blog/2023-01-24-current-verification-efforts.md","title":"Current formal verification efforts \ud83d\udcaa","description":"We are diversifying ourselves to apply formal verification on 3\ufe0f\u20e3 new languages with Solidity, Rust, and TypeScript. In this article we describe our approach. For these three languages, we translate the code to the proof system \ud83d\udc13 Coq. We generate the cleanest \ud83e\uddfc possible output to simplify the formal verification \ud83d\udcd0 effort that comes after.","date":"2023-01-24T00:00:00.000Z","formattedDate":"January 24, 2023","tags":[{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"OCaml","permalink":"/blog/tags/o-caml"},{"label":"Solidity","permalink":"/blog/tags/solidity"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"TypeScript","permalink":"/blog/tags/type-script"}],"readingTime":4.89,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Current formal verification efforts \ud83d\udcaa","tags":["coq-of-ocaml","OCaml","Solidity","Rust","TypeScript"]},"unlisted":false,"prevItem":{"title":"Representation of Rust methods in Coq","permalink":"/blog/2023/04/26/representation-of-rust-methods-in-coq"},"nextItem":{"title":"Latest blog posts on our formal verification effort on Tezos","permalink":"/blog/2022/12/13/latest-blog-posts-on-tezos"}},"content":"We are diversifying ourselves to apply [formal verification](https://en.wikipedia.org/wiki/Formal_verification) on 3\ufe0f\u20e3 new languages with **Solidity**, **Rust**, and 
**TypeScript**. In this article we describe our approach. For these three languages, we translate the code to the proof system [\ud83d\udc13 Coq](https://coq.inria.fr/). We generate the cleanest \ud83e\uddfc possible output to simplify the formal verification \ud83d\udcd0 effort that comes after.\\n\\n> Formal verification is a way to ensure that a program follows its specification in \ud83d\udcaf% of cases thanks to the use of mathematical methods. It removes far more bugs and security issues than testing, and is necessary to deliver software of the highest quality \ud83d\udc8e.\\n\\n\x3c!-- truncate --\x3e\\n\\n## \ud83d\uddfa\ufe0f General plan\\nTo apply formal verification to real-sized applications, we need to handle thousands of lines of code in a seamless way. We rely on the proof system Coq to write our proofs, as it has a mature ecosystem, and automated (SMT) and interactive ways to write proofs. To keep the proofs simple, we must find an efficient way to convert an existing and evolving codebase to Coq.\\n\\nFor example, given the following TypeScript example:\\n```typescript\\nexport function checkIfEnoughCredits(user: User, credits: number): boolean {\\n if (user.isAdmin) {\\n return credits >= 0;\\n }\\n\\n return credits >= 1000;\\n}\\n```\\nwe want to generate the corresponding Coq code in an automated way:\\n```coq\\nDefinition checkIfEnoughCredits (user : User) (credits : number) : bool :=\\n if user.(User.isAdmin) then\\n credits >= 0\\n else\\n credits >= 1000.\\n```\\nThis is the exact equivalent written using the Coq syntax, where we check the `credits` condition depending on the user\'s status. This is the `checkIfEnoughCredits` definition a Coq developer would directly write, in an idiomatic way.\\n\\nWe make some hypothesis on the input code. In TypeScript we assume the code does not contain mutations, which is often the case to simplify asynchronous code. 
In Rust we have other hypotheses as making safe mutations is one of the key features of the language and a frequent pattern. For each language we look for a correct subset to work on, to support common use cases and still generate clean Coq code.\\n\\n## \ud83c\uddf8 Solidity\\n\u27a1\ufe0f [Project page](/docs/verification/solidity) \u2b05\ufe0f\\n\\nThe [Solidity language](https://soliditylang.org/) is the main language to write smart contracts on the [Ethereum](https://ethereum.org/) blockchain. As smart contracts cannot be easily updated and handle a large amount of money, it is critical to formally verify them to prevent bugs.\\n\\nOur strategy is to develop a translator [coq-of-solidity](https://gitlab.com/formal-land/coq-of-solidity) from Solidity to Coq. We are using an implementation of an [ERC-20](https://en.wikipedia.org/wiki/Ethereum#ERC20) smart contract as an example to guide our translation. Two top difficulties in the translation of Solidity programs are:\\n* the use of object-oriented programming with inheritance on classes,\\n* the use of mutations and errors, that need to be handled in a monad.\\n\\nWe are still trying various approaches to handle these difficulties and generate a clean Coq output for most cases.\\n\\nIn addition to our work on Solidity, we are looking at the [EVM code](https://ethereum.org/en/developers/docs/evm/) that is the assembly language of Ethereum. It has the advantage of being more stable and with a simpler semantics than Solidity. However, it is not as expressive and programs in EVM are much harder to read. We have a prototype translator from EVM to Coq named [ethereum-vm-to-coq](https://gitlab.com/formal-land/ethereum-vm-to-coq).
An interesting goal will be to connect the translation of Solidity and of EVM in Coq to show that they have the same semantics on a given smart contract.\\n\\nNote that EVM is the target language of many verification project on Ethereum such as [Certora](https://www.certora.com/) or static analyzers. We prefer to target Solidity as it is more expressive and the generated code in Coq will thus be easier to verify.\\n\\n## \ud83e\udd80 Rust\\n\u27a1\ufe0f [Project page](/docs/verification/rust) \u2b05\ufe0f\\n\\nThe [Rust language](https://www.rust-lang.org/) is a modern systems programming language that is gaining popularity. It is a safe language that prevents many common errors such as buffer overflows or use-after-free. It is also a language that is used to write low-level code, such as drivers or operating systems. As such, it is critical to formally verify Rust programs to prevent bugs.\\n\\nWe work in collaboration with the team developing the [Aeneas](https://github.com/AeneasVerif) project, with people from Inria and Microsoft. The aim is to translate Rust code with mutations to a purely functional form in Coq (without mutations) to simplify the verification effort and avoid the need of separation logic. The idea of this translation is explained in the [Aeneas paper](https://dl.acm.org/doi/abs/10.1145/3547647).\\n\\nThere are two steps in the translation:\\n1. **From [MIR](https://rustc-dev-guide.rust-lang.org/mir/index.html) (low-level intermediate form of Rust) to LLBC.** This is a custom language for the project that contains all the information of MIR but is better suited for analysis. For example, instead of using a control-flow graph it uses control structures and an abstract syntax tree. This step is implemented in Rust.\\n2. **From LLBC to Coq.** This is the heart of the project and is implemented in OCaml. 
This is where the translation from mutations to a purely functional form occurs.\\n\\nFor now we are focusing on adding new features to LLBC and improving the user experience: better error messages, generation of an output with holes for unhandled Rust features.\\n\\n## \ud83c\udf10 TypeScript\\n\u27a1\ufe0f [Project page](/docs/verification/typescript) \u2b05\ufe0f\\n\\nWe have a [\ud83d\udcfd\ufe0f demo project](https://formal-land.github.io/coq-of-js/) to showcase the translation of a purely functional subset of JavaScript to Coq. We handle functions and basic data types such as records, enums and discriminated unions. We are now porting the code to TypeScript in [coq-of-ts](https://github.com/formal-land/coq-of-ts). We prefer to work on TypeScript rather than JavaScript as type information are useful to guide the translation, and avoid the need of additional annotations on the source code.\\n\\nOur next target will be to make `coq-of-ts` usable on real-life project example.\\n\\n:::info Social media\\nFollow us on Twitter at [Twitter](https://twitter.com/LandFoobar) \ud83d\udc26 and [Telegram](https://t.me/formal_land) to get the latest news about our projects. If you think our work is interesting, please share it with your friends and colleagues. \ud83d\ude4f\\n:::"},{"id":"/2022/12/13/latest-blog-posts-on-tezos","metadata":{"permalink":"/blog/2022/12/13/latest-blog-posts-on-tezos","source":"@site/blog/2022-12-13-latest-blog-posts-on-tezos.md","title":"Latest blog posts on our formal verification effort on Tezos","description":"Here we recall some blog articles that we have written since this summer, on the formal verification of the protocol of Tezos. For this project, we are verifying a code base of around 100,000 lines of OCaml code. We automatically convert the OCaml code to the proof system Coq using the converter coq-of-ocaml. 
We then apply various proof techniques to make sure that the protocol of Tezos does not contain bugs.","date":"2022-12-13T00:00:00.000Z","formattedDate":"December 13, 2022","tags":[{"label":"coq-tezos-of-ocaml","permalink":"/blog/tags/coq-tezos-of-ocaml"},{"label":"Tezos","permalink":"/blog/tags/tezos"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"}],"readingTime":1.755,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Latest blog posts on our formal verification effort on Tezos","tags":["coq-tezos-of-ocaml","Tezos","coq-of-ocaml"]},"unlisted":false,"prevItem":{"title":"Current formal verification efforts \ud83d\udcaa","permalink":"/blog/2023/01/24/current-verification-efforts"},"nextItem":{"title":"Upgrade coq-of-ocaml to OCaml 4.14","permalink":"/blog/2022/06/23/upgrade-coq-of-ocaml-4.14"}},"content":"Here we recall some blog articles that we have written since this summer, on the [formal verification of the protocol of Tezos](https://formal-land.gitlab.io/coq-tezos-of-ocaml/). For this project, we are verifying a code base of around 100,000 lines of OCaml code. We automatically convert the OCaml code to the proof system Coq using the converter [coq-of-ocaml](https://github.com/formal-land/coq-of-ocaml). We then apply various proof techniques to make sure that the protocol of Tezos does not contain bugs.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Blog articles \ud83d\udcdd\\nHere is the list of articles about the work we have done since this summer. 
We believe that some of this work is very unique and specific to Tezos.\\n\\n* [The error monad, internal errors and validity predicates, step-by-step](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/12/12/internal-errors-step-by-step/) by *Pierre Vial*: a detailed explanation of what we are doing to verify the absence of unexpected errors in the whole code base;\\n* [Absence of internal errors](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/10/18/absence-of-internal-errors/) by *Guillaume Claret*: the current state of our proofs to verify the absence of unexpected errors;\\n* [Skip-list verification. Using inductive predicates](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/10/03/verifying-the-skip-list-inductive-predicates/) by *Bart\u0142omiej Kr\xf3likowski* and *Natalie Klaus*: a presentation of our verification effort on the skip-list algorithm implementation (part 2);\\n* [Verifying the skip-list](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/10/03/verifying-the-skip-list/) by *Natalie Klaus* and *Bart\u0142omiej Kr\xf3likowski*: a presentation of our verification effort on the skip-list algorithm implementation (part 1);\\n* [Verifying json-data-encoding](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/08/15/verify-json-data-encoding/) by *Tait van Strien*: our work to verify an external library used by the Tezos protocol, to safely serialize data to JSON values;\\n* [Fixing reused proofs](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/07/19/fixing-proofs/) by *Bart\u0142omiej Kr\xf3likowski*: a presentation, with examples, of the work we do to maintain existing proofs and specifications as the code evolves;\\n* [Formal verification of property based tests](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/06/07/formal-verification-of-property-based-tests/) by *Guillaume Claret*: the principle and status of our work to formally verify the generalized case of property-based 
tests;\\n* [Plan for backward compatibility verification](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/06/02/plan-backward-compatibility) by *Guillaume Claret*: an explanation of the strategy we use to show that two successive versions of the Tezos protocol are fully backward compatible.\\n\\nTo follow more of our activity, feel free to register on our [Twitter account \ud83d\udc26](https://twitter.com/LandFoobar)! If you need services or advices to formally verify your code base, you can drop us an [email \ud83d\udce7](mailto:contact@formal.land)!"},{"id":"/2022/06/23/upgrade-coq-of-ocaml-4.14","metadata":{"permalink":"/blog/2022/06/23/upgrade-coq-of-ocaml-4.14","source":"@site/blog/2022-06-23-upgrade-coq-of-ocaml-4.14.md","title":"Upgrade coq-of-ocaml to OCaml 4.14","description":"In an effort to support the latest version of the protocol of Tezos we upgraded coq-of-ocaml to add compatibility with OCaml 4.14. The result is available in the branch ocaml-4.14. We describe here how we made this upgrade.","date":"2022-06-23T00:00:00.000Z","formattedDate":"June 23, 2022","tags":[{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"ocaml","permalink":"/blog/tags/ocaml"},{"label":"4.14","permalink":"/blog/tags/4-14"}],"readingTime":2.195,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Upgrade coq-of-ocaml to OCaml 4.14","tags":["coq-of-ocaml","ocaml","4.14"]},"unlisted":false,"prevItem":{"title":"Latest blog posts on our formal verification effort on Tezos","permalink":"/blog/2022/12/13/latest-blog-posts-on-tezos"},"nextItem":{"title":"Status update on the verification of Tezos","permalink":"/blog/2022/06/15/status update-tezos"}},"content":"In an effort to support the latest version of the [protocol of Tezos](https://gitlab.com/tezos/tezos/-/tree/master/src/proto_alpha/lib_protocol) we upgraded [`coq-of-ocaml`](https://github.com/formal-land/coq-of-ocaml) to add compatibility with OCaml 4.14. 
The result is available in the branch [`ocaml-4.14`](https://github.com/formal-land/coq-of-ocaml/pull/217). We describe here how we made this upgrade.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Usage of Merlin\\nIn `coq-of-ocaml` we are using [Merlin](https://github.com/ocaml/merlin) to get the typed [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of OCaml files. We see the AST through the [Typedtree](https://docs.mirage.io/ocaml/Typedtree/index.html) interface, together with an access to all the definitions of the current compilation environment. Merlin computes the current environment by understanding how an OCaml project is configured and connecting to the [dune](https://dune.build/) build system. The environment is mandatory for certain transformations in `coq-of-ocaml`, like:\\n* finding a canonical name for module types;\\n* propagating phantom types.\\n\\nIn order to use Merlin as a library (rather than as a daemon), we vendor the [LSP version](https://github.com/rgrinberg/merlin/tree/lsp) of [rgrinberg](https://github.com/rgrinberg) in the folder [`vendor/`](https://github.com/formal-land/coq-of-ocaml/tree/master/vendor). This vendored version works with no extra configurations.\\n\\n## Upgrade\\nWhen a new version of OCaml is out, we upgrade our vendored version of Merlin to a compatible one. Then we do the necessary changes to `coq-of-ocaml`, as the interface of the AST generally evolves with small changes. For OCaml 4.14, the main change was some types becoming abstract such as `Types.type_expr`. To access to the fields of these types, we now need to use a specific getter and do changes such as:\\n```diff\\n+ match typ.desc with\\n- match Types.get_desc typ with\\n```\\nThis made some patterns in `match` expressions more complex, but otherwise the changes were very minimal. 
We ran all the unit-tests of `coq-of-ocaml` after the upgrade and they were still valid.\\n\\n## Git submodule or copy & paste?\\nTo vendor Merlin we have two possibilities:\\n1. Using a [Git submodule](https://git-scm.com/book/en/v2/Git-Tools-Submodules).\\n2. Doing a copy & paste of the code.\\n\\nThe first possibility is more efficient in terms of space, but there are a few disadvantages:\\n* we cannot make small modifications if needed;\\n* the archives generated by Github do not contain the code of the submodules (see this [issue](https://github.com/dear-github/dear-github/issues/214))\\n* if a commit in the repository for the submodule disappears, then the submodule is unusable.\\n\\nThe last reason forced us to do a copy & paste for OCaml 4.14. We now have to be cautious not to commit the generate `.ml` file for the OCaml parser.\\n\\n## Next\\nThe next change will be doing the upgrade to OCaml 5. There should be much more changes, and in particular a new way of handling the effects. We do not know yet if it will be possible to translate the effect handlers to Coq in a nice way."},{"id":"/2022/06/15/status update-tezos","metadata":{"permalink":"/blog/2022/06/15/status update-tezos","source":"@site/blog/2022-06-15-status update-tezos.md","title":"Status update on the verification of Tezos","description":"Here we give an update on our verification effort on the protocol of Tezos. 
We add the marks:","date":"2022-06-15T00:00:00.000Z","formattedDate":"June 15, 2022","tags":[{"label":"tezos","permalink":"/blog/tags/tezos"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"coq","permalink":"/blog/tags/coq"}],"readingTime":7.53,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Status update on the verification of Tezos","tags":["tezos","coq-of-ocaml","coq"]},"unlisted":false,"prevItem":{"title":"Upgrade coq-of-ocaml to OCaml 4.14","permalink":"/blog/2022/06/23/upgrade-coq-of-ocaml-4.14"},"nextItem":{"title":"Make Tezos the first formally verified cryptocurrency","permalink":"/blog/2022/02/02/make-tezos-a-formally-verified-crypto"}},"content":"Here we give an update on our [verification effort](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the protocol of Tezos. We add the marks:\\n* \u2705 for \\"rather done\\"\\n* \ud83c\udf0a for \\"partially done\\"\\n* \u274c for \\"most is yet to do\\"\\n\\nOn the website of project, we also automatically generates pages such as [Compare](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/status/compare/) to follow the status of the tasks.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Maintenance of the translation \u2705\\nWe were able to maintain most of the translation from OCaml to Coq of the protocol of Tezos using [coq-of-ocaml](https://github.com/formal-land/coq-of-ocaml), including all the translation of the Michelson interpreter. There was an increase in the size of the OCaml code base in recent months, due to new features added in Tezos like the [rollups](https://research-development.nomadic-labs.com/tezos-is-scaling.html). Here are the numbers of lines of code (`.ml` and `.mli` files) for the various protocol versions:\\n* protocol H: `51147`\\n* protocol I: `59535`\\n* protocol J: `83271` (increase mainly due to the rollups)\\n* protocol Alpha (development version of K): `90716`\\n\\nWe still translate most of the protocol code up to version J. 
We stayed on version J for a while as we wanted to add as many proofs as possible before doing a proof of backward compatibility between J and K. We are currently updating the translation to support the protocol version Alpha, preparing for the translation of K.\\n\\nFor protocol J, we needed to add a [blacklist.txt](https://gitlab.com/nomadic-labs/coq-tezos-of-ocaml/-/blob/master/blacklist.txt) of files that we do not support. Indeed, we need to add new changes to `coq-of-ocaml` to support these or do hard-to-maintain changes to [our fork](https://gitlab.com/tezos/tezos/-/merge_requests/3303) of the Tezos protocol. We plan to complete the translation and remove this black-list for the protocol J soon (in a week or two).\\n\\n## Size of the proofs \u2705\\nOne of our plans is to have a reasonable quantity of proofs, to cover a reasonable quantity of code and properties from the protocol. We believe we have a good quantity of proofs now, as we have more than 50,000 lines of Coq code (for an OCaml codebase of 80,000 lines).\\n\\nIn addition to our main targets, we verify many \\"smaller\\" properties, such as:\\n* conversion functions are inverses (when there are two `to_int` and `of_int` functions in a file, we show that they are inverses);\\n* the `compare` functions, to order elements, are well defined (see our blog post [Verifying the compare functions of OCaml](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/04/04/verifying-the-compare-functions));\\n* invariants are preserved. For example, [here](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/proofs/carbonated_map#Make.update_is_valid) we show that updating a carbonated map preserves the property of having a size field actually equal to the number of elements.\\n\\nWe should note that the size of Coq proofs tends to grow faster than the size of the verified code. 
We have no coverage metrics to know how much of the code is covered by these proofs.\\n\\n## Data-encodings \ud83c\udf0a\\nThe [data-encoding](https://gitlab.com/nomadic-labs/data-encoding) library is a set of combinators to write serialization/de-serialization functions. We verify that the encodings defined for each protocol data type are bijective. The good thing we have is a semi-automated tactic to verify the use of the `data-encoding` primitives. We detail this approach in our blog post [Automation of `data_encoding` proofs](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/11/22/data-encoding-automation). We can verify most of the encoding functions that we encounter. From there, we also express the **invariant** associated with each data type, which the encodings generally check at runtime. The invariants are then the domain of definition of the encodings.\\n\\nHowever, we have a hole: we do not verify the `data-encoding` library itself. Thus the [axioms we made](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/environment/proofs/data_encoding) on the data-encoding primitives may have approximations. And indeed, we missed one issue in the development code of the protocol. This is thus a new high-priority target to verify the `data-encoding` library itself. One of the challenges for the proof is the use of side-effects (references and exceptions) in this library.\\n\\n## Property-based tests \ud83c\udf0a\\nThe property-based tests on the protocol are located in [`src/proto_alpha/lib_protocol/test/pbt`](https://gitlab.com/tezos/tezos/-/tree/master/src/proto_alpha/lib_protocol/test/pbt). These tests are composed of:\\n* a generator, generating random inputs of a certain shape;\\n* a property function, a boolean function taking a generated input and supposed to always answer `true`.\\n\\nWe translated a part of these tests to Coq, to convert them to theorems and have specifications extracted from the code. 
The result of this work is summarized in this blog post: [Formal verification of property based tests](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/06/07/formal-verification-of-property-based-tests). We have fully translated and verified four test files over a total of twelve. We are continuing the work of translations and proofs.\\n\\nHowever, we found that for some of the files the proofs were taking a long time to write compared to the gains in safety. Indeed, the statements made in the tests are sometimes too complex when translated into general theorems. For example, for [test_carbonated_map.ml](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/test/pbt/test_carbonated_map.ml) we have to deal with:\\n* gas exhaustion (seemingly impossible in the tests);\\n* data structures of size greater than `max_int` (impossible in practice).\\n\\nAll of that complicate the proofs for little gain in safety. So I would say that not all the property-based tests have a nice and useful translation to Coq. We should still note that for some of the tests, like with saturation arithmetic, we have proofs that work well. For these, we rely on the automated linear arithmetic tactic [`lia`](https://coq.inria.fr/refman/addendum/micromega.html) of Coq to verify properties over integer overflows.\\n\\n## Storage system \ud83c\udf0a\\nBy \\"storage system\\" we understand the whole set of functors defined in [`storage_functors.ml`](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/storage_functors.ml) and how we apply them to define the protocol storage in [`storage.ml`](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/storage_functors.ml). 
These functors create sub-storages with signatures such as:\\n```ocaml\\nmodule type Non_iterable_indexed_data_storage = sig\\n type t\\n type context = t\\n type key\\n type value\\n val mem : context -> key -> bool Lwt.t\\n val get : context -> key -> value tzresult Lwt.t\\n val find : context -> key -> value option tzresult Lwt.t\\n val update : context -> key -> value -> Raw_context.t tzresult Lwt.t\\n val init : context -> key -> value -> Raw_context.t tzresult Lwt.t\\n val add : context -> key -> value -> Raw_context.t Lwt.t\\n val add_or_remove : context -> key -> value option -> Raw_context.t Lwt.t\\n val remove_existing : context -> key -> Raw_context.t tzresult Lwt.t\\n val remove : context -> key -> Raw_context.t Lwt.t\\nend\\n```\\nThis `Non_iterable_indexed_data_storage` API looks like the API of an OCaml\'s [Map](https://v2.ocaml.org/api/Map.Make.html). As a result, our goal for the storage is to show that is can be simulated by standard OCaml data structures such as sets and maps. This is a key step to unlock further reasoning about code using the storage.\\n\\nUnfortunately, we were not able to verify the whole storage system yet. Among the difficulties are that:\\n* there are many layers in the definition of the storage;\\n* the storage functors use a lot of abstractions, and sometimes it is unclear how to specify them in the general case.\\n\\nStill, we have verified some of the functors as seen in [`Proofs/Storage_functors.v`](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/proofs/storage_functors) and specified the `storage.ml` file in [`Proos/Storage.v`](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/storage). We believe in having the correct specifications for all of the storage abstractions now. We plan to complete all these proofs later.\\n\\n## Michelson\\nThe verification of the Michelson interpreter is what occupied most of our time. 
By considering the OCaml files whose name starts by `script_`, the size of the Michelson interpreter is around 20,000 lines of OCaml code.\\n\\n### Simulations \ud83c\udf0a\\nThe interpreter relies heavily on [GADTs](https://v2.ocaml.org/manual/gadts.html) in OCaml. Because these do not translate nicely in Coq, we need to write simulations in dependent types of the interpreter functions, and prove them correct in Coq. We describe this process in our [Michelson Guide](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/guides/michelson).\\n\\nThe main difficulties we encountered are:\\n* the number of simulations to write (covering the 20,000 lines of OCaml);\\n* the execution time of the proof of correctness of the simulations. This is due to the large size of the inductive types describing the Michelson AST, and the use of dependent types generating large proof terms. For example, there are around 30 cases for the types and 150 for the instructions node in the AST.\\n\\nWhen writing the simulations, we are also verifying the termination of all the functions and the absence of reachable `assert false`. We have defined the simulation of many functions, but are still missing important ones such as [`parse_instr_aux`](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/script_ir_translator/#parse_instr_aux) to parse Michelson programs.\\n\\n### Mi-Cho-Coq \ud83c\udf0a\\nWe have a project to verify that the [Mi-Cho-Coq](https://gitlab.com/nomadic-labs/mi-cho-coq) framework, used to formally verify smart contracts written in Michelson, is compatible with the implementation of the Michelson interpreter in OCaml. We have a partial proof of compatibility in [Micho_to_dep.v](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/simulations/micho_to_dep). We still need to complete this proof, especially to handle instructions with loops. 
Our goal is to show a complete inclusion of the semantics of Mi-Cho-Coq into the semantics of the implementation.\\n\\n### Parse/unparse \u274c\\nWe wanted to verify that the various parsing and unparsing functions over Michelson are inverses. These functions exist for:\\n* comparable types\\n* types\\n* comparable data\\n* data\\n\\nBecause we are still focused on writing, verifying or updating the simulations, we are still not done for this task.\\n\\n## Conclusion\\nWe have many ongoing projects but few fully completed tasks. We will focus more on having terminated proofs."},{"id":"/2022/02/02/make-tezos-a-formally-verified-crypto","metadata":{"permalink":"/blog/2022/02/02/make-tezos-a-formally-verified-crypto","source":"@site/blog/2022-02-02-make-tezos-a-formally-verified-crypto.md","title":"Make Tezos the first formally verified cryptocurrency","description":"Elephants","date":"2022-02-02T00:00:00.000Z","formattedDate":"February 2, 2022","tags":[{"label":"tezos","permalink":"/blog/tags/tezos"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"coq","permalink":"/blog/tags/coq"}],"readingTime":3.675,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Make Tezos the first formally verified cryptocurrency","tags":["tezos","coq-of-ocaml","coq"]},"unlisted":false,"prevItem":{"title":"Status update on the verification of Tezos","permalink":"/blog/2022/06/15/status update-tezos"},"nextItem":{"title":"New blog posts and Meetup talk","permalink":"/blog/2021/11/12/new-blog-posts-and-meetup-talk"}},"content":"![Elephants](elephants-elmira-gokoryan.webp)\\n\\nOur primary goal at [Formal Land \ud83c\udf32](https://formal.land/) is to make [Tezos](https://tezos.com/) the first crypto-currency with a formally verified implementation. With [formal verification](https://en.wikipedia.org/wiki/Formal_verification), thanks to mathematical methods, we can check that a program behaves as expected for all possible inputs. 
Formal verification goes beyond what testing can do, as testing can only handle a finite amount of cases. That is critical as cryptocurrencies hold a large amount of money (around $3B for Tezos today). The current result of our verification project is available on [nomadic-labs.gitlab.io/coq-tezos-of-ocaml](https://formal-land.gitlab.io/coq-tezos-of-ocaml/). Formal verification is also key to allowing Tezos to evolve constantly in a safe and backward compatible manner.\\n\\n\x3c!-- truncate --\x3e\\n\\nWe proceed in two steps:\\n1. we translate the code of Tezos, written in [OCaml](https://ocaml.org/), to the proof language [Coq](https://coq.inria.fr/) using the translator [coq-of-ocaml](https://github.com/foobar-land/coq-of-ocaml);\\n2. we write our specifications and proofs in the Coq language.\\n\\nWe believe this is one of the most efficient ways to proceed, as we can work on an almost unmodified version of the codebase and use the full power of the mature proof system Coq. The code of Tezos is composed of around:\\n* 50,000 lines for the protocol (the kernel of Tezos), and\\n* 200,000 lines for the shell (everything else, including the peer-to-peer layer and the storage backend).\\n\\nWe are currently focusing on verifying the protocol for the following modules.\\n\\n## Data-encoding\\nThe [data-encoding](https://gitlab.com/nomadic-labs/data-encoding) library offers serialization and deserialization to binary and JSON formats. It is used in various parts of the Tezos protocol, especially on all the data types ending up in the storage system. In practice, many encodings are defined in the OCaml files named `*_repr.ml`. We verify that the `data-encoding` library is correctly used to define the encodings. We check that converting a value to binary format and from binary returns the initial value. We explicit the domain of validity of such conversions. This verification work generally reveals and propagates invariants about the data structures of the protocol. 
As an invariant example, all the account amounts should always be positive. Having these invariants will be helpful for the verification of higher-level layers of the protocol.\\n\\n## Michelson smart contracts\\nThe smart contract language of Tezos is [Michelson](https://tezos.gitlab.io/active/michelson.html). The interpreter and type-checker of smart contracts is one of the most complex and critical parts of the protocol. We are verifying two things about this code:\\n* The equivalence of the interpreter and the Coq semantics for Michelson defined in the project [Mi-Cho-Coq](https://gitlab.com/nomadic-labs/mi-cho-coq). Thanks to this equivalence, we can make sure that the formal verification of smart contracts is sound for the current version of the protocol.\\n* The compatibility of the parsing and unparsing functions for the Michelson types and values. The parsing functions take care of the type-checking and do a lot of sanity checks on Michelson expressions with appropriate error messages. Showing that the parsing and unparsing functions are inverses is important for security reasons. The Michelson values are always unparsed at the end of a smart contract execution to be stored on disk.\\n\\nTo do these proofs, we also give a new semantics of Michelson, expressed using dependent types rather than [GADTs](https://ocaml.org/manual/gadts-tutorial.html) in the OCaml implementation.\\n\\n## Storage system\\nCryptocurrencies typically take a lot of space on disk (in the hundreds of gigabytes). In Tezos, we use the key-value database [Irmin](https://irmin.org/). The protocol provides a lot of [abstractions](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/storage_functors.ml) over this database to expose higher-level interfaces with set and map-like APIs. 
We verify that these abstractions are valid doing a proof by simulation, where we show that the whole system is equivalent to an [in-memory database](https://en.wikipedia.org/wiki/In-memory_database) using simpler data structures. Thanks to this simulation, we will be able to reason about code using the storage as if we were using the simpler in-memory version.\\n\\n## In addition\\nWe also plan to verify:\\n* The implementation of the `data-encoding` library itself. This code is challenging for formal verification as it contains many imperative features. Another specificity of this library is that it sits outside of the protocol of Tezos, and we might need to adapt `coq-of-ocaml` to support it.\\n* The [property-based tests of the protocol](https://gitlab.com/tezos/tezos/-/tree/master/src/proto_alpha/lib_protocol/test/pbt). These tests are written as boolean functions (or functions raising exceptions), which must return `true` on any possible inputs. We will verify them in the general case by importing their definitions to Coq and verifying with mathematical proofs that they are always correct.\\n\\n:::tip Contact\\nFor any questions or remarks, contact us on \ud83d\udc49 [contact@formal.land](mailto:contact@formal.land) \ud83d\udc48.\\n:::"},{"id":"/2021/11/12/new-blog-posts-and-meetup-talk","metadata":{"permalink":"/blog/2021/11/12/new-blog-posts-and-meetup-talk","source":"@site/blog/2021-11-12-new-blog-posts-and-meetup-talk.md","title":"New blog posts and Meetup talk","description":"Recently, we added two new blog posts about the verification of the crypto-currency Tezos:","date":"2021-11-12T00:00:00.000Z","formattedDate":"November 12, 2021","tags":[{"label":"tezos","permalink":"/blog/tags/tezos"},{"label":"mi-cho-coq","permalink":"/blog/tags/mi-cho-coq"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"meetup","permalink":"/blog/tags/meetup"}],"readingTime":0.58,"hasTruncateMarker":false,"authors":[],"frontMatter":{"title":"New blog 
posts and Meetup talk","tags":["tezos","mi-cho-coq","coq-of-ocaml","meetup"]},"unlisted":false,"prevItem":{"title":"Make Tezos the first formally verified cryptocurrency","permalink":"/blog/2022/02/02/make-tezos-a-formally-verified-crypto"},"nextItem":{"title":"Verification of the use of data-encoding","permalink":"/blog/2021/10/27/verification-data-encoding"}},"content":"Recently, we added two new blog posts about the verification of the crypto-currency [Tezos](https://tezos.com/):\\n* [Verify the Michelson types of Mi-Cho-Coq](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/11/01/verify-michelson-types-mi-cho-coq/) to compare the types defined in the Tezos code for the [Michelson](http://tezos.gitlab.io/active/michelson.html) interpreter and in the [Mi-Cho-Coq library](https://gitlab.com/nomadic-labs/mi-cho-coq) to verify smart contracts;\\n* [Translate the Tenderbake\'s code to Coq](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/11/08/translate-tenderbake/) to explain how we translated the recent changes in Tezos to the Coq using [coq-of-ocaml](https://github.com/foobar-land/coq-of-ocaml). In particular we translated the code of the new [Tenderbake](https://research-development.nomadic-labs.com/a-look-ahead-to-tenderbake.html) consensus algorithm.\\n\\nWe also talked at the [Lambda Lille Meetup](https://www.meetup.com/LambdaLille/events/281374644/) (in French) to present our work on `coq-of-ocaml` for Tezos. A video on the [Youtube channel](https://www.youtube.com/channel/UC-hC7y_ilQBq0QCa9xDu1iA) of the Meetup should be available shortly. We thanks the organizers for hosting the talk."},{"id":"/2021/10/27/verification-data-encoding","metadata":{"permalink":"/blog/2021/10/27/verification-data-encoding","source":"@site/blog/2021-10-27-verification-data-encoding.md","title":"Verification of the use of data-encoding","description":"We added a blog post about the verification of the use of data-encodings in the protocol of Tezos. 
Currently, we work on the verification of Tezos and publish our blog articles there. We use coq-of-ocaml to translate the OCaml code to Coq and do our verification effort.","date":"2021-10-27T00:00:00.000Z","formattedDate":"October 27, 2021","tags":[{"label":"data-encoding","permalink":"/blog/tags/data-encoding"}],"readingTime":0.235,"hasTruncateMarker":false,"authors":[],"frontMatter":{"title":"Verification of the use of data-encoding","tags":["data-encoding"]},"unlisted":false,"prevItem":{"title":"New blog posts and Meetup talk","permalink":"/blog/2021/11/12/new-blog-posts-and-meetup-talk"},"nextItem":{"title":"Welcome","permalink":"/blog/2021/10/10/welcome"}},"content":"We added a blog post about the [verification of the use of data-encodings](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/10/20/data-encoding-usage) in the protocol of Tezos. Currently, we work on the verification of Tezos and publish our blog articles there. We use [coq-of-ocaml](https://foobar-land.github.io/coq-of-ocaml/) to translate the OCaml code to Coq and do our verification effort."},{"id":"/2021/10/10/welcome","metadata":{"permalink":"/blog/2021/10/10/welcome","source":"@site/blog/2021-10-10-welcome.md","title":"Welcome","description":"Welcome to the blog of Formal Land. Here we will post various updates about the work we are doing.","date":"2021-10-10T00:00:00.000Z","formattedDate":"October 10, 2021","tags":[{"label":"Welcome","permalink":"/blog/tags/welcome"}],"readingTime":0.095,"hasTruncateMarker":false,"authors":[],"frontMatter":{"title":"Welcome","tags":["Welcome"]},"unlisted":false,"prevItem":{"title":"Verification of the use of data-encoding","permalink":"/blog/2021/10/27/verification-data-encoding"}},"content":"Welcome to the blog of [Formal Land](/). 
Here we will post various updates about the work we are doing."}]}')}}]); \ No newline at end of file diff --git a/assets/js/b2f554cd.e8f027dd.js b/assets/js/b2f554cd.e8f027dd.js new file mode 100644 index 00000000..5161caf9 --- /dev/null +++ b/assets/js/b2f554cd.e8f027dd.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5894],{6042:e=>{e.exports=JSON.parse('{"blogPosts":[{"id":"/2024/06/05/formal-verification-for-software-correctness","metadata":{"permalink":"/blog/2024/06/05/formal-verification-for-software-correctness","source":"@site/blog/2024-06-05-formal-verification-for-software-correctness.md","title":"\ud83e\udd84 Software correctness from first principles","description":"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.","date":"2024-06-05T00:00:00.000Z","formattedDate":"June 5, 2024","tags":[{"label":"formal verification","permalink":"/blog/tags/formal-verification"},{"label":"software correctness","permalink":"/blog/tags/software-correctness"},{"label":"first principles","permalink":"/blog/tags/first-principles"},{"label":"example","permalink":"/blog/tags/example"},{"label":"Python","permalink":"/blog/tags/python"}],"readingTime":7.265,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd84 Software correctness from first principles","tags":["formal verification","software correctness","first principles","example","Python"],"authors":[]},"unlisted":false,"nextItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/software-correctness-from-first-principles"}},"content":"**Formal verification** 
is a technique to verify the absence of bugs in a program by reasoning from **first principles**. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.\\n\\nWe will present this idea in detail and illustrate how it works for a very simple example.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Use of formal verification\\n\\nWe typically use formal verification for critical applications, where either:\\n\\n- life is at stake, like in the case of trains, airplanes, medical devices, or\\n- money is at stake, like in the case of financial applications.\\n\\nWith formal verification, in theory, **we can guarantee that the software will never fail**, as we can check **all possible cases** for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions.\\n\\nIn this research paper [Finding and Understanding Bugs in C Compilers](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf), no bugs were found in the middle-end of the formally verified [CompCert](https://en.wikipedia.org/wiki/CompCert) C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!\\n\\n## Definition of programming languages\\n\\nTo be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. 
For example, in Python, the `if` statement is [defined in the reference manual](https://docs.python.org/3/reference/compound_stmts.html#if) by:\\n\\n```python\\nif_stmt ::= \\"if\\" assignment_expression \\":\\" suite\\n (\\"elif\\" assignment_expression \\":\\" suite)*\\n [\\"else\\" \\":\\" suite]\\n```\\n> It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed.\\n\\nThis means that the Python code:\\n\\n```python\\nif condition:\\n a\\nelse:\\n b\\n```\\n\\nwill execute `a` when the `condition` is true, and `b` otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...).\\n\\nTo make these rules more manageable, we generally split them into two parts:\\n\\n- The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the [grammar](https://docs.python.org/3/reference/grammar.html).\\n- The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the `if` statement.\\n\\nIn formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating \\"syntax errors\\" in case of ill-formed programs.\\n\\n## Example to verify\\n\\nWe consider this short Python example of a function returning the maximum number in a list:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n for x in l:\\n if x > m:\\n m = x\\n return m\\n```\\n\\nWe assume that the list `l` is not empty and only contains integers. 
If we run it on a few examples:\\n\\n```python\\nmy_max([1, 2, 3]) # => 3\\nmy_max([3, 2, 1]) # => 3\\nmy_max([1, 3, 2]) # => 3\\n```\\n\\nit always returns `3`, the biggest number in the list! But can we make sure this is always the case?\\n\\nWe can certainly not run `my_max` on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning.\\n\\n## Formal verification\\n\\nHere is a general specification that we give of the `my_max` function above:\\n\\n```python\\nforall (index : int) (l : list[int]),\\n 0 \u2264 index < len(l) \u21d2\\n l[index] \u2264 my_max(l)\\n```\\n\\nIt says that for all integer `index` and list of integers `l`, if the index is valid (between `0` and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute.\\n\\nTo verify this property for all possible list `l`, we reason by induction. A non-empty list is either:\\n\\n- a list with one element, where the maximum is the only element, or\\n- a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list.\\n\\nAt the start of the code, we will always have:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n```\\n\\nwith `m` being equal to the first item of the list. Then:\\n\\n- If the list has only one element, we iterate only once in the `for` loop, with `x` equal to `l[0]`. The condition:\\n ```python\\n if x > m:\\n ```\\n is then equivalent to:\\n ```python\\n if l[0] > l[0]:\\n ```\\n and is always false. We then return `m = l[0]`, which is the only element of the list, and it verifies our property as:\\n ```python\\n l[0] \u2264 l[0]\\n ```\\n- If the list has at least two elements, we unroll the code execution of the `for` loop and iterate over all the elements until the last one. 
Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:\\n ```python\\n l[index] \u2264 m\\n ```\\n for all `index` between `0` and `len(l) - 2`. When we reach the last element, we have:\\n ```python\\n if x > m:\\n m = x\\n ```\\n with `x` being `l[len(l) - 1]`. There are two possibilities. Either *(i)* `x` is less than or equal to `m`, and we do not update `m`, or *(ii)* `x` is greater than `m`, and we update `m` to `x`. In both cases, the property is verified for the last element of the list, as:\\n 1. In the first case, `m` stays the same, so it is still larger than or equal to all the elements of the list except the last one, as well as larger than or equal to the last one according to this last `if` statement.\\n 2. In the second case, `m` is updated to `x`, which is the last element of the list and a greater value than the original `m`. Then it means that `m` is still larger than or equal to all the elements of the list except the last one, being larger than the original `m`, and larger than or equal to the last one as it is in fact equal to the last one.\\n\\nWe have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code.\\n\\nIn practice, with formal verification, the reasoning above is done in a proof assistant such as [Coq](https://coq.inria.fr/) to help make sure that we did not forget any case, and to automatically solve simple cases for us. 
Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details.\\n\\n## Completing the property\\n\\nAn additional property that we did not verify is:\\n\\n```python\\nforall (l : list[int]),\\n exists (index : int),\\n 0 \u2264 index < len(l) and\\n l[index] = my_max(l)\\n```\\n\\nIt says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise.\\n\\n:::info For more\\n\\nIf you want to go into more details for the formal verification of Python programs, you can look at our [coq-of-python](https://github.com/formal-land/coq-of-python) project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for [Rust](https://github.com/formal-land/coq-of-rust) and other languages like [OCaml](https://github.com/formal-land/coq-of-ocaml). Contact us at [contact@formal.land](mailto:contact@formal.land) to discuss!\\n\\n:::\\n\\n## Conclusion\\n\\nWe have presented here the idea of **formal verification**, a technique to verify the absence of bugs in a program by reasoning from **first principles**. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct **for all possible lists of integers**.\\n\\nWe will continue with more blog posts explaining what we can do with formal verification and why it matters. 
Feel free to share this post and to tell us what subjects you want to see covered!"},{"id":"/2024/06/05/software-correctness-from-first-principles","metadata":{"permalink":"/blog/2024/06/05/software-correctness-from-first-principles","source":"@site/blog/2024-06-05-software-correctness-from-first-principles.md","title":"\ud83e\udd84 Software correctness from first principles","description":"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.","date":"2024-06-05T00:00:00.000Z","formattedDate":"June 5, 2024","tags":[{"label":"formal verification","permalink":"/blog/tags/formal-verification"},{"label":"software correctness","permalink":"/blog/tags/software-correctness"},{"label":"first principles","permalink":"/blog/tags/first-principles"},{"label":"example","permalink":"/blog/tags/example"},{"label":"Python","permalink":"/blog/tags/python"}],"readingTime":7.26,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd84 Software correctness from first principles","tags":["formal verification","software correctness","first principles","example","Python"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/formal-verification-for-software-correctness"},"nextItem":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace"}},"content":"**Formal verification** is a technique to verify the absence of bugs in a program by reasoning from **first principles**. 
Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.\\n\\nWe will present this idea in detail and illustrate how it works for a very simple example.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Use of formal verification\\n\\nWe typically use formal verification for critical applications, where either:\\n\\n- life is at stake, like in the case of trains, airplanes, medical devices, or\\n- money is at stake, like in the case of financial applications.\\n\\nWith formal verification, in theory, **we can guarantee that the software will never fail**, as we can check **all possible cases** for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions.\\n\\nIn this research paper [Finding and Understanding Bugs in C Compilers](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf), no bugs were found in the middle-end of the formally verified [CompCert](https://en.wikipedia.org/wiki/CompCert) C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!\\n\\n## Definition of programming languages\\n\\nTo be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. 
For example, in Python, the `if` statement is [defined in the reference manual](https://docs.python.org/3/reference/compound_stmts.html#if) by:\\n\\n```python\\nif_stmt ::= \\"if\\" assignment_expression \\":\\" suite\\n (\\"elif\\" assignment_expression \\":\\" suite)*\\n [\\"else\\" \\":\\" suite]\\n```\\n> It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed.\\n\\nThis means that the Python code:\\n\\n```python\\nif condition:\\n a\\nelse:\\n b\\n```\\n\\nwill execute `a` when the `condition` is true, and `b` otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...).\\n\\nTo make these rules more manageable, we generally split them into two parts:\\n\\n- The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the [grammar](https://docs.python.org/3/reference/grammar.html).\\n- The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the `if` statement.\\n\\nIn formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating \\"syntax errors\\" in case of ill-formed programs.\\n\\n## Example to verify\\n\\nWe consider this short Python example of a function returning the maximum number in a list:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n for x in l:\\n if x > m:\\n m = x\\n return m\\n```\\n\\nWe assume that the list `l` is not empty and only contains integers. 
If we run it on a few examples:\\n\\n```python\\nmy_max([1, 2, 3]) # => 3\\nmy_max([3, 2, 1]) # => 3\\nmy_max([1, 3, 2]) # => 3\\n```\\n\\nit always returns `3`, the biggest number in the list! But can we make sure this is always the case?\\n\\nWe can certainly not run `my_max` on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning.\\n\\n## Formal verification\\n\\nHere is a general specification that we give of the `my_max` function above:\\n\\n```python\\nforall (index : int) (l : list[int]),\\n 0 \u2264 index < len(l) \u21d2\\n l[index] \u2264 my_max(l)\\n```\\n\\nIt says that for all integer `index` and list of integers `l`, if the index is valid (between `0` and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute.\\n\\nTo verify this property for all possible list `l`, we reason by induction. A non-empty list is either:\\n\\n- a list with one element, where the maximum is the only element, or\\n- a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list.\\n\\nAt the start of the code, we will always have:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n```\\n\\nwith `m` being equal to the first item of the list. Then:\\n\\n- If the list has only one element, we iterate only once in the `for` loop, with `x` equal to `l[0]`. The condition:\\n ```python\\n if x > m:\\n ```\\n is then equivalent to:\\n ```python\\n if l[0] > l[0]:\\n ```\\n and is always false. We then return `m = l[0]`, which is the only element of the list, and it verifies our property as:\\n ```python\\n l[0] \u2264 l[0]\\n ```\\n- If the list has at least two elements, we unroll the code execution of the `for` loop and iterate over all the elements until the last one. 
Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:\\n ```python\\n l[index] \u2264 m\\n ```\\n for all `index` between `0` and `len(l) - 2`. When we reach the last element, we have:\\n ```python\\n if x > m:\\n m = x\\n ```\\n with `x` being `l[len(l) - 1]`. There are two possibilities. Either *(i)* `x` is less than or equal to `m`, and we do not update `m`, or *(ii)* `x` is greater than `m`, and we update `m` to `x`. In both cases, the property is verified for the last element of the list, as:\\n 1. In the first case, `m` stays the same, so it is still larger than or equal to all the elements of the list except the last one, as well as larger than or equal to the last one according to this last `if` statement.\\n 2. In the second case, `m` is updated to `x`, which is the last element of the list and a greater value than the original `m`. Then it means that `m` is still larger than or equal to all the elements of the list except the last one, being larger than the original `m`, and larger than or equal to the last one as it is in fact equal to the last one.\\n\\nWe have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code.\\n\\nIn practice, with formal verification, the reasoning above is done in a proof assistant such as [Coq](https://coq.inria.fr/) to help make sure that we did not forget any case and to add automation for most simple cases. 
Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, or some third-party person to check it without reading all the details.\\n\\n## Completing the property\\n\\nAn additional property that we did not verify is:\\n\\n```python\\nforall (l : list[int]),\\n exists (index : int),\\n 0 \u2264 index < len(l) and\\n l[index] = my_max(l)\\n```\\n\\nIt says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise.\\n\\n:::info Contact\\n\\nIf you want to go into more details for the formal verification of Python programs, you can look at our [coq-of-python](https://github.com/formal-land/coq-of-python) project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for [Rust](https://github.com/formal-land/coq-of-rust) and other languages like [OCaml](https://github.com/formal-land/coq-of-ocaml). Contact us at [contact@formal.land](mailto:contact@formal.land) to discuss!\\n\\n:::\\n\\n## Conclusion\\n\\nWe have presented here the idea of **formal verification**, a technique to verify the absence of bugs in a program by reasoning from **first principles**. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct **for all possible lists of integers**.\\n\\nWe will continue with more blog posts explaining what we can do with formal verification and why it matters. 
Feel free to share this post and tell us what subjects you would like to see covered!"},{"id":"/2024/05/22/translation-of-python-code-simulations-from-trace","metadata":{"permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace","source":"@site/blog/2024-05-22-translation-of-python-code-simulations-from-trace.md","title":"\ud83d\udc0d Simulation of Python code from traces in Coq","description":"In order to formally verify Python code in Coq our approach is the following:","date":"2024-05-22T00:00:00.000Z","formattedDate":"May 22, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"},{"label":"simulation","permalink":"/blog/tags/simulation"},{"label":"trace","permalink":"/blog/tags/trace"}],"readingTime":8.59,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum","simulation","trace"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/software-correctness-from-first-principles"},"nextItem":{"title":"\ud83d\udc0d Simulation of Python code in Coq","permalink":"/blog/2024/05/14/translation-of-python-code-simulations"}},"content":"In order to formally verify Python code in Coq our approach is the following:\\n\\n1. Import Python code in Coq by running [coq-of-python](https://github.com/formal-land/coq-of-python).\\n2. Write a purely functional simulation in Coq of the code.\\n3. Show that this simulation is equivalent to the translation.\\n4. Verify the simulation.\\n\\nWe will show in this article how we can merge the steps 2. and 3. to save time in the verification process. 
We do so by relying on the proof mode of Coq and unification.\\n\\nOur mid-term goal is to formally specify the [Ethereum Virtual Machine](https://ethereum.org/en/developers/docs/evm/) (EVM) and prove that this specification is correct according to [reference implementation of the EVM](https://github.com/ethereum/execution-specs) in Python. This would ensure that it is always up-to-date and exhaustive. The code of this project is open-source and available on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python).\\n\\n\x3c!-- truncate --\x3e\\n\\n
\\n ![Python at work](2024-05-22/python.webp)\\n
\\n\\n## Our Python\'s monad \ud83d\udc0d\\n\\nWe put the Python code that we import in Coq in a monad `M` to represent all the features that are hard to express in Coq, mainly the side effects. This monad is a combination of two levels:\\n\\n- `LowM` for the side effects except the control flow.\\n- `M` that adds an error monad on top of `LowM` to handle the control flow (exceptions, `break` instruction, ...).\\n\\n### LowM\\n\\nHere is the definition of the `LowM` monad in [CoqOfPython.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/CoqOfPython.v):\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set -> Set :=\\n | StateAlloc (object : Object.t Value.t) : t (Pointer.t Value.t)\\n | StateRead (mutable : Pointer.Mutable.t Value.t) : t (Object.t Value.t)\\n | StateWrite (mutable : Pointer.Mutable.t Value.t) (update : Object.t Value.t) : t unit\\n | GetInGlobals (globals : Globals.t) (name : string) : t Value.t.\\nEnd Primitive.\\n\\nModule LowM.\\n Inductive t (A : Set) : Set :=\\n | Pure (a : A)\\n | CallPrimitive {B : Set} (primitive : Primitive.t B) (k : B -> t A)\\n | CallClosure {B : Set} (closure : Data.t Value.t) (args kwargs : Value.t) (k : B -> t A)\\n | Impossible.\\n Arguments Pure {_}.\\n Arguments CallPrimitive {_ _}.\\n Arguments CallClosure {_ _}.\\n Arguments Impossible {_}.\\n\\n Fixpoint bind {A B : Set} (e1 : t A) (e2 : A -> t B) : t B :=\\n match e1 with\\n | Pure a => e2 a\\n | CallPrimitive primitive k => CallPrimitive primitive (fun v => bind (k v) e2)\\n | CallClosure closure args kwargs k => CallClosure closure args kwargs (fun a => bind (k a) e2)\\n | Impossible => Impossible\\n end.\\nEnd LowM.\\n```\\n\\nThis is a monad defined by continuation (the variable `k`):\\n\\n- We terminate a computation with the primitive `Pure` and some result `a`, that can be any purely functional expression.\\n- We can call some primitives grouped in `Primitive.t` that are side effects:\\n - `StateAlloc` to allocate a new object in the 
memory,\\n - `StateRead` to read an object from the memory,\\n - `StateWrite` to write an object in the memory,\\n - `GetInGlobals` to read a global variable, doing name resolution. This is a side effect as function definitions in Python do not need to be ordered.\\n- We can call a closure (an anonymous function) with `CallClosure`. This is required for termination, as we cannot define an eval function on the type of Python values since some do not terminate like the [\u03a9 expression](https://medium.com/@dkeout/why-you-must-actually-understand-the-%CF%89-and-y-combinators-c9204241da7a). See our previous post [Translation of Python code to Coq](/blog/2024/05/10/translation-of-python-code) for our definition of Python values. The combinator `CallClosure` is also very convenient to modularize our proofs: we reason on each closure independently.\\n- We can mark a code path as unreachable with `Impossible`.\\n\\n### M\\n\\nThe final monad `M` is defined as:\\n\\n```coq\\nDefinition M : Set :=\\n LowM.t (Value.t + Exception.t).\\n```\\n\\nIt has no parameters as Python is untyped, so all expressions have the same result type:\\n\\n- either a success value of type `Value.t`,\\n- or an exception of type `Exception.t`, with some special cases to represent a `return`, a `break`, or a `continue` instruction.\\n\\nWe define the monadic bind of `M` as for the error monad:\\n\\n```coq\\nDefinition bind (e1 : M) (e2 : Value.t -> M) : M :=\\n LowM.bind e1 (fun v => match v with\\n | inl v => e2 v\\n | inr e => LowM.Pure (inr e)\\n end).\\n```\\n\\n## Traces \ud83d\udc3e\\n\\nWe define our semantics of a computation `e` of type `M` in [simulations/proofs/CoqOfPython.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/simulations/proofs/CoqOfPython.v) with the predicate:\\n\\n```coq\\n{{ stack, heap | e \u21d3 to_value | P_stack, P_heap }}\\n```\\n\\nthat we call a _run_ or a _trace_, saying that:\\n\\n- starting from the initial state `stack`, `heap`,\\n- the 
computation `e` terminates with a value,\\n- that is in the image of the function `to_value`,\\n- and with a final stack and heap that satisfy the predicates `P_stack` and `P_heap`.\\n\\nNote that we do not make explicit the resulting value and memory state of a computation in this predicate. We only say that they exist and satisfy a few properties, which are here for compositionality. We have a purely functional function `evaluate` that can derive the result of a run of a computation:\\n\\n```coq\\nevaluate :\\n forall `{Heap.Trait} {A B : Set}\\n {stack : Stack.t} {heap : Heap} {e : LowM.t B}\\n {to_value : A -> B} {P_stack : Stack.t -> Prop} {P_heap : Heap -> Prop}\\n (run : {{ stack, heap | e \u21d3 to_value | P_stack, P_heap }}),\\n A * { stack : Stack.t | P_stack stack } * { heap : Heap | P_heap heap }\\n```\\n\\nThe function `evaluate` is defined in Coq by a `Fixpoint`. Its result is what we call a _simulation_, which is a purely functional definition equivalent to the original computation `e` from Python. 
It is equivalent by construction.\\n\\n## Building a trace \ud83d\udd28\\n\\nA trace is an inductive in `Set` that we can build with the following constructors:\\n\\n```coq\\nInductive t `{Heap.Trait} {A B : Set}\\n (stack : Stack.t) (heap : Heap)\\n (to_value : A -> B) (P_stack : Stack.t -> Prop) (P_heap : Heap -> Prop) :\\n LowM.t B -> Set :=\\n(* [Pure] primitive *)\\n| Pure\\n (result : A)\\n (result\' : B) :\\n result\' = to_value result ->\\n P_stack stack ->\\n P_heap heap ->\\n {{ stack, heap |\\n LowM.Pure result\' \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* [StateRead] primitive *)\\n| CallPrimitiveStateRead\\n (mutable : Pointer.Mutable.t Value.t)\\n (object : Object.t Value.t)\\n (k : Object.t Value.t -> LowM.t B) :\\n IsRead.t stack heap mutable object ->\\n {{ stack, heap |\\n k object \u21d3\\n to_value\\n | P_stack, P_heap }} ->\\n {{ stack, heap |\\n LowM.CallPrimitive (Primitive.StateRead mutable) k \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* [CallClosure] primitive *)\\n| CallClosure {C : Set}\\n (f : Value.t -> Value.t -> M)\\n (args kwargs : Value.t)\\n (to_value_inter : C -> Value.t + Exception.t)\\n (P_stack_inter : Stack.t -> Prop) (P_heap_inter : Heap -> Prop)\\n (k : Value.t + Exception.t -> LowM.t B) :\\n let closure := Data.Closure f in\\n {{ stack, heap |\\n f args kwargs \u21d3\\n to_value_inter\\n | P_stack_inter, P_heap_inter }} ->\\n (* We quantify over every possible values as we cannot compute the result of the closure here.\\n We only know that it exists and respects some constraints in this inductive definition. *)\\n (forall value_inter stack_inter heap_inter,\\n P_stack_inter stack_inter ->\\n P_heap_inter heap_inter ->\\n {{ stack_inter, heap_inter |\\n k (to_value_inter value_inter) \u21d3\\n to_value\\n | P_stack, P_heap }}\\n ) ->\\n {{ stack, heap |\\n LowM.CallClosure closure args kwargs k \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* ...cases for the other primitives of the monad... 
*)\\n```\\n\\n### Pure\\n\\nIn the `Pure` case we return the final result of the computation. We check the state fulfills the predicate `P_stack` and `P_heap`, and that the result is the image by the function `to_value` of some `result`.\\n\\n### CallPrimitiveStateRead\\n\\nTo read a value in memory, we rely on another predicate `IsRead` that checks if the `mutable` pointer is valid in the `stack` or `heap` and that the `object` is the value at this pointer. We then call the continuation `k` with this object. We have similar rules for allocating a new object in memory and writing at a pointer.\\n\\nNote that we parameterize all our semantics by `` `{Heap.Trait}`` that provides a specific `Heap` type with read and write primitives. We can choose the implementation of the memory model that we want to use in our simulations in order to simplify the reasoning.\\n\\n### CallClosure\\n\\nTo call a closure, we first evaluate the closure with the arguments and keyword arguments. We then call the continuation `k` with the result of the closure. We quantify over all possible results of the closure, as we cannot compute it here. This would require to be able to define `Fixpoint` together with `Inductive`, which is not possible in Coq. So we only know that the result of the closure exists, and can use the constraints on its result (the function `to_value` and the predicates `P_stack_inter` and `P_heap_inter`) to build a run of the continuation.\\n\\nThe other constructors are not presented here but are similar to the above. We will also add a monadic primitive for loops with the following idea: we show that a loop terminates by building a trace, as traces are `Inductive` so must be finite. 
We have no rules for the `Impossible` case so that building the trace of a computation also shows that the `Impossible` calls are in unreachable paths.\\n\\n## Example \ud83d\udd0d\\n\\nWe have applied this technique to a small code example with allocation, memory read, and closure call primitives. We were able to show that the resulting simulation obtained by running `evaluate` on the trace is equal to a simulation written by hand. The proof was just the tactic `reflexivity`. We believe that we can automate most of the tactics used to build a run, except for the allocations where the user needs to make a choice (immediate, stack, or heap allocation, which address, ...).\\n\\nTo continue our experiments we now need to complete our semantics of Python, especially to take into account method and operator calls.\\n\\n## Conclusion\\n\\nWe have presented an alternative way to build simulations of imperative Python code in purely functional Coq code. The idea is to enable faster reasoning over Python code by removing the need to build explicit simulations. We plan to port this technique to other tools like [coq-of-rust](https://github.com/formal-land/coq-of-rust) as well.\\n\\nTo see what we can do for you talk with us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7. For our previous projects, see our [formal verification of the Tezos\' L1](https://formal-land.gitlab.io/coq-tezos-of-ocaml/)!"},{"id":"/2024/05/14/translation-of-python-code-simulations","metadata":{"permalink":"/blog/2024/05/14/translation-of-python-code-simulations","source":"@site/blog/2024-05-14-translation-of-python-code-simulations.md","title":"\ud83d\udc0d Simulation of Python code in Coq","description":"We are continuing to specify the Ethereum Virtual Machine (EVM) in the formal verification language Coq. 
We are working from the automatic translation in Coq of the reference implementation of the EVM, which is written in the language Python.","date":"2024-05-14T00:00:00.000Z","formattedDate":"May 14, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"}],"readingTime":6.63,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Simulation of Python code in Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace"},"nextItem":{"title":"\ud83d\udc0d Translation of Python code to Coq","permalink":"/blog/2024/05/10/translation-of-python-code"}},"content":"We are continuing to specify the [Ethereum Virtual Machine](https://ethereum.org/en/developers/docs/evm/) (EVM) in the formal verification language [Coq](https://coq.inria.fr/). We are working from the [automatic translation in Coq](https://github.com/formal-land/coq-of-python/tree/main/CoqOfPython/ethereum) of the [reference implementation of the EVM](https://github.com/ethereum/execution-specs), which is written in the language [Python](https://www.python.org/).\\n\\nIn this article, we will see how we specify the EVM in Coq by writing an interpreter that closely mimics the behavior of the Python code. We call that implementation a _simulation_ as it aims to reproduce the behavior of the Python code, the reference.\\n\\nIn contrast to the automatic translation from Python, the simulation is a manual translation written in idiomatic Coq. We expect it to be ten times smaller in lines compared to the automatic translation, and of about the same size as the Python code. 
This is because the automatic translation needs to encode all the Python specific features in Coq, like variable mutations and the class system.\\n\\nIn the following article, we will show how we can prove that the simulation is correct, meaning that it behaves exactly as the automatic translation.\\n\\nThe code of this project is open-source and available on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python). This work follows a call from [Vitalik Buterin](https://en.wikipedia.org/wiki/Vitalik_Buterin) for more formal verification of the Ethereum\'s code.\\n\\n\x3c!-- truncate --\x3e\\n\\n
\\n ![Python writing simulations](2024-05-14/python_simulation.webp)\\n
\\n\\n## The `add` function \ud83e\uddee\\n\\nWe focus on a simulation for the `add` function in [vm/instructions/arithmetic.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/paris/vm/instructions/arithmetic.py) that implements the addition primitive of the EVM. The Python code is:\\n\\n```python\\ndef add(evm: Evm) -> None:\\n \\"\\"\\"\\n Adds the top two elements of the stack together, and pushes the result back\\n on the stack.\\n\\n Parameters\\n ----------\\n evm :\\n The current EVM frame.\\n\\n \\"\\"\\"\\n # STACK\\n x = pop(evm.stack)\\n y = pop(evm.stack)\\n\\n # GAS\\n charge_gas(evm, GAS_VERY_LOW)\\n\\n # OPERATION\\n result = x.wrapping_add(y)\\n\\n push(evm.stack, result)\\n\\n # PROGRAM COUNTER\\n evm.pc += 1\\n```\\n\\nMost of the functions of the interpreter are written in this style. They take the global state of the interpreter, called `Evm` as input, and mutate it with the effect of the current instruction.\\n\\nThe `Evm` structure is defined as:\\n\\n```python\\n@dataclass\\nclass Evm:\\n \\"\\"\\"The internal state of the virtual machine.\\"\\"\\"\\n\\n pc: Uint\\n stack: List[U256]\\n memory: bytearray\\n code: Bytes\\n gas_left: Uint\\n env: Environment\\n valid_jump_destinations: Set[Uint]\\n logs: Tuple[Log, ...]\\n refund_counter: int\\n running: bool\\n message: Message\\n output: Bytes\\n accounts_to_delete: Set[Address]\\n touched_accounts: Set[Address]\\n return_data: Bytes\\n error: Optional[Exception]\\n accessed_addresses: Set[Address]\\n accessed_storage_keys: Set[Tuple[Address, Bytes32]]\\n```\\n\\nIt contains the current instruction pointer `pc`, the stack of the EVM, the memory, the code, the gas left, ...\\n\\nAs the EVM is a stack-based machine, the addition function does the following:\\n\\n1. It pops the two top elements of the stack `x` and `y`,\\n2. It charges a very low amount of gas,\\n3. It computes the result of the addition `result = x + y`,\\n4. It pushes the result back on the stack,\\n5. 
It increments the program counter `pc`.\\n\\nNote that all these operations might fail and raise an exception, for example, if the stack is empty when we pop `x` and `y` at the beginning.\\n\\n## Monad for the simulations \ud83e\uddea\\n\\nThe main side-effects that we want to integrate into the Coq simulations are:\\n\\n- the mutation of the global state `Evm`,\\n- the raising of exceptions.\\n\\nFor that, we use a state and error monad `MS?`:\\n\\n```coq\\nModule StateError.\\n Definition t (State Error A : Set) : Set :=\\n State -> (A + Error) * State.\\n\\n Definition return_ {State Error A : Set}\\n (value : A) :\\n t State Error A :=\\n fun state => (inl value, state).\\n\\n Definition bind {State Error A B : Set}\\n (value : t State Error A)\\n (f : A -> t State Error B) :\\n t State Error B :=\\n fun state =>\\n let (value, state) := value state in\\n match value with\\n | inl value => f value state\\n | inr error => (inr error, state)\\n end.\\nEnd StateError.\\n\\nNotation \\"MS?\\" := StateError.t.\\n```\\n\\nWe parametrize it by an equivalent definition in Coq of the type `Evm` and the type of exceptions that we might raise.\\n\\nIn Python the exceptions are a class that is extended as needed to add new kinds of exceptions. We use a closed sum type in Coq to represent all the possible exceptions that might happen in the EVM interpreter.\\n\\nFor the `Evm` state, some functions might actually only modify a part of it. For example, the `pop` function only modifies the `stack` field. We use a mechanism of [lens](https://medium.com/javascript-scene/lenses-b85976cb0534) to specialize the state monad to only modify a part of the state. For example, the `pop` function has the type:\\n\\n```coq\\npop : MS? (list U256.t) Exception.t U256.t\\n```\\n\\nwhere `list U256.t` is the type of the stack, while the `add` function has type:\\n\\n```coq\\nadd : MS? 
Evm.t Exception.t unit\\n```\\n\\nWe define a lens for the stack in the `Evm` type with:\\n\\n```coq\\nModule Lens.\\n Record t (Big_A A : Set) : Set := {\\n read : Big_A -> A;\\n write : Big_A -> A -> Big_A\\n }.\\nEnd Lens.\\n\\nModule Evm.\\n Module Lens.\\n Definition stack : Lens.t Evm.t (list U256.t) := {|\\n Lens.read := (* ... *);\\n Lens.write := (* ... *);\\n |}.\\n```\\n\\nWe can then lift the `pop` function to be used in a context where the `Evm` state is modified with:\\n\\n```coq\\nletS? x := StateError.lift_lens Evm.Lens.stack pop in\\n```\\n\\n## Typing discipline \ud83d\udc6e\\n\\nWe keep in Coq all the type names from the Python source code. When a new class is created we create a new Coq type. When the class inherits from another one, we add a field in the Coq type to represent the parent class. Thus we work by composition rather than inheritance.\\n\\nHere is an example of the primitive types defined in [base_types.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/base_types.py):\\n\\n```python\\nclass FixedUint(int):\\n MAX_VALUE: ClassVar[\\"FixedUint\\"]\\n\\n # ...\\n\\n def __add__(self: T, right: int) -> T:\\n # ...\\n\\nclass U256(FixedUint):\\n MAX_VALUE = 2**256 - 1\\n\\n # ...\\n```\\n\\nWe simulate it by:\\n\\n```coq\\nModule FixedUint.\\n Record t : Set := {\\n MAX_VALUE : Z;\\n value : Z;\\n }.\\n\\n Definition __add__ (self right_ : t) : M? Exception.t t :=\\n (* ... *).\\nEnd FixedUint.\\n\\nModule U256.\\n Inductive t : Set :=\\n | Make (value : FixedUint.t).\\n\\n Definition of_Z (value : Z) : t :=\\n Make {|\\n FixedUint.MAX_VALUE := 2^256 - 1;\\n FixedUint.value := value;\\n |}.\\n\\n (* ... 
*)\\nEnd U256.\\n```\\n\\nFor the imports, that are generally written with an explicit list of names:\\n\\n```python\\nfrom ethereum.base_types import U255_CEIL_VALUE, U256, U256_CEIL_VALUE, Uint\\n```\\n\\nwe follow the same pattern in Coq:\\n\\n```coq\\nRequire ethereum.simulations.base_types.\\nDefinition U255_CEIL_VALUE := base_types.U255_CEIL_VALUE.\\nModule U256 := base_types.U256.\\nDefinition U256_CEIL_VALUE := base_types.U256_CEIL_VALUE.\\nModule Uint := base_types.Uint.\\n```\\n\\nThis is a bit more verbose than the usual way in Coq to import a module, but it makes the translation more straightforward.\\n\\n## Final simulation \ud83e\udeb6\\n\\nFinally, our Coq simulation of the `add` function is the following:\\n\\n```coq\\nDefinition add : MS? Evm.t Exception.t unit :=\\n (* STACK *)\\n letS? x := StateError.lift_lens Evm.Lens.stack pop in\\n letS? y := StateError.lift_lens Evm.Lens.stack pop in\\n\\n (* GAS *)\\n letS? _ := charge_gas GAS_VERY_LOW in\\n\\n (* OPERATION *)\\n let result := U256.wrapping_add x y in\\n\\n letS? _ := StateError.lift_lens Evm.Lens.stack (push result) in\\n\\n (* PROGRAM COUNTER *)\\n letS? _ := StateError.lift_lens Evm.Lens.pc (fun pc =>\\n (inl tt, Uint.__add__ pc (Uint.Make 1))) in\\n\\n returnS? tt.\\n```\\n\\nWe believe that it has a size and readability close to the original Python code. You can look at this definition in [vm/instructions/simulations/arithmetic.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/ethereum/paris/vm/instructions/simulations/arithmetic.v). As a reference, the automatic translation is 65 lines long and in [vm/instructions/arithmetic.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/ethereum/paris/vm/instructions/arithmetic.v).\\n\\n## Conclusion\\n\\nWe have seen how to write a simulation for one example of a Python function. We now need to do it for the rest of the code of the interpreter. 
We will also see in a following article how to prove that the simulation behaves as the automatic translation of the Python code in Coq.\\n\\nFor our formal verification services, reach us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7! To know more about what we have done, see [our previous project](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the verification of the L1 of Tezos."},{"id":"/2024/05/10/translation-of-python-code","metadata":{"permalink":"/blog/2024/05/10/translation-of-python-code","source":"@site/blog/2024-05-10-translation-of-python-code.md","title":"\ud83d\udc0d Translation of Python code to Coq","description":"We are starting to work on a new product, coq-of-python. The idea of this tool is, as you can guess, to translate Python code to the proof system Coq.","date":"2024-05-10T00:00:00.000Z","formattedDate":"May 10, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"}],"readingTime":10.445,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Translation of Python code to Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Simulation of Python code in Coq","permalink":"/blog/2024/05/14/translation-of-python-code-simulations"},"nextItem":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","permalink":"/blog/2024/04/26/translation-core-alloc-crates"}},"content":"We are starting to work on a new product, [coq-of-python](https://github.com/formal-land/coq-of-python). 
The idea of this tool is, as you can guess, to translate Python code to the [proof system Coq](https://coq.inria.fr/).\\n\\nWe want to import specifications written in Python to a formal system like Coq. In particular, we are interested in the [reference specification](https://github.com/ethereum/execution-specs) of [Ethereum](https://ethereum.org/), which describes how [EVM smart contracts](https://ethereum.org/en/developers/docs/evm/) run. Then, we will be able to use this specification to either formally verify the various implementations of the EVM or smart contracts.\\n\\nAll this effort follows [a Tweet](https://twitter.com/VitalikButerin/status/1759369749887332577) from [Vitalik Buterin](https://en.wikipedia.org/wiki/Vitalik_Buterin) hoping for more formal verification of the Ethereum\'s code:\\n\\n> One application of AI that I am excited about is AI-assisted formal verification of code and bug finding.\\n>\\n> Right now ethereum\'s biggest technical risk probably is bugs in code, and anything that could significantly change the game on that would be amazing.\\n>\\n> — Vitalik Buterin\\n\\nWe will now describe the technical development of `coq-of-python`. For the curious, all the code is on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python).\\n\\n\x3c!-- truncate --\x3e\\n\\n
\\n ![Python with a rooster](2024-05-10/python_rooster.webp)\\n \x3c!--
A python with a rooster
--\x3e\\n
\\n\\n## Reading Python code \ud83d\udcd6\\n\\nA first step we need to do to translate Python code is to read it in a programmatic way. For simplicity and better integration, we chose to write `coq-of-python` in Python.\\n\\nWe use the [ast](https://docs.python.org/3/library/ast.html) module to parse the code and get an abstract syntax tree (AST) of the code. This is a tree representation of the code that we can manipulate in Python. We could have used other representations, such as the Python bytecode, but it seemed too low-level to be understandable by a human.\\n\\nGiven the path to a Python file, we get its AST with the following code:\\n\\n```python\\nimport ast\\n\\ndef read_python_file(path: str) -> ast.Module:\\n with open(path, \\"r\\") as file:\\n return ast.parse(file.read())\\n```\\n\\nThis code is very short, and we benefit from the general elegance of Python. There is no typing or advanced data types in Python, keeping the AST rather small. Here is an extract of it:\\n\\n```\\nexpr = BoolOp(boolop op, expr* values)\\n | NamedExpr(expr target, expr value)\\n | BinOp(expr left, operator op, expr right)\\n | UnaryOp(unaryop op, expr operand)\\n | Lambda(arguments args, expr body)\\n | IfExp(expr test, expr body, expr orelse)\\n | Dict(expr* keys, expr* values)\\n | Set(expr* elts)\\n | ListComp(expr elt, comprehension* generators)\\n | SetComp(expr elt, comprehension* generators)\\n | ... more cases ...\\n```\\n\\nAn expression is described as being of one of several kinds. For example, the application of a binary operator such as:\\n\\n```python\\n1 + 2\\n```\\n\\ncorresponds to the case `BinOp` with `1` as the `left` expression, `+` as the `op` operator, and `2` as the `right` expression.\\n\\n## Outputting Coq code \ud83d\udcdd\\n\\nWe translate each element of the Python\'s AST into a string of Coq code. We keep track of the current indentation level in order to present a nice output. 
Here is the code to translate the binary operator expressions:\\n\\n```python\\ndef generate_expr(indent, is_with_paren, node: ast.expr):\\n if isinstance(node, ast.BoolOp):\\n ...\\n elif isinstance(node, ast.BinOp):\\n return paren(\\n is_with_paren,\\n generate_operator(node.op) + \\" (|\\\\n\\" +\\n generate_indent(indent + 1) +\\n generate_expr(indent + 1, False, node.left) + \\",\\\\n\\" +\\n generate_indent(indent + 1) +\\n generate_expr(indent + 1, False, node.right) + \\"\\\\n\\" +\\n generate_indent(indent) + \\"|)\\"\\n )\\n elif ...\\n```\\n\\nWe have the current number of indentation levels in the `indent` variable. We use the flag `is_with_paren` to know whether we should add parenthesis around the current expression if it is the sub-expression of another one.\\n\\nWe apply the `node.op` operator on the two parameters `node.left` and `node.right`. For example, the translation of the Python code `1 + 2` will be:\\n\\n```coq\\nBinOp.add (|\\n Constant.int 1,\\n Constant.int 2\\n|)\\n```\\n\\nWe use a special notation `f (| x1, ..., xn |)` to represent a function application in a monadic context. In the next section, we explain why we need this notation.\\n\\n## Monad and values \ud83d\udd2e\\n\\nOne of the difficulties in translating some code to a language such as Coq is that Coq is purely functional. This means that a function can never modify a variable or raise an exception. The non-purely functional actions are called side-effects.\\n\\nTo solve this issue, we represent the side-effects of the Python code in a [monad]() in Coq. A monad is a special data structure representing the side-effects of a computation. 
We can chain monadic actions together to represent a sequence of side-effects.\\n\\nWe thus have two Coq types:\\n\\n- `Value.t` for the Python values (there is only one type for all values, as Python is a dynamically typed language),\\n- `M` for the monadic expressions.\\n\\nNote that we do not need to parametrize the monad by the type of the values, as we only have one type of value.\\n\\n### Values\\n\\nAccording to the reference manual of Python on the [data model](https://docs.python.org/3/reference/datamodel.html):\\n\\n> All data in a Python program is represented by objects or by relations between objects.\\n\\n> Every object has an identity, a type and a value. An object\u2019s identity never changes once it has been created; you may think of it as the object\u2019s address in memory.\\n\\n> Like its identity, an object\u2019s type is also unchangeable.\\n\\n> The value of some objects can change. Objects whose value can change are said to be mutable; objects whose value is unchangeable once they are created are called immutable.\\n\\nBy following this description, we propose this formalization for the values:\\n\\n```coq\\nModule Data.\\n Inductive t (Value : Set) : Set :=\\n | Ellipsis\\n | Bool (b : bool)\\n | Integer (z : Z)\\n | Tuple (items : list Value)\\n (* ... various other primitive types like lists, ... 
*)\\n | Closure {Value M : Set} (f : Value -> Value -> M)\\n | Klass {Value M : Set}\\n (bases : list (string * string))\\n (class_methods : list (string * (Value -> Value -> M)))\\n (methods : list (string * (Value -> Value -> M))).\\nEnd Data.\\n\\nModule Object.\\n Record t {Value : Set} : Set := {\\n internal : option (Data.t Value);\\n fields : list (string * Value);\\n }.\\nEnd Object.\\n\\nModule Pointer.\\n Inductive t (Value : Set) : Set :=\\n | Imm (data : Object.t Value)\\n | Mutable {Address A : Set}\\n (address : Address)\\n (to_object : A -> Object.t Value).\\nEnd Pointer.\\n\\nModule Value.\\n Inductive t : Set :=\\n | Make (globals : string) (klass : string) (value : Pointer.t t).\\nEnd Value.\\n```\\n\\nWe describe a `Value.t` by:\\n\\n- its type, given by a class name `klass` and a module name `globals` from which the class is defined,\\n- its value, given by a pointer to an object.\\n\\nA `Pointer.t` is either an immutable object `Imm` or a mutable object `Mutable` with an address and a function to get the object from what is stored in the memory. This function `to_object` is required as we plan to allow the user to provide its own custom memory model.\\n\\nAn `Object.t` has a list of named fields that we can populate in the `__init__` method of a class. It also has a special `internal` field that we can use to store special kinds of data, like primitive values.\\n\\nIn `Data.t`, we list the various primitive values that we use to define the primitive types of the Python language. 
We have:\\n\\n- atomic values such as booleans, integers, strings, ...\\n- composite values such as tuples, lists, dictionaries, ...\\n- closures with a function that takes the two arguments `*args` and `**kwargs` and returns a monadic value,\\n- classes with their bases, class methods, and instance methods.\\n\\n### Monad\\n\\nFor now, we axiomatize the monad `M`:\\n\\n```coq\\nParameter M : Set.\\n```\\n\\nWe will see later how to define it, probably by taking some inspiration from our monad from our similar project [coq-of-rust](https://github.com/formal-land/coq-of-rust).\\n\\nTo make the monadic code less heavy, we use a notation inspired by the `async/await` notation of many languages. We believe it to be less heavy than the monadic notation of languages like [Haskell](https://www.haskell.org/). We note:\\n\\n```coq\\nf (| x1, ..., xn |)\\n```\\n\\nto call a function `f` of type:\\n\\n```coq\\nValue.t -> ... -> Value.t -> M\\n```\\n\\nwith the arguments `x1`, ..., `xn` of type `Value.t` and binds its result to the current continuation in the context of the tactic `ltac:(M.monadic ...)`. See our blog post [Monadic notation for the Rust translation](/blog/2024/04/03/monadic-notation-for-rust-translation) for more information.\\n\\nIn summary:\\n\\n- `f (| x1, ..., xn |)` is like `await`,\\n- `ltac:(M.monadic ...)` is like `async`.\\n\\n## Handling of the names \ud83c\udff7\ufe0f\\n\\nNow we talk about how we handle the variable names and link them to their definitions. In the reference manual of Python, the part [Execution model](https://docs.python.org/3/reference/executionmodel.html) gives some information.\\n\\nFor now, we distinguish between two scopes, the global one (top-level definitions) and the local one for variables defined in a function. We might introduce a stack of local scopes to handle nested functions.\\n\\nWe name the global scope with a string, that is the path of the current file. 
Having absolute names helps us translate each file independently. The only file that a translated file requires is `CoqOfPython.CoqOfPython`, to have the definition of the values and the monad.\\n\\nTo translate `import` statements, we use assertions:\\n\\n```coq\\nAxiom ethereum_crypto_imports_elliptic_curve :\\n IsImported globals \\"ethereum.crypto\\" \\"elliptic_curve\\".\\nAxiom ethereum_crypto_imports_finite_field :\\n IsImported globals \\"ethereum.crypto\\" \\"finite_field\\".\\n```\\n\\nThis represents:\\n\\n```python\\nfrom . import elliptic_curve, finite_field\\n```\\n\\nIt means that in the current global scope `globals` we can use the name `\\"elliptic_curve\\"` from the other global scope `\\"ethereum.crypto\\"`.\\n\\nWe set the local scope at the entry of a function with the call:\\n\\n```coq\\nM.set_locals (| args, kwargs, [ \\"x1\\"; ...; \\"xn\\" ] |)\\n```\\n\\nfor a function whose parameter names are `x1`, ..., `xn`. For uniformity, we always group the function\'s parameters as `*args` and `**kwargs`. We do not yet handle the default values.\\n\\nWhen a user creates or updates a local variable `x` with a value `value`, we run:\\n\\n```coq\\nM.assign_local \\"x\\" value : M\\n```\\n\\nTo read a variable, we have a primitive:\\n\\n```coq\\nM.get_name : string -> string -> M\\n```\\n\\nIt takes as parameters the name of the current global scope and the name of the variable we are reading. The local scope should be accessible from the monad. For now all these primitives are axiomatized.\\n\\n## Some numbers \ud83d\udcca\\n\\nThe code base that we analyze, the Python specification of Ethereum, contains _28,455 lines_ of Python, excluding comments. When we translate it to Coq we obtain _299,484 lines_. This is roughly a tenfold increase.\\n\\nThe generated code completely compiles. For now, we avoid some complex Python expressions, like list comprehension, by generating a dummy expression instead. 
Having all the code that compiles will allow us to iterate and add support for more Python features with a simple check: making sure that all the code still compiles.\\n\\nAs an example, we translate the following function:\\n\\n```python\\ndef bnf2_to_bnf12(x: BNF2) -> BNF12:\\n \\"\\"\\"\\n Lift a field element in `BNF2` to `BNF12`.\\n \\"\\"\\"\\n return BNF12.from_int(x[0]) + BNF12.from_int(x[1]) * (\\n BNF12.i_plus_9 - BNF12.from_int(9)\\n )\\n```\\n\\nto the Coq code:\\n\\n```coq\\nDefinition bnf2_to_bnf12 : Value.t -> Value.t -> M :=\\n fun (args kwargs : Value.t) => ltac:(M.monadic (\\n let _ := M.set_locals (| args, kwargs, [ \\"x\\" ] |) in\\n let _ := Constant.str \\"\\n Lift a field element in `BNF2` to `BNF12`.\\n \\" in\\n let _ := M.return_ (|\\n BinOp.add (|\\n M.call (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"from_int\\" |),\\n make_list [\\n M.get_subscript (|\\n M.get_name (| globals, \\"x\\" |),\\n Constant.int 0\\n |)\\n ],\\n make_dict []\\n |),\\n BinOp.mult (|\\n M.call (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"from_int\\" |),\\n make_list [\\n M.get_subscript (|\\n M.get_name (| globals, \\"x\\" |),\\n Constant.int 1\\n |)\\n ],\\n make_dict []\\n |),\\n BinOp.sub (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"i_plus_9\\" |),\\n M.call (|\\n M.get_field (| M.get_name (| globals, \\"BNF12\\" |), \\"from_int\\" |),\\n make_list [\\n Constant.int 9\\n ],\\n make_dict []\\n |)\\n |)\\n |)\\n |)\\n |) in\\n M.pure Constant.None_)).\\n```\\n\\n## Conclusion\\n\\nWe continue working on the translation from Python to Coq, especially to now add a semantics to the translation. Our next goal is to have a version, written in idiomatic Coq, of the file [src/ethereum/paris/vm/instructions/arithmetic.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/paris/vm/instructions/arithmetic.py), and proven equal to the original code. 
This will open the door to making a Coq specification of the EVM that is always synchronized to the Python\'s version.\\n\\nFor our services, reach us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7! We want to ensure the blockchain\'s L1 and L2 are bug-free, thanks to a mathematical analysis of the code. See [our previous project](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the L1 of Tezos."},{"id":"/2024/04/26/translation-core-alloc-crates","metadata":{"permalink":"/blog/2024/04/26/translation-core-alloc-crates","source":"@site/blog/2024-04-26-translation-core-alloc-crates.md","title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","description":"We continue our work on formal verification of Rust programs with our tool coq-of-rust, to translate Rust code to the formal proof system Coq. One of the limitations we had was the handling of primitive constructs from the standard library of Rust, like Option::unwrap_or_default or all other primitive functions. For each of these functions, we had to make a Coq definition to represent its behavior. 
This is both tedious and error prone.","date":"2024-04-26T00:00:00.000Z","formattedDate":"April 26, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"core","permalink":"/blog/tags/core"},{"label":"alloc","permalink":"/blog/tags/alloc"}],"readingTime":5.365,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","tags":["coq-of-rust","Rust","Coq","translation","core","alloc"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Translation of Python code to Coq","permalink":"/blog/2024/05/10/translation-of-python-code"},"nextItem":{"title":"\ud83e\udd80 Monadic notation for the Rust translation","permalink":"/blog/2024/04/03/monadic-notation-for-rust-translation"}},"content":"We continue our work on formal verification of [Rust](https://www.rust-lang.org/) programs with our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust), to translate Rust code to the formal proof system [Coq](https://coq.inria.fr/). One of the limitations we had was the handling of primitive constructs from the standard library of Rust, like [Option::unwrap_or_default](https://doc.rust-lang.org/core/option/enum.Option.html#method.unwrap_or_default) or all other primitive functions. For each of these functions, we had to make a Coq definition to represent its behavior. This is both tedious and error prone.\\n\\nTo solve this issue, we worked on the translation of the [core](https://doc.rust-lang.org/core/) and [alloc](https://doc.rust-lang.org/alloc/) crates of Rust using `coq-of-rust`. These are very large code bases, with a lot of unsafe or advanced Rust code. We present what we did to have a \\"best effort\\" translation of these crates. 
The resulting translation is in the following folders:\\n\\n- [CoqOfRust/alloc](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/alloc)\\n- [CoqOfRust/core](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/core)\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat \u260e\ufe0f!\\n\\n:::\\n\\n
\\n ![Crab in a library](2024-04-26/crab-in-library.webp)\\n
A crab in a library
\\n
\\n\\n## Initial run \ud83d\udc25\\n\\nAn initial run of `coq-of-rust` on the `alloc` and `core` crates of Rust generated two files of a few hundred thousand lines of Coq corresponding to the whole translation of these crates. This is a first piece of good news, as it means the tool runs on these large code bases. However, the generated Coq code does not compile, even if the errors are very rare (one every few thousand lines).\\n\\nTo get an idea, here is the size of the input Rust code as given by the `cloc` command:\\n\\n- `alloc`: 26,299 lines of Rust code\\n- `core`: 54,192 lines of Rust code\\n\\nGiven that this code uses macros that we expand in our translation, the actual size that we have to translate is even bigger.\\n\\n## Splitting the generated code \ud83e\ude93\\n\\nThe main change we made was to split the output generated by `coq-of-rust` with one file for each input Rust file. This is possible because our translation is insensitive to the order of definitions and context-free. So, even if there are typically cyclic dependencies between the files in Rust, something that is forbidden in Coq, we can still split them.\\n\\nWe get the following sizes as output:\\n\\n- `alloc`: 54 Coq files, 171,783 lines of Coq code\\n- `core`: 190 Coq files, 592,065 lines of Coq code\\n\\nThe advantages of having the code split are:\\n\\n- it is easier to read and navigate in the generated code\\n- it is easier to compile as we can parallelize the compilation\\n- it is easier to debug as we can focus on one file at a time\\n- it is easier to ignore files that do not compile\\n- it will be easier to maintain, as it is easier to follow the diff of a single file\\n\\n## Fixing some bugs \ud83d\udc1e\\n\\nWe had some bugs related to the collisions between module names. These can occur when we choose a name for the module for an `impl` block. We fixed these by adding more information in the module names to make them more unique, like the `where` clauses that were missing. 
For example, for the implementation of the `Default` trait for the `Mapping` type:\\n\\n```rust\\n#[derive(Default)]\\nstruct Mapping {\\n // ...\\n}\\n```\\n\\nwe were generating the following Coq code:\\n\\n```coq\\nModule Impl_core_default_Default_for_dns_Mapping_K_V.\\n (* ...trait implementation ... *)\\nEnd Impl_core_default_Default_for_dns_Mapping_K_V.\\n```\\n\\nWe now generate:\\n\\n```coq\\nModule Impl_core_default_Default_where_core_default_Default_K_where_core_default_Default_V_for_dns_Mapping_K_V.\\n (* ... *)\\n```\\n\\nwith a module name that includes the `where` clauses of the `impl` block, stating that both `K` and `V` should implement the `Default` trait.\\n\\nHere is the list of files that do not compile in Coq, as of today:\\n\\n- `alloc/boxed.v`\\n- `core/any.v`\\n- `core/array/mod.v`\\n- `core/cmp/bytewise.v`\\n- `core/error.v`\\n- `core/escape.v`\\n- `core/iter/adapters/flatten.v`\\n- `core/net/ip_addr.v`\\n\\nThis represents 4% of the files. Note that in the files that compile there are some unhandled Rust constructs that are axiomatized, so this does not give the whole picture of what we do not support.\\n\\n## Example \ud83d\udd0e\\n\\nHere is the source code of the `unwrap_or_default` method for the `Option` type:\\n\\n```rust\\npub fn unwrap_or_default(self) -> T\\nwhere\\n T: Default,\\n{\\n match self {\\n Some(x) => x,\\n None => T::default(),\\n }\\n}\\n```\\n\\nWe translate it to:\\n\\n```coq\\nDefinition unwrap_or_default (T : Ty.t) (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n let Self : Ty.t := Self T in\\n match \u03c4, \u03b1 with\\n | [], [ self ] =>\\n ltac:(M.monadic\\n (let self := M.alloc (| self |) in\\n M.read (|\\n M.match_operator (|\\n self,\\n [\\n fun \u03b3 =>\\n ltac:(M.monadic\\n (let \u03b30_0 :=\\n M.get_struct_tuple_field_or_break_match (|\\n \u03b3,\\n \\"core::option::Option::Some\\",\\n 0\\n |) in\\n let x := M.copy (| \u03b30_0 |) in\\n x));\\n fun \u03b3 =>\\n ltac:(M.monadic\\n (M.alloc (|\\n 
M.call_closure (|\\n M.get_trait_method (| \\"core::default::Default\\", T, [], \\"default\\", [] |),\\n []\\n |)\\n |)))\\n ]\\n |)\\n |)))\\n | _, _ => M.impossible\\n end.\\n```\\n\\nWe prove that it is equivalent to the simpler functional code:\\n\\n```coq\\nDefinition unwrap_or_default {T : Set}\\n {_ : core.simulations.default.Default.Trait T}\\n (self : Self T) :\\n T :=\\n match self with\\n | None => core.simulations.default.Default.default (Self := T)\\n | Some x => x\\n end.\\n```\\n\\nThis simpler definition is what we use when verifying code. The proof of equivalence is in [CoqOfRust/core/proofs/option.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/core/proofs/option.v). In case the original source code changes, we are sure to capture these changes thanks to our proof. Because the translation of the `core` library was done automatically, we trust the generated definitions more than definitions that would be done by hand. However, there can still be mistakes or incompleteness in `coq-of-rust`, so we still need to check at proof time that the code makes sense.\\n\\n## Conclusion\\n\\nWe can now work on the verification of Rust programs with more trust in our formalization of the standard library. Our next target is to simplify our proof process, which is still tedious. In particular, showing that simulations are equivalent to the original Rust code requires doing the name resolution, introduction of high-level types, and removal of the side-effects. We would like to split these steps.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) \ud83d\udc8c! 
Formal verification provides the highest level of safety for critical applications, with a mathematical guarantee of the absence of bugs for a given specification."},{"id":"/2024/04/03/monadic-notation-for-rust-translation","metadata":{"permalink":"/blog/2024/04/03/monadic-notation-for-rust-translation","source":"@site/blog/2024-04-03-monadic-notation-for-rust-translation.md","title":"\ud83e\udd80 Monadic notation for the Rust translation","description":"At Formal Land our mission is to reduce the cost of finding bugs in software. We use formal verification, that is to say mathematical reasoning on code, to make sure we find more bugs than with testing. As part of this effort, we are working on a tool coq-of-rust to translate Rust code to Coq, a proof assistant, to analyze Rust programs. Here we present a technical improvement we made in this tool.","date":"2024-04-03T00:00:00.000Z","formattedDate":"April 3, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"monad","permalink":"/blog/tags/monad"}],"readingTime":5.2,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Monadic notation for the Rust translation","tags":["coq-of-rust","Rust","Coq","translation","monad"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","permalink":"/blog/2024/04/26/translation-core-alloc-crates"},"nextItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","permalink":"/blog/2024/03/22/improvements-rust-translation-part-3"}},"content":"At Formal Land our mission is to reduce the cost of finding bugs in software. 
We use [formal verification](https://runtimeverification.com/blog/formal-verification-lore), that is to say mathematical reasoning on code, to make sure we find more bugs than with testing. As part of this effort, we are working on a tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) to translate Rust code to Coq, a proof assistant, to analyze Rust programs. Here we present a technical improvement we made in this tool.\\n\\nOne of the challenges of our translation from Rust to Coq is that the generated code is very verbose. The size increase is about tenfold in our examples. One reason is that we use a monad to represent side effects in Coq, so we need to name each intermediate result and apply the `bind` operator. Here, we will present a monadic notation that avoids naming intermediate results to make the code more readable.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat \u260e\ufe0f!\\n\\n:::\\n\\n
\\n ![Crab with a pen](2024-04-03/crab-writing.webp)\\n
A crab writing
\\n
\\n\\n## Example \ud83d\udd0e\\n\\nHere is the Rust source code that we consider:\\n\\n```rust\\nfn add(a: i32, b: i32) -> i32 {\\n a + b\\n}\\n```\\n\\nBefore, we were generating the following Coq code, with `let*` as the notation for the bind:\\n\\n```coq\\nDefinition add (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \u03c4, \u03b1 with\\n | [], [ a; b ] =>\\n let* a := M.alloc a in\\n let* b := M.alloc b in\\n let* \u03b10 := M.read a in\\n let* \u03b11 := M.read b in\\n BinOp.Panic.add \u03b10 \u03b11\\n | _, _ => M.impossible\\n end.\\n```\\n\\nNow, with the new monadic notation, we generate:\\n\\n```coq\\nDefinition add (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \u03c4, \u03b1 with\\n | [], [ a; b ] =>\\n ltac:(M.monadic\\n (let a := M.alloc (| a |) in\\n let b := M.alloc (| b |) in\\n BinOp.Panic.add (| M.read (| a |), M.read (| b |) |)))\\n | _, _ => M.impossible\\n end.\\n```\\n\\nThe main change is that we do not need to introduce intermediate `let*` expressions with generated names. The code structure is more similar to the original Rust code, with additional calls to memory primitives such as `M.alloc` and `M.read`.\\n\\nThe notation `f (| x1, ..., xn |)` represents the call to the function `f` with the arguments `x1`, ..., `xn` returning a monadic result. We bind the result with the current continuation that goes up to the wrapping `ltac:(M.monadic ...)` tactic. We automatically transform the `let` into a `let*` with the `M.monadic` tactic when needed.\\n\\n## Where do we use this notation? \ud83e\udd14\\n\\nWe use this notation in all the function bodies that we generate, that are all in a monad to represent side effects. We call the `ltac:(M.monadic ...)` tactic at the start of the functions, as well as at the start of closure bodies that are defined inside functions. 
This also applies to the translation of `if`, `match`, and `loop` expressions, as we represent their bodies as functions.\\n\\nHere is an example of code with a `match` expression:\\n\\n```rust\\nfn add(a: i32, b: i32) -> i32 {\\n match a - b {\\n 0 => a + b,\\n _ => a - b,\\n }\\n}\\n```\\n\\nWe translate it to:\\n\\n```coq\\nDefinition add (\u03c4 : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \u03c4, \u03b1 with\\n | [], [ a; b ] =>\\n ltac:(M.monadic\\n (let a := M.alloc (| a |) in\\n let b := M.alloc (| b |) in\\n M.read (|\\n M.match_operator (|\\n M.alloc (| BinOp.Panic.sub (| M.read (| a |), M.read (| b |) |) |),\\n [\\n fun \u03b3 =>\\n ltac:(M.monadic\\n (let _ :=\\n M.is_constant_or_break_match (|\\n M.read (| \u03b3 |),\\n Value.Integer Integer.I32 0\\n |) in\\n M.alloc (|\\n BinOp.Panic.add (| M.read (| a |), M.read (| b |) |)\\n |)));\\n fun \u03b3 =>\\n ltac:(M.monadic (\\n M.alloc (|\\n BinOp.Panic.sub (| M.read (| a |), M.read (| b |) |)\\n |)\\n ))\\n ]\\n |)\\n |)))\\n | _, _ => M.impossible\\n end.\\n```\\n\\nWe see that we call the tactic `M.monadic` for each branch of the `match` expression.\\n\\n## How does it work? \ud83d\udee0\ufe0f\\n\\nThe `M.monadic` tactic is defined in [M.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/M.v). The main part is:\\n\\n```coq showLineNumbers\\nLtac monadic e :=\\n lazymatch e with\\n (* ... *)\\n | context ctxt [M.run ?x] =>\\n lazymatch context ctxt [M.run x] with\\n | M.run x => monadic x\\n | _ =>\\n refine (M.bind _ _);\\n [ monadic x\\n | let v := fresh \\"v\\" in\\n intro v;\\n let y := context ctxt [v] in\\n monadic y\\n ]\\n end\\n (* ... *)\\n end.\\n```\\n\\nIn our translation of Rust, all of the values have the common type `Value.t`. The monadic bind is of type `M -> (Value.t -> M) -> M` where `M` is the type of the monad. The `M.run` function is an axiom that we use as a marker to know where we need to apply `M.bind`. 
The type of `M.run` is:\\n\\n```coq\\nAxiom run : M -> Value.t.\\n```\\n\\nThe notation for monadic function calls is defined using the `M.run` axiom with:\\n\\n```coq\\nNotation \\"e (| e1 , .. , en |)\\" := (M.run ((.. (e e1) ..) en)).\\n```\\n\\nWhen we encounter a `M.run` (line 4) we apply the `M.bind` (line 8) to the monadic expression `x` (line 9) and its continuation `ctxt` that we obtain thanks to the `context` keyword (line 4) of the matching of expressions in Ltac.\\n\\nThere is another case in the `M.monadic` tactic to handle the `let` expressions, that is not shown here.\\n\\n## Conclusion\\n\\nThanks to this new monadic notation, the generated Coq code is more readable and closer to the original Rust code. This should simplify our work in writing proofs on the generated code, as well as debugging the translation.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) \ud83d\udc8c! Formal verification provides the highest level of safety for critical applications, with a mathematical guarantee of the absence of bugs for a given specification."},{"id":"/2024/03/22/improvements-rust-translation-part-3","metadata":{"permalink":"/blog/2024/03/22/improvements-rust-translation-part-3","source":"@site/blog/2024-03-22-improvements-rust-translation-part-3.md","title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","description":"We explained how we started updating our translation tool coq-of-rust in our previous blog post, to support more of the Rust language. Our goal is to provide formal verification for the Rust \ud83e\udd80 language, relying on the proof system Coq \ud83d\udc13. 
We will see in this post how we continue implementing changes in coq-of-rust to:","date":"2024-03-22T00:00:00.000Z","formattedDate":"March 22, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":10.105,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","tags":["coq-of-rust","Rust","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Monadic notation for the Rust translation","permalink":"/blog/2024/04/03/monadic-notation-for-rust-translation"},"nextItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","permalink":"/blog/2024/03/08/improvements-rust-translation-part-2"}},"content":"We explained how we started updating our translation tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) in our [previous blog post](/blog/2024/03/08/improvements-rust-translation-part-2), to support more of the Rust language. Our goal is to provide formal verification for the Rust \ud83e\udd80 language, relying on the proof system Coq \ud83d\udc13. We will see in this post how we continue implementing changes in `coq-of-rust` to:\\n\\n1. remove the types from the translation,\\n2. be independent of the ordering of the definitions.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::info\\n\\n- Previous post: [Improvements in the Rust translation to Coq, part 2](/blog/2024/03/08/improvements-rust-translation-part-2)\\n\\n:::\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. 
We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat \u260e\ufe0f!\\n\\n:::\\n\\n## Translating the `dns` example \ud83d\ude80\\n\\nWe continue with our previous example [dns.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/dns.rs), which is composed of around 200 lines of Rust code.\\n\\n### Borrow and dereference\\n\\nThe next error that we encounter when type-checking the Coq translation of `dns.rs` is:\\n\\n```\\nFile \\"./examples/default/examples/ink_contracts/dns.v\\", line 233, characters 22-27:\\nError: The reference deref was not found in the current environment.\\n```\\n\\nIn Rust, we can either take the address of a value with `&`, or dereference a reference with `*`. In our translation, we do not distinguish between the four following pointer types:\\n\\n- `&`\\n- `&mut`\\n- `*const`\\n- `*mut`\\n\\nWe let the user handle these in different ways if it can simplify their proofs, especially regarding the distinction between mutable and non-mutable pointers. It simplifies the definition of our borrowing and dereferencing operators, as we need only two to cover all cases. We even go further: we remove these two operators in the translation, as they are the identity in our case!\\n\\nTo better understand why they are the identity, we need to see that there are two kinds of Rust values in our representation:\\n\\n- the value itself and\\n- the value with its address.\\n\\nThe value itself is useful to compute over the values. For example, we use it to define the primitive addition over integers. The value with its address corresponds to the final Rust expression. Indeed, we can take the address of any sub-expression in Rust with the `&` operator, so each sub-expression should come with its address. 
When we take the address of an expression, we:\\n\\n- start from a value with its address and go to\\n- a value that is an address to the value above, which we will need to allocate to have an address for it also.\\n\\nThus, the `&` operator behaves as the identity function followed by an allocation. Similarly, the `*` is a memory read followed by the identity function. Since we already use the alloc and read operations to go from a value to a value with its address and the other way around, we do not need to define the `*` and `&` operators in our translation and remove them.\\n\\n### Primitive operators\\n\\nWe now need to distinguish between the function calls, that use the primitive:\\n\\n```coq\\nM.get_function : string -> M\\n```\\n\\nto find the right function to call when defining the semantics of the program (even if the function is defined later), and the calls to primitive operators (`+`, `*`, `!`, ...) that we define in our base library for Rust in Coq. The full list of primitive operators is given by:\\n\\n- [rustc_middle::mir::syntax::BinOp](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/mir/syntax/enum.BinOp.html)\\n- [rustc_middle::thir::LogicalOp](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/thir/enum.LogicalOp.html) (with lazy evaluation of the parameters)\\n- [rustc_middle::mir::syntax::UnOp](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/mir/syntax/enum.UnOp.html)\\n\\nWe adapted the handling of primitive operators from the code we had before and added a few other fixes so that now the `dns.rs` example type-checks in Coq \ud83c\udf8a! We will now focus on fixing the other examples.\\n\\n## Cleaning the code \ud83e\uddfc\\n\\nBut let us first clean the code a bit. 
All the expressions in the internal [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of `coq-of-rust` are in a wrapper with the current type of the expression:\\n\\n```rust\\npub(crate) struct Expr {\\n pub(crate) kind: Rc,\\n pub(crate) ty: Option>,\\n}\\n\\npub(crate) enum ExprKind {\\n Pure(Rc),\\n LocalVar(String),\\n Var(Path),\\n Constructor(Path),\\n // ... all the cases\\n```\\n\\nHaving access to the type of each sub-expression was useful before annotating the `let` expressions. This is not required anymore, as all the values have the type `Value.t`. Thus, we remove the wrapper `Expr` and rename `ExprKind` into `Expr`. The resulting code is easier to read, as wrapping everything with a type was verbose sometimes.\\n\\nWe also cleaned some translated types that were not used anymore in the code, removed unused `Derive` traits, and removed the monadic translation on the types.\\n\\n
\\n ![Crab in space](2024-03-22/crab-in-space.webp)\\n
A crab safely walking in space thanks to formal verification.
\\n
\\n\\n## Handling the remaining examples\\n\\nTo handle the remaining examples of our test suite (extracted from the snippets of the [Rust by Example](https://doc.rust-lang.org/rust-by-example/) book), we mainly needed to re-implement the pattern matching on the new untyped values. Here is an example of Rust code with matching:\\n\\n```rust\\nfn matching(tuple: (i32, i32)) -> i32 {\\n match tuple {\\n (0, 0) => 0,\\n (_, _) => 1,\\n }\\n}\\n```\\n\\nwith its translation in Coq:\\n\\n```coq showLineNumbers\\nDefinition matching (\ud835\udf0f : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \ud835\udf0f, \u03b1 with\\n | [], [ tuple ] =>\\n let* tuple := M.alloc tuple in\\n let* \u03b10 :=\\n match_operator\\n tuple\\n [\\n fun \u03b3 =>\\n let* \u03b30_0 := M.get_tuple_field \u03b3 0 in\\n let* \u03b30_1 := M.get_tuple_field \u03b3 1 in\\n let* _ :=\\n let* \u03b10 := M.read \u03b30_0 in\\n M.is_constant_or_break_match \u03b10 (Value.Integer Integer.I32 0) in\\n let* _ :=\\n let* \u03b10 := M.read \u03b30_1 in\\n M.is_constant_or_break_match \u03b10 (Value.Integer Integer.I32 0) in\\n M.alloc (Value.Integer Integer.I32 0);\\n fun \u03b3 =>\\n let* \u03b30_0 := M.get_tuple_field \u03b3 0 in\\n let* \u03b30_1 := M.get_tuple_field \u03b3 1 in\\n M.alloc (Value.Integer Integer.I32 1)\\n ] in\\n M.read \u03b10\\n | _, _ => M.impossible\\n end.\\n```\\n\\nHere is a breakdown of how it works:\\n\\n- On line 6 we call the `match_operator` primitive that takes a value to match on, `tuple`, and a list of functions that try to match the value with a pattern and execute some code in case of success. We execute the matching functions successively until one succeeds and we stop. There should be at least one succeeding function as pattern-match in Rust is exhaustive.\\n- On line 10 we get the first element of the tuple. 
Note that, more precisely, what we get is the address of the first element of `\u03b3` that is the address of the tuple `tuple` given as parameter to the function. Having the address might be required for some operations, like doing subsequent matching by reference or using the `&` operator in the `match`\'s body.\\n- On line 11 we do the same with the second element of the tuple. The indices for `\u03b3` are generated to avoid name clashes. They correspond to the depth of the sub-pattern being considered, followed by the index of the current item in this sub-pattern.\\n- On line 14, we check that the first element of the tuple is `0`. We use the `M.is_constant_or_break_match` primitive that checks if the value is a constant and if it is equal to the expected value. If it is not the case, it exits the current matching function, and the `match_operator` primitive will evaluate the next one, going to line 19.\\n- On line 24 we return the final result. Note that we always do a `M.alloc` followed by `M.read` to return the result. This could be simplified, as immediately reading an allocated value is like running the identity function.\\n\\nBy implementing the new version of the pattern-matching, as well as a few other smaller fixes, we were able to make all the examples type-check again! We now need to fix the proofs we had on the [erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/erc20.v) example, as the generated code changed a lot.\\n\\n## Updating the proofs \ud83d\udc69\u200d\ud83d\ude80\\n\\nUnfortunately, all these changes in the generated code are breaking our proofs. We still want to write our specifications and proofs by first showing a simulation of the Rust code with a simpler and functional definition. 
Before, with our simulations, we were:\\n\\n- replacing the management of pointers by either stateless functions or functions in a state monad;\\n- simplifying the error handling, especially for code that cannot panic.\\n\\nNow we also have to:\\n\\n- define the types;\\n- add the typing information;\\n- add the trait constraints and resolve the trait instances;\\n- resolve the function or associated function calls.\\n\\nWe have not finished updating the proofs but still merged our work in `main` with the pull request [#472](https://github.com/formal-land/coq-of-rust/pull/472) as this was taking too long. The proof that we want to update is in the file [proofs/erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/proofs/erc20.v) and is about the smart contract [erc20.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/erc20.rs).\\n\\n### Phi operators \ud83c\udfa0\\n\\nOur basic strategy for the proof, in order to handle the untyped Rust values of the new translation, is to define various `\u03c6` operators coming from a user-defined Coq type to a Rust value of type `Value.t`. These translate the data types that we define to represent the Rust types of the original program. Note that we previously had trouble translating the Rust types in the general case, especially for mutually recursive types or types involving a lot of trait manipulations.\\n\\nMore formally, we introduce the Coq typeclass:\\n\\n```coq\\nClass ToValue (A : Set) : Set := {\\n \u03a6 : Ty.t;\\n \u03c6 : A -> Value.t;\\n}.\\nArguments \u03a6 _ {_}.\\n```\\n\\nThis describes how to go from a user-defined type in Coq to the equivalent representation in `Value.t`. In addition to the `\u03c6` operator, we also define the `\u03a6` operator that gives the Rust type of the Coq type. This type is required to give for polymorphic definitions.\\n\\nWe always go from user-defined types to `Value.t`. 
We write our simulation statements like this:\\n\\n```coq\\n{{env, state |\\n code.example.get_at_index [] [\u03c6 vector; \u03c6 index] \u21d3\\n inl (\u03c6 (simulations.example.get_at_index vector index))\\n| state\'}}\\n```\\n\\nwhere:\\n\\n```coq\\n{{env, state | rust_program \u21d3 simulation_result | state\'}}\\n```\\n\\nis our predicate to state an evaluation of a Rust program to a simulation result. We apply the `\u03c6` operator to the arguments of the Rust program and to the result of the simulation. In some proofs, we set this operator as `Opaque` in order to keep track of it and avoid unwanted reductions.\\n\\n### Traits\\n\\nThe trait definitions, as well as trait constraints, are absent from the generated Coq code. For now, we add them back as follows, for the example of the `Default` trait:\\n\\n1. We define a `Default` typeclass in Coq:\\n\\n ```coq\\n Module Default.\\n Class Trait (Self : Set) : Set := {\\n default : Self;\\n }.\\n End Default.\\n ```\\n\\n2. We define what it means to implement the `Default` trait and have a corresponding simulation:\\n\\n ```coq\\n Module Default.\\n Record TraitHasRun (Self : Set)\\n `{ToValue Self}\\n `{core.simulations.default.Default.Trait Self} :\\n Prop := {\\n default :\\n exists default,\\n IsTraitMethod\\n \\"core::default::Default\\" (\u03a6 Self) []\\n \\"default\\" default /\\\\\\n Run.pure\\n (default [] [])\\n (inl (\u03c6 core.simulations.default.Default.default));\\n }.\\n End Default.\\n ```\\n\\n where `Run.pure` is our simulation predicate for the case where the `state` does not change.\\n\\n3. Finally, we use the `TraitHasRun` predicate as an additional hypothesis for simulation proofs on functions that depend on the `Default` trait in Rust:\\n\\n ```coq\\n (** Simulation proof for `unwrap_or_default` on the type `Option`. 
*)\\n Lemma run_unwrap_or_default {T : Set}\\n {_ : ToValue T}\\n {_ : core.simulations.default.Default.Trait T}\\n (self : option T) :\\n core.proofs.default.Default.TraitHasRun T ->\\n Run.pure\\n (core.option.Impl_Option_T.unwrap_or_default (\u03a6 T) [] [\u03c6 self])\\n (inl (\u03c6 (core.simulations.option.Impl_Option_T.unwrap_or_default self))).\\n Proof.\\n (* ... *)\\n Qed.\\n ```\\n\\n## Conclusion \u270d\ufe0f\\n\\nWe still have a lot to do, especially in finding the right approach to verify the newly generated Rust code. But we have finalized our new translation mode without types and ordering, which helps to successfully translate many more Rust examples. We also do not need to translate the dependencies of a project anymore before compiling it.\\n\\nOur next target is to translate the whole of Rust\'s standard library (with the help of some axioms for the expressions which we do not handle yet), in order to have a faithful definition of the Rust primitives, such as functions of the [option](https://doc.rust-lang.org/core/option/) and [vec](https://doc.rust-lang.org/alloc/vec/) modules.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) \ud83d\udc8c! Formal verification provides the highest level of safety for critical applications, with a mathematical guarantee of the absence of bugs for a given specification."},{"id":"/2024/03/08/improvements-rust-translation-part-2","metadata":{"permalink":"/blog/2024/03/08/improvements-rust-translation-part-2","source":"@site/blog/2024-03-08-improvements-rust-translation-part-2.md","title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","description":"In our previous blog post, we stated our plan to improve our translation of Rust \ud83e\udd80 to Coq \ud83d\udc13 with coq-of-rust. 
We also provided a new definition for our Rust monad in Coq, and the definition of a unified type to represent any Rust values. We will now see how we modify the Rust implementation of coq-of-rust to make the generated code use these new definitions.","date":"2024-03-08T00:00:00.000Z","formattedDate":"March 8, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":9.055,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","tags":["coq-of-rust","Rust","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 3","permalink":"/blog/2024/03/22/improvements-rust-translation-part-3"},"nextItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","permalink":"/blog/2024/02/29/improvements-rust-translation"}},"content":"In our [previous blog post](/blog/2024/02/29/improvements-rust-translation), we stated our plan to improve our translation of Rust \ud83e\udd80 to Coq \ud83d\udc13 with [coq-of-rust](https://github.com/formal-land/coq-of-rust). We also provided a new definition for our Rust monad in Coq, and the definition of a unified type to represent any Rust values. We will now see how we modify the Rust implementation of `coq-of-rust` to make the generated code use these new definitions.\\n\\nWith this new translation strategy, to support more Rust code, we want:\\n\\n1. to remove the types from the translation,\\n2. 
to avoid the need to order the definitions in the generated Coq code.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::info\\n\\n- Next post: [Improvements in the Rust translation to Coq, part 3](/blog/2024/03/22/improvements-rust-translation-part-3)\\n- Previous post: [Improvements in the Rust translation to Coq, part 1](/blog/2024/02/29/improvements-rust-translation)\\n\\n:::\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase bug-free with [formal verification](https://en.wikipedia.org/wiki/Formal_verification).\\n\\nContact us at [contact@formal.land](mailto:contact@formal.land) to chat!\\n\\n:::\\n\\n## Implementation of the monad\\n\\nWe implemented the new monad and the type `Value.t` holding any kind of Rust values as described in the previous blog post. For now, we have removed the definitions related to the standard library of Rust (everything except the base definitions such as the integer types). This should not be an issue to type-check the generated Coq code, as the new code should be independent of the ordering of definitions: in particular, it should type-check even if the needed definitions are not yet there.\\n\\nWe added some definitions for the primitive unary and binary operators. 
These include some operations on the integers such as arithmetic operations (with or without overflow, depending on the compilation mode), as well as comparisons (equality, less than or equal to, ...).\\n\\nNow that the main library file [CoqOfRust/CoqOfRust.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/CoqOfRust.v) compiles in Coq, we can start to test the translation on our examples.\\n\\n## Generating the tests\\n\\nWe generate new snapshots for our translations with:\\n\\n```sh\\ncargo build && time python run_tests.py\\n```\\n\\nThis builds the project `coq-of-rust` (with a lot of warnings about unused code for now) and re-generates our snapshots: for each Rust file in the [examples](https://github.com/formal-land/coq-of-rust/tree/main/examples) directory, we generate a Coq file with the same name but the extension `.v`. We generate two versions:\\n\\n- one in axiom mode, where all definitions are axiomatized, to translate libraries, for example, and\\n- one in full definition mode, where we also translate the bodies of the function definitions.\\n\\n## Axiom mode\\n\\nWe first try to type-check and fix the code generated in axiom mode.\\n\\n### Type aliases\\n\\nWe have a first error for type aliases that we do not translate properly. We need access to the fully qualified name of the alias. 
We do that by combining calls to the functions:\\n\\n- [crate_name](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.crate_name) to get the name of the current crate and\\n- [def_path](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/ty/context/struct.TyCtxt.html#method.def_path) to get the whole definition path without the crate name.\\n\\nAs a result, for the file [examples/ink_contracts/basic_contract_caller.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/basic_contract_caller.rs), we translate the type alias:\\n\\n```rust\\ntype Hash = [u8; 32];\\n```\\n\\ninto the Coq code:\\n\\n```coq\\nAxiom Hash :\\n (Ty.path \\"basic_contract_caller::Hash\\") =\\n (Ty.apply (Ty.path \\"array\\") [Ty.path \\"u8\\"]).\\n```\\n\\nThen, during the proofs, we will be able to substitute the type `Hash` by its definition when it appears. Note that we now translate types by values of the type `Ty.t`, so there should be no difficulties in rewriting types.\\n\\nWe should add the length of the array in the type. This is not done yet.\\n\\n### Traits\\n\\nIn axiom mode, we remove most of the trait definitions. Instead, with our new translation model, the traits are mostly unique names (the absolute path of the trait definition). The main use of traits is to distinguish them from other traits, to know which trait implementation to use when calling a trait\'s method. We still translate the provided methods (that are default methods in the trait definition) to axioms and add a predicate stating that they are associated with the current trait. 
For example, we translate the following Rust trait:\\n\\n```rust\\n// crate `my_crate`\\n\\ntrait Animal {\\n fn new(name: &\'static str) -> Self;\\n\\n fn name(&self) -> &\'static str;\\n fn noise(&self) -> &\'static str;\\n\\n fn talk(&self) {\\n println!(\\"{} says {}\\", self.name(), self.noise());\\n }\\n}\\n```\\n\\nto the Coq code:\\n\\n```coq\\n(* Trait *)\\nModule Animal.\\n Parameter talk : (list Ty.t) -> (list Value.t) -> M.\\n\\n Axiom ProvidedMethod_talk : M.IsProvidedMethod \\"my_crate::Animal\\" talk.\\nEnd Animal.\\n```\\n\\nWe realize with this example that the translation in axiom mode generates very few errors, as we remove all the type definitions and all the function axioms have the same signature:\\n\\n```coq\\n(* A list of types that can be empty for non-polymorphic functions,\\n a list of parameters, and a return value in the monad `M`. *)\\nlist Ty.t -> list Value.t -> M\\n```\\n\\nso the type-checking of these axioms never fails. We thus jump to the full definition mode as this is where our new approach might fail.\\n\\n## Definition mode\\n\\nWe now try to type-check the generated Coq code in full definition mode. We start with the [dns.rs](https://github.com/formal-land/coq-of-rust/blob/main/examples/ink_contracts/dns.rs) smart contract example.\\n\\n### Polymorphic trait implementation\\n\\nThis example is interesting, as it contains polymorphic implementations, such as for the [mock](https://en.wikipedia.org/wiki/Mock_object) type `Mapping`:\\n\\n```rust\\n#[derive(Default)]\\nstruct Mapping {\\n _key: core::marker::PhantomData,\\n _value: core::marker::PhantomData,\\n}\\n```\\n\\nthat implements the [Default](https://doc.rust-lang.org/core/default/trait.Default.html) trait on the type `Mapping` for two type parameters `K` and `V`. 
We translate it to:\\n\\n```coq showLineNumbers\\n(* Struct Mapping *)\\n\\nModule Impl_core_default_Default_for_dns_Mapping_K_V.\\n (*\\n Default\\n *)\\n Definition default (\ud835\udf0f : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \ud835\udf0f, \u03b1 with\\n | [ Self; K; V ], [] =>\\n let* \u03b10 :=\\n M.get_method\\n \\"core::default::Default\\"\\n \\"default\\"\\n [ (* Self *) Ty.apply (Ty.path \\"core::marker::PhantomData\\") [ K ] ] in\\n let* \u03b11 := M.call \u03b10 [] in\\n let* \u03b12 :=\\n M.get_method\\n \\"core::default::Default\\"\\n \\"default\\"\\n [ (* Self *) Ty.apply (Ty.path \\"core::marker::PhantomData\\") [ V ] ] in\\n let* \u03b13 := M.call \u03b12 [] in\\n M.pure\\n (Value.StructRecord \\"dns::Mapping\\" [ (\\"_key\\", \u03b11); (\\"_value\\", \u03b13) ])\\n | _, _ => M.impossible\\n end.\\n\\n Axiom Implements :\\n forall (K V : Ty.t),\\n M.IsTraitInstance\\n \\"core::default::Default\\"\\n (* Self *) (Ty.apply (Ty.path \\"dns::Mapping\\") [ K; V ])\\n []\\n [ (\\"default\\", InstanceField.Method default) ]\\n [ K; V ].\\nEnd Impl_core_default_Default_for_dns_Mapping_K_V.\\n```\\n\\nHere are the interesting bits of this code:\\n\\n- On line 1, we translate the `Mapping` type into a single comment, as the types disappear in our translation and become just markers. The marker for `Mapping` is its absolute name `Ty.path \\"dns::Mapping\\"`.\\n- On line 7, the function `default` takes a list of types `\ud835\udf0f` as a parameter in case it is polymorphic. Here, this method is not polymorphic, but we still add the `\ud835\udf0f` parameter for uniformity. We also take three additional type parameters:\\n\\n - `Self`\\n - `K`\\n - `V`\\n\\n that represent the `Self` type on which the trait is implemented, and the two type parameters of the `Mapping` type. 
These will be provided when calling the `default` method.\\n\\n- On line 11, we use the primitive `M.get_method` (axiomatized for now) to get the method `default` of the trait `core::default::Default` for the type `core::marker::PhantomData`. Here, we see that having access to the type `K` in the body of the `default` function is useful, as it helps us to disambiguate between the various implementations of the `Default` trait instances that we call. Here, we provide the `Self` type of the trait in a list of a single element. If the `Default` trait or the `default` method were polymorphic, we would also append these type parameters in this list.\\n- On line 15, we call the `default` method instance that we found with an empty list of arguments.\\n- On line 23, we build a value of type `Mapping` with the two fields `_key` and `_value` initialized with the results of the two calls to the `default` method. We use the `Value.StructRecord` constructor to build the value, and its result is of type `Value.t` like all other Rust values.\\n- On line 24, we eliminate a case with a wrong number of type and value arguments. This should never happen as the arity of all the function calls is checked by the Rust type-checker.\\n- On line 27, we state that we have a new instance of the `Default` trait for the `Mapping` type, with the `default` method implemented by the `default` function. 
This is true for any values of the types `K` and `V`.\\n- On line 34, we specify that `[K, V]` are the type parameters of this implementation that should be given as extra parameters when calling the `default` method of this instance, together with the `Self` type.\\n\\n### Polymorphic implementation\\n\\nNext, we have a polymorphic implementation of mock associated functions for the `Mapping` type:\\n\\n```rust\\nimpl Mapping {\\n fn contains(&self, _key: &K) -> bool {\\n unimplemented!()\\n }\\n\\n // ...\\n```\\n\\nWe translate it to:\\n\\n```coq showLineNumbers\\nModule Impl_dns_Mapping_K_V.\\n Definition Self (K V : Ty.t) : Ty.t :=\\n Ty.apply (Ty.path \\"dns::Mapping\\") [ K; V ].\\n\\n (*\\n fn contains(&self, _key: &K) -> bool {\\n unimplemented!()\\n }\\n *)\\n Definition contains (\ud835\udf0f : list Ty.t) (\u03b1 : list Value.t) : M :=\\n match \ud835\udf0f, \u03b1 with\\n | [ Self; K; V ], [ self; _key ] =>\\n let* self := M.alloc self in\\n let* _key := M.alloc _key in\\n let* \u03b10 := M.var \\"core::panicking::panic\\" in\\n let* \u03b11 := M.read (mk_str \\"not implemented\\") in\\n let* \u03b12 := M.call \u03b10 [ \u03b11 ] in\\n never_to_any \u03b12\\n | _, _ => M.impossible\\n end.\\n\\n Axiom AssociatedFunction_contains :\\n forall (K V : Ty.t),\\n M.IsAssociatedFunction (Self K V) \\"contains\\" contains [ K; V ].\\n\\n (* ... *)\\n```\\n\\nWe follow a similar approach as for the translation of trait implementations, especially regarding the handling of polymorphic type variables. Here are some differences:\\n\\n- On line 2, we define a `Self` type as a function of the type parameters `K` and `V`. This is useful for avoiding repeating the same type expression later.\\n- On line 22, we use the predicate `M.IsAssociatedFunction` to state that we have a new associated function `contains` for the `Mapping` type, with the `contains` method implemented by the `contains` function. This is true for any values of the types `K` and `V`. 
Like for the trait implementations, we explicit the list `[K, V]` that will be given as an extra parameter to the function `contains`.\\n\\n## Conclusion\\n\\nIn the next blog post, we will see how we continue to translate the examples in full definition mode. There is still a lot to do to get to the same level of Rust support as before, but we are hopeful that our new approach will be more robust and easier to maintain.\\n\\nIf you are interested in formally verifying your Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land)! Formal verification provides the highest level of safety for critical applications. See the [White House report on secure software development](https://www.whitehouse.gov/wp-content/uploads/2024/02/Final-ONCD-Technical-Report.pdf) for more on the importance of formal verification."},{"id":"/2024/02/29/improvements-rust-translation","metadata":{"permalink":"/blog/2024/02/29/improvements-rust-translation","source":"@site/blog/2024-02-29-improvements-rust-translation.md","title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","description":"Our tool coq-of-rust is translating Rust \ud83e\udd80 programs to the proof system Coq \ud83d\udc13 to do formal verification on Rust programs. 
Even if we are able to verify realistic code, such as an ERC-20 smart contract, coq-of-rust still has some limitations:","date":"2024-02-29T00:00:00.000Z","formattedDate":"February 29, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":12.655,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","tags":["coq-of-rust","Rust","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 2","permalink":"/blog/2024/03/08/improvements-rust-translation-part-2"},"nextItem":{"title":"\ud83e\uddab Translating Go to Coq, part 1","permalink":"/blog/2024/02/22/journey-coq-of-go"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) is translating Rust \ud83e\udd80 programs to the proof system Coq \ud83d\udc13 to do formal verification on Rust programs. Even if we are able to verify realistic code, such as an [ERC-20 smart contract](/blog/2023/12/13/rust-verify-erc-20-smart-contract), `coq-of-rust` still has some limitations:\\n\\n- fragile trait handling\\n- difficulties in ordering the definitions, in their order of dependencies as required by Coq\\n\\nWe will present how we plan to improve our tool to address these limitations.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::info\\n\\n- Next post: [Improvements in the Rust translation to Coq, part 2](/blog/2024/03/08/improvements-rust-translation-part-2)\\n\\n:::\\n\\n## Introduction\\n\\nAs emphasized in the [recent report from the White House](https://www.whitehouse.gov/wp-content/uploads/2024/02/Final-ONCD-Technical-Report.pdf), memory safety and formal verification are keys to ensure secure and correct software. 
Rust provides memory safety and we provide formal verification on top of it with `coq-of-rust`.\\n\\nWe will take the Rust [serde](https://github.com/serde-rs/serde) serialization library to have an example of code to translate in Coq. This is a popular Rust library that is used in almost all projects, either as a direct or transitive dependency. Serialization has a simple specification (being a bijection between the data and its serialized form) and is a good candidate for formal verification. We might verify this library afterwards if there is a need.\\n\\n:::tip Contact\\n\\nThis work is funded by the [Aleph Zero](https://alephzero.org/) crypto-currency in order to verify their Rust smart contracts. You can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase totally bug-free. Contact us at [contact@formal.land](mailto:contact@formal.land) to chat! We offer a free audit to assess the feasibility of formal verification on your case.\\n\\n:::\\n\\n:::note Goal\\n\\nOur company goal is to make formal verification accessible to all projects, reducing its cost to 20% of the development cost. There should be no reason to have bugs in end-user products!\\n\\n:::\\n\\n## Warnings\\n\\nWe start by running the command:\\n\\n```sh\\ncargo coq-of-rust\\n```\\n\\nin the `serde` directory. We get a lot of warnings, but the translation does not panic as it tries to always produce something for debugging purposes. We have two kinds of warnings.\\n\\n### Constants in patterns\\n\\nThe warning is the following:\\n\\n```\\nwarning: Constants in patterns are not yet supported.\\n --\x3e serde/src/de/mod.rs:2277:13\\n |\\n2277 | 0 => panic!(), // special case elsewhere\\n | ^\\n```\\n\\nThe reason why we did not handle constants in patterns is that they are represented in a special format in the Rust compiler that was not obvious to handle. 
The definition of [rustc_middle::mir::consts::Const](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/mir/consts/enum.Const.html) representing the constants in patterns is:\\n\\n```rust\\npub enum Const<\'tcx> {\\n Ty(Const<\'tcx>),\\n Unevaluated(UnevaluatedConst<\'tcx>, Ty<\'tcx>),\\n Val(ConstValue<\'tcx>, Ty<\'tcx>),\\n}\\n```\\n\\nThere are three cases, and each contains several more cases. To fix this issue, we added the code to handle the signed and unsigned integers, which are enough for our `serde` example. We will need to add other cases later, especially for the strings. This allowed us to discover and fix a bug in our handling of patterns for tuples with elision `..`, like in the example:\\n\\n```rust\\nfn main() {\\n let triple = (0, -2, 3);\\n\\n match triple {\\n (0, y, z) => println!(\\"First is `0`, `y` is {:?}, and `z` is {:?}\\", y, z),\\n (1, ..) => println!(\\"First is `1` and the rest doesn\'t matter\\"),\\n (.., 2) => println!(\\"last is `2` and the rest doesn\'t matter\\"),\\n (3, .., 4) => println!(\\"First is `3`, last is `4`, and the rest doesn\'t matter\\"),\\n _ => println!(\\"It doesn\'t matter what they are\\"),\\n }\\n}\\n```\\n\\nThese changes are in the pull-request [coq-of-rust#470](https://github.com/formal-land/coq-of-rust/pull/470).\\n\\n### Unimplemented `parent_kind`\\n\\nWe get a second form of warning:\\n\\n```\\nunimplemented parent_kind: Struct\\nexpression: Expr {\\n kind: ZstLiteral {\\n user_ty: None,\\n },\\n ty: FnDef(\\n DefId(2:31137 ~ core[10bc]::cmp::Reverse::{constructor#0}),\\n [\\n T/#1,\\n ],\\n ),\\n temp_lifetime: Some(\\n Node(14),\\n ),\\n span: serde/src/de/impls.rs:778:22: 778:29 (#0),\\n}\\n```\\n\\nThis is for some cases of expressions [rustc_middle::thir::ExprKind::ZstLiteral](https://doc.rust-lang.org/beta/nightly-rustc/rustc_middle/thir/enum.ExprKind.html#variant.ZstLiteral) in the Rust\'s [THIR representation](https://rustc-dev-guide.rust-lang.org/thir.html) that we do not handle. 
If we look at the `span` field, we see that it appears in the source in the file `serde/src/de/impls.rs` at line 778:\\n\\n```rust\\nforwarded_impl! {\\n (T), Reverse, Reverse // Here is the error\\n}\\n```\\n\\nThis is not very informative as this code is generated by a macro. Another similar kind of expression appears later:\\n\\n```rust\\nimpl<\'de, T> Deserialize<\'de> for Wrapping\\nwhere\\n T: Deserialize<\'de>,\\n{\\n fn deserialize(deserializer: D) -> Result\\n where\\n D: Deserializer<\'de>,\\n {\\n Deserialize::deserialize(deserializer).map(\\n // Here is the error:\\n Wrapping\\n )\\n }\\n}\\n```\\n\\nThe `Wrapping` term is the constructor of a structure, used as a function. We add the support of this case in the pull-request [coq-of-rust#471](https://github.com/formal-land/coq-of-rust/pull/471).\\n\\n## Coq errors\\n\\nWhen we type-check the generated Coq code, we quickly get an error:\\n\\n```coq\\n(* Generated by coq-of-rust *)\\nRequire Import CoqOfRust.CoqOfRust.\\n\\nModule lib.\\n Module core.\\n\\n End core.\\nEnd lib.\\n\\nModule macros.\\n\\nEnd macros.\\n\\nModule integer128.\\n\\nEnd integer128.\\n\\nModule de.\\n Module value.\\n Module Error.\\n Section Error.\\n Record t : Set := {\\n (* Here is the error: *)\\n err : ltac:(serde.de.value.ErrorImpl);\\n }.\\n\\n (* 180.000 more lines! *)\\n```\\n\\nThe reason is that `serde.de.value.ErrorImpl` is not yet defined here. In Coq, we must order the definitions in the order of dependencies to ensure that there are no non-terminating definitions with infinite recursive calls and to preserve the consistency of the system.\\n\\nThis issue does not seem easy to us, as in a Rust crate, everything can depend on each other:\\n\\n- types\\n- definitions\\n- traits\\n- `impl` blocks\\n\\nOur current solutions are:\\n\\n1. **To reorder the definitions in the source Rust code**, so that they appear in the right order for Coq. 
This is technically the simplest solution (no changes in `coq-of-rust`), but it is not very practical. Indeed, reordering elements in a big project generates a lot of conflicts in the version control system, especially if we cannot upstream the changes to the original project.\\n2. **To use a configuration file** to specify the order of the definitions. This works in a lot of cases, but we need to write this file manually and have it complete to compile the whole crate in Coq, even if we are interested in verifying only a small part of the code. There are also some cases that are hard to untangle, in particular with traits that can depend on both types and definitions, that themselves may depend on traits.\\n\\nIn order to handle large projects, such as `serde`, we need to find a more definitive solution to handle the order of dependencies.\\n\\n## Plan for the order of definitions\\n\\nOur idea is to use a more verbose, but simpler translation, to generate Coq code that is not sensitive to the ordering of the definitions in the Rust code. In addition, we should have a more robust mechanism for the traits, as there are still some edge cases that we do not handle well.\\n\\nOur main ingredients are:\\n\\n1. Generating untyped code, where all Rust values become part of a single and shared `Value` type. With this approach, we can represent mutually recursive Rust types, that are generally hard to translate in a sound manner to Coq. We should also avoid a lot of errors on the Coq side related to type inference.\\n2. Adding an indirection level to all function calls, as any function call might refer to a definition that appears later in the code.\\n\\nThese ingredients have some drawbacks:\\n\\n- By removing the types, we will obtain code that is less readable. It might contain translation errors that will be harder to spot. 
We will need to add the types back during the specification of the code.\\n- We will need to add error cases corresponding to type errors at runtime, as we will not have the type system to ensure that functions expecting a certain type of value receive it. We know from the Rust type checker that these errors should not happen, but we will need to prove it in Coq.\\n- We will have to resolve the indirections in the calls at proof time, or with other mechanisms, that will be more complex than the current translation.\\n- We will still need to have a translation of the types (as values), to guide the inference of trait instances.\\n\\n## Definition of a new monad\\n\\nWe rework our definitions of values, pointers and monad to represent the effects, taking into account the fact that we remove the types from the translation. Here are the main definitions that we are planning to use. We have not tested them yet as we need to update the translation to Coq to use them. We will do that just after.\\n\\n### Pointers\\n\\n```coq\\nModule Pointer.\\n Module Index.\\n Inductive t : Set :=\\n | Tuple (index : Z)\\n | Array (index : Z)\\n | StructRecord (constructor field : string)\\n | StructTuple (constructor : string) (index : Z).\\n End Index.\\n\\n Module Path.\\n Definition t : Set := list Index.t.\\n End Path.\\n\\n Inductive t (Value : Set) : Set :=\\n | Immediate (value : Value)\\n | Mutable {Address : Set} (address : Address) (path : Path.t).\\n Arguments Immediate {_}.\\n Arguments Mutable {_ _}.\\nEnd Pointer.\\n```\\n\\nA pointer is either:\\n\\n- a pointer to an immutable data, that is directly represented by its data;\\n- a pointer to a mutable data, that is inside a cell at a certain address in the memory. 
The exact location in the cell is given by the path.\\n\\nThe type of `Address` is not enforced yet, but we will do it when defining the semantics.\\n\\n### Values\\n\\n```coq\\nModule Value.\\n Inductive t : Set :=\\n | Bool : bool -> t\\n | Integer : Integer.t -> Z -> t\\n (** For now we do not know how to represent floats so we use a string *)\\n | Float : string -> t\\n | UnicodeChar : Z -> t\\n | String : string -> t\\n | Tuple : list t -> t\\n | Array : list t -> t\\n | StructRecord : string -> list (string * t) -> t\\n | StructTuple : string -> list t -> t\\n | Pointer : Pointer.t t -> t\\n (** The two existential types of the closure must be [Value.t] and [M]. We\\n cannot enforce this constraint there yet, but we will do when defining the\\n semantics. *)\\n | Closure : {\'(t, M) : Set * Set @ t -> M} -> t.\\nEnd Value.\\n```\\n\\nHere, this type aims to represent any Rust value. We might add a few cases later to represent the `dyn` values, for example. Most of the cases of this type are as expected:\\n\\n- The constructor `StructRecord` is for constructors of `struct` or `enum` with named fields.\\n- The constructor `StructTuple` is for constructors of `struct` or `enum` with unnamed fields.\\n- The constructor `Pointer` is for pointers to data, that could be either `&`, `&mut`, `*const`, or `*mut`.\\n- The constructor `Closure` is for closures (anonymous functions). To prevent errors with the positivity checker of Coq, we use an existential type for the type `Value.t` (as well as `M`, which will be defined later). Note that we are using impredicative `Set` in Coq, and `{A : Set @ P A}` is our notation for existential `Set` in `Set`. Without impredicative sets, we could have issues with the universe levels. 
The fact that these existential types are always `Value.t` and `M` will be enforced when defining the semantics.\\n\\n### Monad\'s primitives\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set :=\\n | StateAlloc (value : Value.t)\\n | StateRead {Address : Set} (address : Address)\\n | StateWrite {Address : Set} (address : Address) (value : Value.t)\\n | EnvRead.\\nEnd Primitive.\\n```\\n\\nHere are the IO calls to the system that the monad can make. This list might be extended later. For now, we mainly have primitives to access the memory.\\n\\n### Monad: base\\n\\n```coq\\nModule LowM.\\n Inductive t (A : Set) : Set :=\\n | Pure : A -> t A\\n | CallPrimitive : Primitive.t -> (Value.t -> t A) -> t A\\n | Loop : t A -> (A -> bool) -> (A -> t A) -> t A\\n | Impossible : t A\\n (** This constructor is not strictly necessary, but is used as a marker for\\n functions calls in the generated code, to help the tactics to recognize\\n points where we can compose about functions. *)\\n | Call : t A -> (A -> t A) -> t A.\\n Arguments Pure {_}.\\n Arguments CallPrimitive {_}.\\n Arguments Loop {_}.\\n Arguments Impossible {_}.\\n Arguments Call {_}.\\n\\n Fixpoint let_ {A : Set} (e1 : t A) (f : A -> t A) : t A :=\\n match e1 with\\n | Pure v => f v\\n | CallPrimitive primitive k =>\\n CallPrimitive primitive (fun v => let_ (k v) f)\\n | Loop body is_break k =>\\n Loop body is_break (fun v => let_ (k v) f)\\n | Impossible => Impossible\\n | Call e k =>\\n Call e (fun v => let_ (k v) f)\\n end.\\nEnd LowM.\\n```\\n\\nThis is the first layer of our monad, very similar to what we had before. We remove the cast operation, as now everything has the same type. We use a style by continuation, but we also define a `let_` function to have a \\"bind\\" operator. 
Note that we always have the same type as parameter, so this is not really a monad as the \\"bind\\" operator should have the type:\\n\\n```coq\\nforall {A B : Set}, M A -> (A -> M B) -> M B\\n```\\n\\nAlways having the same type is enough for us as we use a single type of all Rust values.\\n\\n### Monad: with exceptions\\n\\nWe have the same type as before for the exceptions, representing the panics and all the special control flow operations such as `continue`, `return`, and `break`:\\n\\n```coq\\nModule Exception.\\n Inductive t : Set :=\\n (** exceptions for Rust\'s `return` *)\\n | Return : Value.t -> t\\n (** exceptions for Rust\'s `continue` *)\\n | Continue : t\\n (** exceptions for Rust\'s `break` *)\\n | Break : t\\n (** escape from a match branch once we know that it is not valid *)\\n | BreakMatch : t\\n | Panic : string -> t.\\nEnd Exception.\\n```\\n\\nOur final monad definition is a thin wrapper around `LowM`, to add an error monad to propagate the exceptions:\\n\\n```coq\\nDefinition M : Set :=\\n LowM.t (Value.t + Exception.t).\\n\\nDefinition let_ (e1 : M) (e2 : Value.t -> M) : M :=\\n LowM.let_ e1 (fun v1 =>\\n match v1 with\\n | inl v1 => e2 v1\\n | inr error => LowM.Pure (inr error)\\n end).\\n```\\n\\nOnce again, this is not really a monad as the type of the values that we compute is always the same, and we do not need more. 
Having a definition in two steps (`LowM` and `M`) is useful to separate the part that can be defined by computation (the `M` part) from the part whose semantics can only be given by inductive predicates (the `LowM` part).\\n\\n## Conclusion\\n\\nNext, we will see how we can use this new definition of Rust values, whether it works to translate our examples, and most importantly, how to modify `coq-of-rust` to generate terms without types.\\n\\nIf you are interested in formally verifying Rust projects, do not hesitate to get in touch with us at [contact@formal.land](mailto:contact@formal.land) or go to our [GitHub repository](https://github.com/formal-land/coq-of-rust) for `coq-of-rust`."},{"id":"/2024/02/22/journey-coq-of-go","metadata":{"permalink":"/blog/2024/02/22/journey-coq-of-go","source":"@site/blog/2024-02-22-journey-coq-of-go.md","title":"\ud83e\uddab Translating Go to Coq, part 1","description":"In this blog post, we present our development steps to build a tool to translate Go programs to the proof system Coq.","date":"2024-02-22T00:00:00.000Z","formattedDate":"February 22, 2024","tags":[{"label":"coq-of-go","permalink":"/blog/tags/coq-of-go"},{"label":"Go","permalink":"/blog/tags/go"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":12.03,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\uddab Translating Go to Coq, part 1","tags":["coq-of-go","Go","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd80 Improvements in the Rust translation to Coq, part 1","permalink":"/blog/2024/02/29/improvements-rust-translation"},"nextItem":{"title":"\u03bb Experiment on translation from Haskell to Coq","permalink":"/blog/2024/02/14/experiment-coq-of-hs"}},"content":"In this blog post, we present our development steps to build a tool to translate Go programs to the proof system Coq.\\n\\nThe goal is to formally verify Go programs to make them 
totally bug-free. It is actually possible to make a program totally bug-free, as [formal verification](https://en.wikipedia.org/wiki/Formal_verification) can cover all execution cases and kinds of properties thanks to the use of mathematical methods. This corresponds to the highest level of the [Evaluation Assurance Levels](https://en.wikipedia.org/wiki/Evaluation_Assurance_Level) used for critical applications, such as the space industry.\\n\\nAll the code of our work is available on GitHub at [github.com/formal-land/coq-of-go-experiment](https://github.com/formal-land/coq-of-go-experiment).\\n\\n\x3c!-- truncate --\x3e\\n\\n## Introduction\\n\\nWe believe that there are not yet a lot of formal verification tools for Go. We can cite [Goose](https://github.com/tchajed/goose), which works by translation from Go to the proof system Coq. We will follow a similar approach, translating the Go language to our favorite proof system Coq. In contrast to Goose, we plan to support the whole Go language, even at the expense of the simplicity of the translation.\\n\\nFor that, we target the translation of the [SSA form of Go](https://pkg.go.dev/golang.org/x/tools/go/ssa) instead of the [Go AST](https://pkg.go.dev/go/ast). The SSA form is a more low-level representation of Go, so we hope to capture the semantics of the whole Go language more easily. 
This should be at the expense of the simplicity of the generated translation, but we hope that having full language support outweighs this.\\n\\nGo is an interesting target as:\\n\\n- this is quite a popular language,\\n- it is focusing on simplicity, with a reduced set of language features,\\n- a lot of critical backend applications are written in Go, including for very large companies (Google, Netflix, Uber, Twitch, etc.).\\n\\nAmong interesting properties that we can verify are:\\n\\n- the absence of reachable `panic` in the code,\\n- the absence of race conditions or deadlocks,\\n- the backward compatibility from release to release, for parts of the code whose behavior is not supposed to change,\\n- the strict application of business rules.\\n\\n:::tip Contact\\n\\nYou can [follow us on X](https://twitter.com/LandFoobar) to get our updates. We propose tools and services to make your codebase totally bug-free. Contact us at [contact@formal.land](mailto:contact@formal.land) to chat! We offer a free audit to assess the feasibility of formal verification on your case.\\n\\n:::\\n\\n:::note Goal\\n\\nOur company goal is to make formal verification accessible to all projects, reducing its cost to 20% of the development cost. There should be no reason to have bugs in end-user products!\\n\\n:::\\n\\n![Mole and Rooster](2024-02-22/mole_rooster.webp)\\n\\n## First target\\n\\nOur first target is to achieve the formal verification _including all the dependencies_ of the hello world program:\\n\\n```go\\npackage main\\n\\nimport \\"fmt\\"\\n\\nfunc main() {\\n\\tfmt.Println(\\"Hello, World!\\")\\n}\\n```\\n\\nWhat we want to show about this code is that it does a single and only thing: outputting the string \\"Hello, World!\\" to the standard output. 
Its only dependency is the `fmt` package, but when we look at the transitive dependencies of this package:\\n\\n```sh\\ngo list -f \'{{ .Deps }}\' fmt\\n```\\n\\nwe get around forty packages:\\n\\n```\\nerrors\\ninternal/abi\\ninternal/bytealg\\ninternal/coverage/rtcov\\ninternal/cpu\\ninternal/fmtsort\\ninternal/goarch\\ninternal/godebugs\\ninternal/goexperiment\\ninternal/goos\\ninternal/itoa\\ninternal/oserror\\ninternal/poll\\ninternal/race\\ninternal/reflectlite\\ninternal/safefilepath\\ninternal/syscall/execenv\\ninternal/syscall/unix\\ninternal/testlog\\ninternal/unsafeheader\\nio\\nio/fs\\nmath\\nmath/bits\\nos\\npath\\nreflect\\nruntime\\nruntime/internal/atomic\\nruntime/internal/math\\nruntime/internal/sys\\nruntime/internal/syscall\\nsort\\nstrconv\\nsync\\nsync/atomic\\nsyscall\\ntime\\nunicode\\nunicode/utf8\\nunsafe\\n```\\n\\nWe will need to translate all these packages to meaningful Coq code.\\n\\n## The start\\n\\nWe made the `coq-of-go` tool, with everything in a single file [main.go](https://github.com/formal-land/coq-of-go-experiment/blob/main/main.go) for now. We retrieve the SSA form of a Go package provided as a command line parameter (code without the error handling):\\n\\n```go\\nfunc main() {\\n\\tpackageToTranslate := os.Args[1]\\n\\tcfg := &packages.Config{Mode: packages.LoadSyntax}\\n\\tinitial, _ := packages.Load(cfg, packageToTranslate)\\n\\t_, pkgs := ssautil.Packages(initial, 0)\\n\\tpkgs[0].Build()\\n\\tmembers := pkgs[0].Members\\n```\\n\\n:::note SSA form\\n\\nThe [SSA form](https://en.wikipedia.org/wiki/Static_single-assignment_form) of a program is generally used internally by compilers to have a simple representation to work on. The [LLVM](https://llvm.org/) language is such an example. In SSA, each variable is assigned exactly once and the control flow is explicit, with jumps or conditional jumps to labels. 
There are no `for` loops, `if` statements, or non-primitive expressions.\\n\\n:::\\n\\nThen we iterate over all the SSA `members`, and directly print the corresponding Coq code to the standard output. We do not use an intermediate representation or make intermediate passes. We do not even do pretty-printing (splitting lines that are too long at the right place, and introducing indentation)! This should not be necessary as the SSA code cannot nest sub-expressions or statements. We still try to print a readable Coq code, as it will be used in the proofs.\\n\\nThere are four kinds of SSA members:\\n\\n- named constants,\\n- globals,\\n- types,\\n- functions.\\n\\nNamed constants and globals are similar, and are for top-level variables whose value is either known at compile-time or computed at the program\'s init. Types are for type definitions. We will focus on functions, as this is where the code is.\\n\\n## Functions\\n\\nThe SSA functions in Go are described by the type [`ssa.Function`](https://pkg.go.dev/golang.org/x/tools/go/ssa#Function):\\n\\n```go\\ntype Function struct {\\n\\tSignature *types.Signature\\n\\n\\t// source information\\n\\tSynthetic string // provenance of synthetic function; \\"\\" for true source functions\\n\\n\\tPkg *Package // enclosing package; nil for shared funcs (wrappers and error.Error)\\n\\tProg *Program // enclosing program\\n\\n\\tParams []*Parameter // function parameters; for methods, includes receiver\\n\\tFreeVars []*FreeVar // free variables whose values must be supplied by closure\\n\\tLocals []*Alloc // frame-allocated variables of this function\\n\\tBlocks []*BasicBlock // basic blocks of the function; nil => external\\n\\tRecover *BasicBlock // optional; control transfers here after recovered panic\\n\\tAnonFuncs []*Function // anonymous functions directly beneath this one\\n\\t// contains filtered or unexported fields\\n}\\n```\\n\\nThe main part of interest for us is `Blocks`. 
A block is a sequence of instructions, and the control flow is explicit. The last instruction of a block is a jump to another block, or a return. The first instructions of a block can be the special `Phi` instruction, which is used to merge control flow from different branches.\\n\\nWe decided to write a first version to see what the SSA code of Go looks like when printed in Coq, without thinking about generating a well-typed code. This looks like this:\\n\\n```coq\\nwith MakeUint64 (\u03b1 : list Val.t) : M (list Val.t) :=\\n M.Thunk (\\n match \u03b1 with\\n | [x] =>\\n M.Thunk (M.EvalBody [(0,\\n let* \\"t0\\" := Instr.BinOp x \\"<\\" (Val.Lit (Lit.Int 9223372036854775808)) in\\n Instr.If (Register.read \\"t0\\") 1 2\\n );\\n (1,\\n let* \\"t1\\" := Instr.Convert x in\\n let* \\"t2\\" := Instr.ChangeType (Register.read \\"t1\\") in\\n let* \\"t3\\" := Instr.MakeInterface (Register.read \\"t2\\") in\\n M.Return [(Register.read \\"t3\\")]\\n );\\n (2,\\n let* \\"t4\\" := Instr.Alloc (* complit *) Alloc.Local \\"*go/constant.intVal\\" in\\n let* \\"t5\\" := Instr.FieldAddr (Register.read \\"t4\\") 0 in\\n let* \\"t6\\" := Instr.Call (CallKind.Function (newInt [])) in\\n let* \\"t7\\" := Instr.Call (CallKind.Function (TODO_method [(Register.read \\"t6\\"); x])) in\\n do* Instr.Store (Register.read \\"t5\\") (Register.read \\"t7\\") in\\n let* \\"t8\\" := Instr.UnOp \\"*\\" (Register.read \\"t4\\") in\\n let* \\"t9\\" := Instr.MakeInterface (Register.read \\"t8\\") in\\n M.Return [(Register.read \\"t9\\")]\\n )])\\n | _ => M.Thunk (M.EvalBody [])\\n end)\\n```\\n\\nfor a source Go code (from the [go/constant](https://pkg.go.dev/go/constant) package):\\n\\n```go\\n// MakeUint64 returns the [Int] value for x.\\nfunc MakeUint64(x uint64) Value {\\n\\tif x < 1<<63 {\\n\\t\\treturn int64Val(int64(x))\\n\\t}\\n\\treturn intVal{newInt().SetUint64(x)}\\n}\\n```\\n\\nThere are three blocks of code, labeled with `0`, `1`, and `2`. 
The first block ends with a conditional jump `If` corresponding to the `if` statement in the Go code. The following blocks are corresponding to the two possible branches of the `if` statement. They both end with a `Return` instruction, corresponding to the `return` statement in the Go code. They run various primitive instructions that we have translated as we can.\\n\\nThe generated Coq code is still readable but more verbose than the original Go code. We will later develop proof techniques using simulations to enable the user to define equivalent but simpler versions of the translation. Being able to define simulations of an imperative program is also important for the proofs, as we can rewrite the code in functional style to make it easier to reason about.\\n\\n## Type-checking\\n\\nFrom there, a second step is to have a generated code that type-checks, forgetting about making a code with sound semantics for now. We generate the various Coq definitions that are needed in a header of the generated code, using axioms for all the definitions. For example, for the allocations we do:\\n\\n```coq\\nModule Alloc.\\n Inductive t : Set :=\\n | Heap\\n | Local.\\nEnd Alloc.\\n\\nModule Instr.\\n Parameter Alloc : Alloc.t -> string -> M Val.t.\\n```\\n\\nThe `Inductive` keyword in Coq defines a type with two constructors `Heap` and `Local`. The `Parameter` keyword defines an axiomatized definition, where we only provide the type but not the definition itself. The `Instr.Alloc` instruction takes as parameters an allocation mode `Alloc.t` and a string and returns an `M Val.t` value.\\n\\n### Representation of values\\n\\nWe make the choice to remove the types while doing the translation, as the type system of Go is probably incompatible with the one of Coq in many ways. We thus translate everything to a single type `Val.t` in Coq to represent all kinds of possible Go values. 
The downside of this approach is that it makes the generated code less readable and less safe, as types are useful to track the correct use of values.\\n\\nFor now, we define the `Val.t` type as:\\n\\n```coq\\nModule Val.\\n Inductive t : Set :=\\n | Lit (_ : Lit.t)\\n | Tuple (_ : list t).\\nEnd Val.\\n```\\n\\nwith the literals `Lit.t` as:\\n\\n```coq\\nModule Lit.\\n Inductive t : Set :=\\n | Bool (_ : bool)\\n | Int (_ : Z)\\n | Float (_ : Rational)\\n | Complex (_ _ : Rational)\\n | String (_ : string)\\n | Nil.\\nEnd Lit.\\n```\\n\\nWe plan to refine this type and add more cases as we improve `coq-of-go`. Structures, pointers, and closures are missing for now.\\n\\n### Monadic style\\n\\nIn order to represent the side-effects of the Go code, we use a [monadic style](). This is a standard approach to represent side-effects like mutations, exceptions, or non-termination in a purely functional language such as Coq. We choose to use:\\n\\n- A free monad, where all the primitives are constructors of the inductive type `M` of the monad. This simplifies the manipulation of the monad by allowing us to compute on it and by deferring the actual implementation of the monadic primitives to later.\\n- A co-inductive type, to allow potentially non-terminating programs. 
Co-inductive types are like lazy definitions in Haskell where it is possible to make an infinite list for example, as long as only a finite number of elements are consumed.\\n\\nIn that sense, we follow the approach in the paper [Modular, Compositional, and Executable Formal Semantics for LLVM IR](https://cambium.inria.fr/~eyoon/paper/vir.pdf), which uses a co-inductive free monad (interaction tree) to formalize a reasonable subset of the LLVM language that is also an SSA representation but with more low-level instructions than Go.\\n\\nOur definition for `M` for now is:\\n\\n```coq\\nModule M.\\n CoInductive t (A : Set) : Set :=\\n | Return (_ : A)\\n | Bind {B : Set} (_ : t B) (_ : B -> t A)\\n | Thunk (_ : t A)\\n | EvalBody (_ : list (Z * t A)).\\n Arguments Return {A}.\\n Arguments Bind {A B}.\\n Arguments Thunk {A}.\\n Arguments EvalBody {A}.\\nEnd M.\\nDefinition M : Set -> Set := M.t.\\n```\\n\\nWe define all the functions that we translate as mutually recursive with the `CoFixpoint ... with ...` keyword of Coq. Thus, we do not have to preserve the ordering of definitions that is required by Coq or care for recursive or mutually recursive functions in Go.\\n\\nHowever, we did not manage to make the type-checker of Coq happy for our `CoFixpoint` as many definitions are axiomatized, and the type-checker of Coq wants their definitions to know if they produce co-inductive constructors. 
So, for now, we admit this step by disabling the termination checker with this flag:\\n\\n```coq\\nLocal Unset Guard Checking.\\n```\\n\\n## Next\\n\\nWhen we translate our hello world example we get the Coq code:\\n\\n```coq\\nCoFixpoint Main (\u03b1 : list Val.t) : M (list Val.t) :=\\n M.Thunk (\\n match \u03b1 with\\n | [] =>\\n M.Thunk (M.EvalBody [(0,\\n let* \\"t0\\" := Instr.Alloc (* varargs *) Alloc.Heap \\"*[1]any\\" in\\n let* \\"t1\\" := Instr.IndexAddr (Register.read \\"t0\\") (Val.Lit (Lit.Int 0)) in\\n let* \\"t2\\" := Instr.MakeInterface (Val.Lit (Lit.String \\"Hello, World!\\")) in\\n do* Instr.Store (Register.read \\"t1\\") (Register.read \\"t2\\") in\\n let* \\"t3\\" := Instr.Slice (Register.read \\"t0\\") None None in\\n let* \\"t4\\" := Instr.Call (CallKind.Function (fmt.Println [(Register.read \\"t3\\")])) in\\n M.Return []\\n )])\\n | _ => M.Thunk (M.EvalBody [])\\n end)\\n\\nwith init (\u03b1 : list Val.t) : M (list Val.t) :=\\n M.Thunk (\\n match \u03b1 with\\n | [] =>\\n M.Thunk (M.EvalBody [(0,\\n let* \\"t0\\" := Instr.UnOp \\"*\\" (Register.read \\"init$guard\\") in\\n Instr.If (Register.read \\"t0\\") 2 1\\n );\\n (1,\\n do* Instr.Store (Register.read \\"init$guard\\") (Val.Lit (Lit.Bool true)) in\\n let* \\"t1\\" := Instr.Call (CallKind.Function (fmt.init [])) in\\n Instr.Jump 2\\n );\\n (2,\\n M.Return []\\n )])\\n | _ => M.Thunk (M.EvalBody [])\\n end).\\n```\\n\\nThe `init` function, which is automatically generated by the Go compiler to initialize global variables, does not do much here. It checks whether it was already called or not reading the `init$guard` variable, and if not, it calls the `fmt.init` function. The `Main` function is the one that we are interested in. It allocates a variable to store the string \\"Hello, World!\\", and then calls the `fmt.Println` function to print it.\\n\\nFrom there, to continue the project we have two possibilities:\\n\\n1. 
Give actual definitions to each primitive instruction that is used in this example (for now, everything is axiomatized).\\n2. Translate all the transitive dependencies of the hello world program to Coq, and make sure that we can compile everything together.\\n\\nFor the next step, we choose to follow the second possibility as we are more confident in being able to define the semantics of the instructions, which is purely done on the Coq side, than in being able to use the Go compiler\'s APIs to retrieve the definitions of all the dependencies and relate them together.\\n\\n## Conclusion\\n\\nWe have presented the beginning of our journey to translate Go programs to Coq, to build a formal verification tool for Go. The translation type-checks on the few examples we have tried but has no semantics. We will follow by handling the translation of dependencies of a package.\\n\\nIf you are interested in this project, please contact us at [contact@formal.land](mailto:contact@formal.land) or go to our [GitHub repository](https://github.com/formal-land/coq-of-go-experiment)."},{"id":"/2024/02/14/experiment-coq-of-hs","metadata":{"permalink":"/blog/2024/02/14/experiment-coq-of-hs","source":"@site/blog/2024-02-14-experiment-coq-of-hs.md","title":"\u03bb Experiment on translation from Haskell to Coq","description":"We present an experiment coq-of-hs that we have made on the translation of Haskell programs to the proof system Coq \ud83d\udc13. 
The goal is to formally verify Haskell programs to make them totally bug-free.","date":"2024-02-14T00:00:00.000Z","formattedDate":"February 14, 2024","tags":[{"label":"coq-of-hs","permalink":"/blog/tags/coq-of-hs"},{"label":"Haskell","permalink":"/blog/tags/haskell"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"}],"readingTime":4.365,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\u03bb Experiment on translation from Haskell to Coq","tags":["coq-of-hs","Haskell","Coq","translation"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\uddab Translating Go to Coq, part 1","permalink":"/blog/2024/02/22/journey-coq-of-go"},"nextItem":{"title":"The importance of formal verification","permalink":"/blog/2024/02/02/formal-verification-for-aleph-zero"}},"content":"We present an experiment [coq-of-hs](https://github.com/formal-land/coq-of-hs-experiment) that we have made on the translation of [Haskell](https://www.haskell.org/) programs to the proof system [Coq \ud83d\udc13](https://coq.inria.fr/). The goal is to formally verify Haskell programs to make them totally bug-free.\\n\\nIndeed, even with the use of a strict type system, there can still be bugs for properties that cannot be expressed with types. An example of such a property is the backward compatibility of an API endpoint for the new release of a web service when there has been code refactoring. Only formal verification can cover all execution cases and kinds of properties.\\n\\nThe code of the tool is at: [github.com/formal-land/coq-of-hs-experiment](https://github.com/formal-land/coq-of-hs-experiment) (AGPL license)\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Contact\\n\\nWe propose tools to make your codebase totally bug-free. Contact us at [contact@formal.land](mailto:contact@formal.land) for more information! 
We offer a free audit to assess the feasibility of formal verification for your case.\\n\\n:::\\n\\n:::info Info\\n\\nWe estimate that the cost of formal verification should be 20% of the development cost. There are no reasons to still have bugs today!\\n\\n:::\\n\\n![Haskell Logo](2024-02-14/haskell_logo.svg)\\n\\n## Goal of the experiment\\n\\nThere are already some tools to formally verify Haskell programs:\\n\\n- [\ud83d\udc13 hs-to-coq](https://github.com/plclub/hs-to-coq) translation from Haskell to Coq\\n- [\ud83d\udca7 Liquid Haskell](https://en.wikipedia.org/wiki/Liquid_Haskell) verification using [SMT solvers](https://en.wikipedia.org/wiki/Satisfiability_modulo_theories)\\n\\nIn this experiment, we want to check the feasibility of translation from Haskell to Coq:\\n\\n- \ud83d\udc4d covering all the language without manual configuration or code changes,\\n- \ud83d\udc4e even if this is at the cost of a more verbose and low-level translation.\\n\\n## Example\\n\\nHere is an example of a Haskell function:\\n\\n```haskell\\nfixObvious :: (a -> a) -> a\\nfixObvious f = f (fixObvious f)\\n```\\n\\nthat `coq-of-hs` translates to this valid Coq code:\\n\\n```coq\\nCoFixpoint fixObvious : Val.t :=\\n (Val.Lam (fun (f : Val.t) => (Val.App f (Val.App fixObvious f)))).\\n```\\n\\n## Infrastructure\\n\\nWe read the [Haskell Core](https://serokell.io/blog/haskell-to-core) representation of Haskell using the GHC plugin system. Thus, we read the exact same code version as the one that is compiled down to assembly code by [GHC](https://www.haskell.org/ghc/), to take into account all compilation options.\\n\\nHaskell Core is an intermediate representation of Haskell that is close to the lambda calculus and used by the Haskell compiler for various optimizations passes. 
Here are all the constructors of the `Expr` type of Haskell Core:\\n\\n```haskell\\ndata Expr b\\n = Var Id\\n | Lit Literal\\n | App (Expr b) (Arg b)\\n | Lam b (Expr b)\\n | Let (Bind b) (Expr b)\\n | Case (Expr b) b Type [Alt b]\\n | Cast (Expr b) Coercion\\n | Tick (Tickish Id) (Expr b)\\n | Type Type\\n | Coercion Coercion\\n```\\n\\nThis paper [System FC, as implemented in GHC](https://repository.brynmawr.edu/cgi/viewcontent.cgi?article=1015&context=compsci_pubs) presents it as [System F](https://en.wikipedia.org/wiki/System_F) plus coercions. We translate Haskell code to an untyped version of the lambda calculus in Coq, with co-induction to allow for infinite data structures:\\n\\n```coq\\nModule Val.\\n #[bypass_check(positivity)]\\n CoInductive t : Set :=\\n | Lit (_ : Lit.t)\\n | Con (_ : string) (_ : list t)\\n | App (_ _ : t)\\n | Lam (_ : t -> t)\\n | Case (_ : t) (_ : t -> list (Case.t t))\\n | Impossible.\\nEnd Val.\\n```\\n\\nWe make the translation by induction over the Haskell Core representation, and we translate each constructor to a corresponding constructor of the Coq representation. We pretty-print the Coq code directly without using an intermediate representation. We use the [prettyprinter](https://github.com/quchen/prettyprinter) package with the two main following primitives:\\n\\n```haskell\\nconcatNest :: [Doc ()] -> Doc ()\\nconcatNest = group . nest 2 . vsep\\n\\nconcatGroup :: [Doc ()] -> Doc ()\\nconcatGroup = group . vsep\\n```\\n\\nto display a sub-term with or without indentation when splitting lines that are too long. This translation works well on all the Haskell expressions that we have tested.\\n\\n## Missing features\\n\\n### Semantics\\n\\nWe have not yet defined a semantics. For now, the terms that we generate in Coq are purely descriptive. We will wait to have examples of things to verify to define semantics that are practical to use.\\n\\n### Type-classes\\n\\nWe have not yet translated typeclasses. 
The Haskell Core language hides most of the typeclass-related code. For example, it represents instances as additional function parameters for functions that have a typeclass constraint. But we still need to declare the functions corresponding to the members of the typeclasses, which we have not done yet.\\n\\n### Multi-file projects\\n\\nWe have not yet implemented the translation of multi-file projects. We have only tested the translation of a single-file project.\\n\\n### Standard library\\n\\nSimilarly to the handling of multi-file projects, we have not yet tested the translation of projects using external libraries or translating the base library of Haskell.\\n\\n### Strict positivity\\n\\nWe had to turn off the strict positivity condition for the definition of `Val.t` in Coq with:\\n\\n```coq\\n#[bypass_check(positivity)]\\n```\\n\\nThis is due to the case:\\n\\n```coq\\n| Lam (_ : t -> t)\\n```\\n\\nwhere `t` appears as a parameter of a function (negative position). We do not know if this causes any problem in practice, on values that correspond to well-typed Haskell programs.\\n\\n## Conclusion\\n\\nWe have presented an experiment on the translation of Haskell programs to Coq. 
If you are interested in this project, please get in touch with us at [contact@formal.land](mailto:contact@formal.land) or go to the [GitHub repository](https://github.com/formal-land/coq-of-hs-experiment) of the project."},{"id":"/2024/02/02/formal-verification-for-aleph-zero","metadata":{"permalink":"/blog/2024/02/02/formal-verification-for-aleph-zero","source":"@site/blog/2024-02-02-formal-verification-for-aleph-zero.md","title":"The importance of formal verification","description":"Ensuring Flawless Software in a Flawed World","date":"2024-02-02T00:00:00.000Z","formattedDate":"February 2, 2024","tags":[],"readingTime":5.53,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"The importance of formal verification","authors":[]},"unlisted":false,"prevItem":{"title":"\u03bb Experiment on translation from Haskell to Coq","permalink":"/blog/2024/02/14/experiment-coq-of-hs"},"nextItem":{"title":"Upgrade the Rust version of coq-of-rust","permalink":"/blog/2024/01/18/update-coq-of-rust"}},"content":"> Ensuring Flawless Software in a Flawed World\\n\\nIn this blog post, we present what formal verification is and why this is such a valuable tool to improve the security of your applications.\\n\\n\x3c!-- truncate --\x3e\\n\\n![Formal verification](2024-02-02/formal_verification.png)\\n\\n:::tip Contact\\n\\nIf you want to formally verify your codebase to improve the security of your application, contact us at [contact@formal.land](mailto:contact@formal.land)! We offer a free audit of your codebase to assess the feasibility of formal verification.\\n\\n:::\\n\\n:::info Thanks\\n\\nThe current development of our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust), for the formal verification of Rust code, is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation and its [Ecosystem Funding Program](https://alephzero.org/ecosystem-funding-program). 
The aim is to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n## What is formal verification?\\n\\nFormal verification is a set of techniques to check for the complete correctness of a program, reasoning at a symbolic level rather than executing a particular instance of the code. By symbolic reasoning, we mean following the values of the variables by tracking their names and constraints, without necessarily giving them an example value. This is what we would do in our heads to understand a code where a variable `username` appears, following which functions it is given to, to know where we use the user name. The concrete user name that we consider is irrelevant, although some people prefer to think with an example.\\n\\nIn formal verification, we rely on precise mathematical reasoning to make sure that there are no mistakes or missing cases. We check this reasoning with a dedicated program ([SMT](https://en.wikipedia.org/wiki/Satisfiability_modulo_theories) solver, [Coq](https://coq.inria.fr/) proof system, ...). Indeed, as programs grow in complexity, it could be easy to forget an `if` branch or an error case.\\n\\nFor example, to say that the following Rust program is valid:\\n\\n```rust\\n/// Return the maximum of [a] and [b]\\nfn get_max(a: u128, b: u128) -> u128 {\\n if a > b {\\n a\\n } else {\\n b\\n }\\n}\\n```\\n\\nwe reason on two cases (reasoning by disjunction):\\n\\n- `a > b` where `a` is the maximum,\\n- `a <= b` where `b` is the maximum,\\n\\nwith the values of `a` and `b` being irrelevant (symbolic). In both cases, we can conclude that `get_max` returns the maximum.\\n\\nThis is in contrast with testing, where we need to execute the program with all possible instances of `a` and `b` to check that the program is correct with 100% certainty. 
This is infeasible in this case as the type `u128` is too large to be tested exhaustively: there are `2^256` possible values for `a` and `b`, meaning `115792089237316195423570985008687907853269984665640564039457584007913129639936` possible values!\\n\\nA program is shown correct with respect to an expected behavior, called a _formal specification_. This is expressed in a mathematical language to be non-ambiguous. For example, we can specify the behavior of the previous program as:\\n\\n```\\nFORALL (a b : u128),\\n (get_max a b = a OR get_max a b = b) AND\\n (get_max a b >= a AND get_max a b >= b)\\n```\\n\\nstating that we indeed return the maximum of `a` and `b`.\\n\\nWhen a program is formally verified, we are mathematically sure it will always follow its specifications. This is a way to eliminate all bugs, as long as we have a complete specification of what it is supposed to do or not do. This corresponds to the highest level of Evaluation Assurance Level, [EAL7](https://en.wikipedia.org/wiki/Evaluation_Assurance_Level#EAL7:_Formally_Verified_Design_and_Tested). This is used for critical applications, such as space rocket software, where a single bug can be extremely expensive (the loss of a rocket!).\\n\\nThere are various formal verification tools, such as the proof system [Coq](https://coq.inria.fr/). The C compiler [CompCert](https://en.wikipedia.org/wiki/CompCert) is an example of large software verified in Coq. It is proven correct, in contrast to most other C compilers that contain [subtle bugs](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf). CompCert is now used by Airbus to compile C programs embedded in planes \ud83d\udeeb.\\n\\n## Why is it such a useful tool?\\n\\nFormal verification is extremely useful as it can anticipate all the bugs by exploring all possible execution cases of a program. Here is a quote from [Edsger W. 
Dijkstra](https://en.wikipedia.org/wiki/Formal_verification):\\n\\n> Program testing can be used to show the presence of bugs, but never to show their absence!\\n\\nIt offers the possibility to make software that never fails. This is often required for applications with human life at stake, such as planes or medical devices. But it can also be useful for applications where a single bug can be extremely expensive, such as financial applications.\\n\\nSmart contracts are a good example of such applications. They are programs that are executed on a blockchain and are used to manage assets worth billions of dollars. A single bug in a smart contract can lead to the loss of all the assets managed by the contract. In the first half of 2023, some estimate that attacks on web3 platforms resulted in a loss of [$655.61 million](https://www.linkedin.com/pulse/h1-2023-global-web3-security-report-aml-analysis-crypto-regulatory/), with most of these losses due to bugs in smart contracts. These bugs could be prevented using formally verified smart contracts.\\n\\nFinally, formal verification is useful to improve the quality of a program by enforcing the need to use:\\n\\n- clear programming constructs,\\n- an explicit specification of the behavior of the program.\\n\\n## Comparison of formal verification and testing\\n\\nCompared to testing, formal verification is more complex as:\\n\\n- it typically takes much more time to formally verify a program than to test it on a reasonable set of inputs,\\n- it requires a formal specification of the program, which is not always available,\\n- it requires some specific expertise to use the formal verification tools and to write the specifications.\\n\\nIn addition, formal verification assumes a certain model of the environment of the program, which is not always accurate. When actually executing the code, we also exercise all the dependencies (libraries, operating system, network, ...) 
that might cause issues at runtime.\\n\\nHowever, formal verification is the only way to have an exhaustive check of the program. It verifies all corner cases, such as integer overflows, or hard-to-reproduce issues, such as concurrency bugs. We recommend combining both approaches as they do not catch the same kinds of bugs.\\n\\nAt [Formal Land](https://formal.land/), we consider it critical to lower the cost of formal verification to apply it to a larger scope of programs and prevent more bugs and attacks. We work on the formal verification of Rust with [coq-of-rust](https://github.com/formal-land/coq-of-rust) and OCaml with [coq-of-ocaml](https://github.com/formal-land/coq-of-ocaml).\\n\\n## Conclusion\\n\\nFormal verification is a powerful tool to improve the security of your applications. It is the only way to prevent all bugs by exploring all possible executions of your programs. It complements existing testing methods. It is particularly useful for critical applications, such as smart contracts, where a single bug can be extremely expensive."},{"id":"/2024/01/18/update-coq-of-rust","metadata":{"permalink":"/blog/2024/01/18/update-coq-of-rust","source":"@site/blog/2024-01-18-update-coq-of-rust.md","title":"Upgrade the Rust version of coq-of-rust","description":"We continue our work on the coq-of-rust tool to formally verify Rust programs with the Coq proof assistant. 
We have upgraded the Rust version that we support, simplified the translation of the traits, and are adding better support for the standard library of Rust.","date":"2024-01-18T00:00:00.000Z","formattedDate":"January 18, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"Aleph-Zero","permalink":"/blog/tags/aleph-zero"}],"readingTime":3.5,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Upgrade the Rust version of coq-of-rust","tags":["coq-of-rust","Rust","Coq","Aleph-Zero"],"authors":[]},"unlisted":false,"prevItem":{"title":"The importance of formal verification","permalink":"/blog/2024/02/02/formal-verification-for-aleph-zero"},"nextItem":{"title":"Translating Rust match patterns to Coq with coq-of-rust","permalink":"/blog/2024/01/04/rust-translating-match"}},"content":"We continue our work on the [coq-of-rust](https://github.com/formal-land/coq-of-rust) tool to formally verify Rust programs with the [Coq proof assistant](https://coq.inria.fr/). We have upgraded the Rust version that we support, simplified the translation of the traits, and are adding better support for the standard library of Rust.\\n\\nOverall, we are now able to translate **about 80%** of the Rust examples from the [Rust by Example](https://doc.rust-lang.org/stable/rust-by-example/) book into valid Coq files. This means we support a large subset of the Rust language.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nTo formally verify your Rust codebase and improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)! 
Formal verification is the only way to prevent all bugs by exploring all possible executions of your programs \ud83c\udfaf.\\n\\n:::\\n\\n:::info Thanks\\n\\nThis work and the development of [coq-of-rust](https://github.com/formal-land/coq-of-rust) is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation, to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n![Rust rooster](2024-01-18/rooster.png)\\n\\n## Upgrade of the Rust version\\n\\nThe tool `coq-of-rust` is tied to a particular version of the Rust compiler that we use to parse and type-check a `cargo` project. We now support the `nightly-2023-12-15` version of Rust, up from `nightly-2023-04-30`. Most of the changes were minor, but it is good to handle these regularly to have smooth upgrades. The corresponding pull request is [coq-of-rust/pull/445](https://github.com/formal-land/coq-of-rust/pull/445). We also got more [Clippy](https://github.com/rust-lang/rust-clippy) warnings thanks to the new version of Rust.\\n\\n## Simplify the translation of traits\\n\\nThe traits of Rust are similar to the [type-classes of Coq](https://coq.inria.fr/refman/addendum/type-classes.html). This is how we translate traits to Coq.\\n\\nBut there are a lot of subtle differences between the two languages. The type-class inference mechanism of Coq does not work all the time on generated Rust code, even when adding a lot of code annotations. We think that the only reliable way to translate Rust traits would be to explicit the implementations inferred by the Rust compiler, but the Rust compiler currently throws away this information.\\n\\nInstead, our new solution is to use a Coq tactic:\\n\\n```coq\\n(** Try first to infer the trait instance, and if unsuccessful, delegate it at\\n proof time. 
*)\\nLtac get_method method :=\\n exact (M.pure (method _)) ||\\n exact (M.get_method method).\\n```\\n\\nthat first tries to infer the trait instance for a particular method, and if it fails, delegates its definition to the user at proof time. This is a bit unsafe, as a user could provide invalid instances at proof time, by giving some custom instance definitions instead of the ones generated by `coq-of-rust`. So, one should be careful to only apply generated instances to fill the hole made by this tactic in case of failure. We believe this to be a reasonable assumption that we could enforce someday if needed.\\n\\nWe are also starting to remove the trait constraints on polymorphic functions (the `where` clauses). We start by doing it in our manual definition of the standard library of Rust. The rationale is that we can provide the actual trait instances at proof time by having the right hypothesis replicating the constraints of the `where` clauses. Having fewer `where` clauses reduces the complexity of the type inference of Coq on the generated code. There are still some cases that we need to clarify, for example, the handling of [associated types](https://doc.rust-lang.org/rust-by-example/generics/assoc_items/types.html) in the absence of traits.\\n\\n## Handling more of the standard library\\n\\nWe have a definition of the standard library of Rust, mainly composed of axiomatized[^1] definitions, in these three folders:\\n\\n- [CoqOfRust/alloc](https://github.com/formal-land/coq-of-rust/tree/main/CoqOfRust/alloc)\\n- [CoqOfRust/core](https://github.com/formal-land/coq-of-rust/tree/main/CoqOfRust/core)\\n- [CoqOfRust/std](https://github.com/formal-land/coq-of-rust/tree/main/CoqOfRust/std)\\n\\nBy adding more of these axioms, as well as with some small changes to the `coq-of-rust` tool, we are now able to successfully translate around 80% of the examples of the [Rust by Example](https://doc.rust-lang.org/stable/rust-by-example/) book. 
There can still be some challenges on larger programs, but this showcases the good support of `coq-of-rust` for the Rust language.\\n\\n## Conclusion\\n\\nWe are continuing to improve our tool `coq-of-rust` to support more of the Rust language and are making good progress. If you need to improve the security of critical applications written in Rust, contact us at [contact@formal.land](mailto:contact@formal.land) to start formally verifying your code!\\n\\n[^1]: An axiom in Coq is either a theorem whose proof is admitted, or a function/constant definition left for later. This is the equivalent in Rust of the `todo!` macro."},{"id":"/2024/01/04/rust-translating-match","metadata":{"permalink":"/blog/2024/01/04/rust-translating-match","source":"@site/blog/2024-01-04-rust-translating-match.md","title":"Translating Rust match patterns to Coq with coq-of-rust","description":"Our tool coq-of-rust enables formal verification of \ud83e\udd80 Rust code to make sure that a program has no bugs. This technique checks all possible execution paths using mathematical techniques. 
This is important for example to ensure the security of smart contracts written in Rust language.","date":"2024-01-04T00:00:00.000Z","formattedDate":"January 4, 2024","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"Aleph-Zero","permalink":"/blog/tags/aleph-zero"}],"readingTime":6.005,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Translating Rust match patterns to Coq with coq-of-rust","tags":["coq-of-rust","Rust","Coq","Aleph-Zero"],"authors":[]},"unlisted":false,"prevItem":{"title":"Upgrade the Rust version of coq-of-rust","permalink":"/blog/2024/01/18/update-coq-of-rust"},"nextItem":{"title":"Verifying an ERC-20 smart contract in Rust","permalink":"/blog/2023/12/13/rust-verify-erc-20-smart-contract"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) enables [formal verification](https://en.wikipedia.org/wiki/Formal_verification) of [\ud83e\udd80 Rust](https://www.rust-lang.org/) code to make sure that a program has no bugs. This technique checks all possible execution paths using mathematical techniques. This is important for example to ensure the security of smart contracts written in Rust language.\\n\\nOur tool `coq-of-rust` works by translating Rust programs to the general proof system [\ud83d\udc13 Coq](https://coq.inria.fr/). Here we explain how we translate[ `match` patterns](https://doc.rust-lang.org/book/ch06-02-match.html) from Rust to Coq. The specificity of Rust patterns is to be able to match values either by value or reference.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nTo formally verify your Rust codebase and improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)! 
Formal verification is the only way to prevent all bugs by exploring all possible executions of your program.\\n\\n:::\\n\\n:::info Thanks\\n\\nThis work and the development of [coq-of-rust](https://github.com/formal-land/coq-of-rust) is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation, to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n![Rust rooster](2024-01-04/rust-rooster.png)\\n\\n## Rust example \ud83e\udd80\\n\\nTo illustrate the pattern matching in Rust, we will use the following example featuring a match by reference:\\n\\n```rust\\npub(crate) fn is_option_equal<A>
(\\n is_equal: fn(x: &A, y: &A) -> bool,\\n lhs: Option<A>,\\n rhs: &A,\\n) -> bool {\\n match lhs {\\n None => false,\\n Some(ref value) => is_equal(value, rhs),\\n }\\n}\\n```\\n\\nWe take a function `is_equal` as a parameter, operating only on references to the type `A`. We apply it to compare two values `lhs` and `rhs`:\\n\\n- if `lhs` is `None`, we return `false`,\\n- if `lhs` is `Some`, we get its value by reference and apply `is_equal`.\\n\\nWhen we apply the pattern:\\n\\n```rust\\nSome(ref value) => ...\\n```\\n\\nwe do something interesting: we read the value of `lhs` to know if we are in a `Some` case but leave it in place and return `value` the reference to its content.\\n\\nTo simulate this behavior in Coq, we need to match in two steps:\\n\\n1. match the value of `lhs` to know if we are in a `Some` case or not,\\n2. if we are in a `Some` case, create the reference to the content of a `Some` case based on the reference to `lhs`.\\n\\n## Coq translation \ud83d\udc13\\n\\nThe Coq translation that our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) generates is the following:\\n\\n```coq\\nDefinition is_option_equal\\n {A : Set}\\n (is_equal : (ref A) -> (ref A) -> M bool.t)\\n (lhs : core.option.Option.t A)\\n (rhs : ref A)\\n : M bool.t :=\\n let* is_equal := M.alloc is_equal in\\n let* lhs := M.alloc lhs in\\n let* rhs := M.alloc rhs in\\n let* \u03b10 : M.Val bool.t :=\\n match_operator\\n lhs\\n [\\n fun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.None => M.alloc false\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t);\\n fun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.Some _ =>\\n let \u03b30_0 := \u03b3.[\\"Some.0\\"] in\\n let* value := M.alloc (borrow \u03b30_0) in\\n let* \u03b10 : (ref A) -> (ref A) -> M bool.t := M.read is_equal in\\n let* \u03b11 : ref A := M.read value in\\n let* \u03b12 : ref A := M.read rhs in\\n let* \u03b13 : 
bool.t := M.call (\u03b10 \u03b11 \u03b12) in\\n M.alloc \u03b13\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t)\\n ] in\\n M.read \u03b10.\\n```\\n\\nWe run the `match_operator` on `lhs` and the two branches of the `match`. This operator is of type:\\n\\n```coq\\nDefinition match_operator {A B : Set}\\n (scrutinee : A)\\n (arms : list (A -> M B)) :\\n M B :=\\n ...\\n```\\n\\nIt takes a `scrutinee` value to match as a parameter, and runs a sequence of functions `arms` on it. Each function `arms` takes the value of the `scrutinee` and returns a monadic value `M B`. This monadic value can either be a success value if the pattern matches, or a special failure value if the pattern does not match. We evaluate the branches until one succeeds.\\n\\n### `None` branch\\n\\nThe `None` branch is the simplest one. We read the value at the address given by `lhs` (we represent each Rust variable by its address) and match it with the `None` constructor:\\n\\n```coq\\nfun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.None => M.alloc false\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t)\\n```\\n\\nIf it matches, we return `false`. 
If it does not, we return the special value `M.break_match` to indicate that the pattern does not match.\\n\\n### `Some` branch\\n\\nIn the `Some` branch, we first also read the value at the address given by `lhs` and match it with the `Some` constructor:\\n\\n```coq\\nfun \u03b3 =>\\n (let* \u03b10 := M.read \u03b3 in\\n match \u03b10 with\\n | core.option.Option.Some _ =>\\n let \u03b30_0 := \u03b3.[\\"Some.0\\"] in\\n let* value := M.alloc (borrow \u03b30_0) in\\n let* \u03b10 : (ref A) -> (ref A) -> M bool.t := M.read is_equal in\\n let* \u03b11 : ref A := M.read value in\\n let* \u03b12 : ref A := M.read rhs in\\n let* \u03b13 : bool.t := M.call (\u03b10 \u03b11 \u03b12) in\\n M.alloc \u03b13\\n | _ => M.break_match\\n end) :\\n M (M.Val bool.t)\\n```\\n\\nIf we are in that case, we create the value:\\n\\n```coq\\nlet \u03b30_0 := \u03b3.[\\"Some.0\\"] in\\n```\\n\\nwith the address of the first field of the `Some` constructor, relative to the address of `lhs` given in `\u03b3`. We define the operator `.[\\"Some.0\\"]` when we define the option type and generate such definitions for all user-defined enum types.\\n\\nWe then encapsulate the address `\u03b30_0` in a proper Rust reference:\\n\\n```coq\\nlet* value := M.alloc (borrow \u03b30_0) in\\n```\\n\\nof type `ref A` in the original Rust code. Finally, we call the function `is_equal` on the two references `value` and `rhs`, with some boilerplate code to read and allocate the variables.\\n\\n## General translation\\n\\nWe generalize this translation to all patterns by:\\n\\n- flattening all the or patterns `|` so that only patterns with a single choice remain,\\n- evaluating each match branch in order with the `match_operator` operator,\\n- in each branch, evaluating the inner patterns in order. This evaluation might fail at any point if the pattern does not match. 
In this case, we return the special value `M.break_match` and continue with the next branch.\\n\\nAt least one branch should succeed as the Rust compiler checks that all cases are covered. We still have a special value `M.impossible` in Coq for the case where no patterns match and satisfy the type checker.\\n\\nWe distinguish and handle the following kinds of patterns (and all their combinations):\\n\\n- wild patterns `_`,\\n- binding patterns `(ref) name` or `(ref) name as pattern` (the `ref` keyword is optional),\\n- struct patterns `Name { field1: pattern1, ... }` or `Name(pattern1, ...)`,\\n- tuple patterns `(pattern1, ...)`,\\n- literal patterns `12`, `true`, ...,\\n- slice patterns `[first, second, tail @ ..]`,\\n- dereference patterns `&pattern`.\\n\\nThis was enough to cover all of our examples. The Rust compiler can also automatically add some `ref` patterns when matching on references. We do not need to handle this case as this is automatically done by the Rust compiler during its compilation to the intermediate [THIR](https://rustc-dev-guide.rust-lang.org/thir.html) representation, and we directly read the THIR code.\\n\\n## Conclusion\\n\\nIn this blog post, we have presented how we translate Rust patterns to the proof system Coq. The difficult part is handling the `ref` patterns, which we do by matching in two steps: matching on the values and then computing the addresses of the sub-fields.\\n\\nIf you have Rust smart contracts or programs to verify, feel free to email us at [contact@formal.land](mailto:contact@formal.land). 
We will be happy to help!"},{"id":"/2023/12/13/rust-verify-erc-20-smart-contract","metadata":{"permalink":"/blog/2023/12/13/rust-verify-erc-20-smart-contract","source":"@site/blog/2023-12-13-rust-verify-erc-20-smart-contract.md","title":"Verifying an ERC-20 smart contract in Rust","description":"Our tool coq-of-rust enables formal verification of \ud83e\udd80 Rust code to make sure that a program has no bugs given a precise specification. We work by translating Rust programs to the general proof system \ud83d\udc13 Coq.","date":"2023-12-13T00:00:00.000Z","formattedDate":"December 13, 2023","tags":[{"label":"Aleph-Zero","permalink":"/blog/tags/aleph-zero"},{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"ERC-20","permalink":"/blog/tags/erc-20"},{"label":"ink!","permalink":"/blog/tags/ink"}],"readingTime":20.115,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Verifying an ERC-20 smart contract in Rust","tags":["Aleph-Zero","coq-of-rust","Rust","Coq","ERC-20","ink!"],"authors":[]},"unlisted":false,"prevItem":{"title":"Translating Rust match patterns to Coq with coq-of-rust","permalink":"/blog/2024/01/04/rust-translating-match"},"nextItem":{"title":"Translation of function bodies from Rust to Coq","permalink":"/blog/2023/11/26/rust-function-body"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) enables formal verification of [\ud83e\udd80 Rust](https://www.rust-lang.org/) code to make sure that a program has no bugs given a precise specification. We work by translating Rust programs to the general proof system [\ud83d\udc13 Coq](https://coq.inria.fr/).\\n\\nHere, we show how we formally verify an [ERC-20 smart contract](https://github.com/paritytech/ink/blob/master/integration-tests/erc20/lib.rs) written in Rust for the [Aleph Zero](https://alephzero.org/) blockchain. 
[ERC-20](https://en.wikipedia.org/wiki/Ethereum#ERC20) smart contracts are used to create new kinds of tokens in an existing blockchain. Examples are stablecoins such as the [\ud83d\udcb2USDT](https://tether.to/).\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nTo formally verify your Rust codebase and improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)! Formal verification is the only way to prevent all bugs by exploring all possible executions of your program.\\n\\n:::\\n\\n:::info Thanks\\n\\nThis work and the development of [coq-of-rust](https://github.com/formal-land/coq-of-rust) is made possible thanks to the [Aleph Zero](https://alephzero.org/)\'s Foundation, to develop an extra safe platform to build decentralized applications with formally verified smart contracts.\\n\\n:::\\n\\n![Rooster verifying](2023-12-13/rooster-verifying.png)\\n\\n## Smart contract code \ud83e\udd80\\n\\nHere is the Rust code of the smart contract that we want to verify:\\n\\n```rust\\n#[ink::contract]\\nmod erc20 {\\n use ink::storage::Mapping;\\n\\n #[ink(storage)]\\n #[derive(Default)]\\n pub struct Erc20 {\\n total_supply: Balance,\\n balances: Mapping<AccountId, Balance>,\\n allowances: Mapping<(AccountId, AccountId), Balance>,\\n }\\n\\n #[ink(event)]\\n pub struct Transfer {\\n // ...\\n }\\n\\n #[ink(event)]\\n pub struct Approval {\\n // ...\\n }\\n\\n #[derive(Debug, PartialEq, Eq)]\\n #[ink::scale_derive(Encode, Decode, TypeInfo)]\\n pub enum Error {\\n // ...\\n }\\n\\n pub type Result<T> = core::result::Result<T, Error>;\\n\\n impl Erc20 {\\n #[ink(constructor)]\\n pub fn new(total_supply: Balance) -> Self {\\n let mut balances = Mapping::default();\\n let caller = Self::env().caller();\\n balances.insert(caller, &total_supply);\\n Self::env().emit_event(Transfer {\\n from: None,\\n to: Some(caller),\\n value: total_supply,\\n });\\n Self {\\n total_supply,\\n balances,\\n allowances: Default::default(),\\n }\\n }\\n\\n #[ink(message)]\\n pub 
fn total_supply(&self) -> Balance {\\n self.total_supply\\n }\\n\\n #[ink(message)]\\n pub fn balance_of(&self, owner: AccountId) -> Balance {\\n self.balance_of_impl(&owner)\\n }\\n\\n #[inline]\\n fn balance_of_impl(&self, owner: &AccountId) -> Balance {\\n self.balances.get(owner).unwrap_or_default()\\n }\\n\\n #[ink(message)]\\n pub fn allowance(&self, owner: AccountId, spender: AccountId) -> Balance {\\n self.allowance_impl(&owner, &spender)\\n }\\n\\n #[inline]\\n fn allowance_impl(&self, owner: &AccountId, spender: &AccountId) -> Balance {\\n self.allowances.get((owner, spender)).unwrap_or_default()\\n }\\n\\n #[ink(message)]\\n pub fn transfer(&mut self, to: AccountId, value: Balance) -> Result<()> {\\n let from = self.env().caller();\\n self.transfer_from_to(&from, &to, value)\\n }\\n\\n #[ink(message)]\\n pub fn approve(&mut self, spender: AccountId, value: Balance) -> Result<()> {\\n let owner = self.env().caller();\\n self.allowances.insert((&owner, &spender), &value);\\n self.env().emit_event(Approval {\\n owner,\\n spender,\\n value,\\n });\\n Ok(())\\n }\\n\\n #[ink(message)]\\n pub fn transfer_from(\\n &mut self,\\n from: AccountId,\\n to: AccountId,\\n value: Balance,\\n ) -> Result<()> {\\n let caller = self.env().caller();\\n let allowance = self.allowance_impl(&from, &caller);\\n if allowance < value {\\n return Err(Error::InsufficientAllowance)\\n }\\n self.transfer_from_to(&from, &to, value)?;\\n // We checked that allowance >= value\\n #[allow(clippy::arithmetic_side_effects)]\\n self.allowances\\n .insert((&from, &caller), &(allowance - value));\\n Ok(())\\n }\\n\\n fn transfer_from_to(\\n &mut self,\\n from: &AccountId,\\n to: &AccountId,\\n value: Balance,\\n ) -> Result<()> {\\n let from_balance = self.balance_of_impl(from);\\n if from_balance < value {\\n return Err(Error::InsufficientBalance)\\n }\\n // We checked that from_balance >= value\\n #[allow(clippy::arithmetic_side_effects)]\\n self.balances.insert(from, &(from_balance - 
value));\\n let to_balance = self.balance_of_impl(to);\\n self.balances\\n .insert(to, &(to_balance.checked_add(value).unwrap()));\\n self.env().emit_event(Transfer {\\n from: Some(*from),\\n to: Some(*to),\\n value,\\n });\\n Ok(())\\n }\\n }\\n}\\n```\\n\\nThis whole code is rather short and contains no loops, which will simplify our verification process. It uses a lot of macros, such as `#[ink(message)]`, that are specific to the [ink!](https://use.ink/) language for smart contracts, built on top of Rust. To verify this smart contract, we removed all the macros and added a mock of the dependencies, such as `ink::storage::Mapping` to get a map data structure.\\n\\n## The Coq translation \ud83d\udc13\\n\\nBy running our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) we automatically obtain the corresponding Coq code for the contract [erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/erc20.v). Here is an extract for the `transfer` function:\\n\\n```coq\\n(*\\n fn transfer(&mut self, to: AccountId, value: Balance) -> Result<()> {\\n let from = self.env().caller();\\n self.transfer_from_to(&from, &to, value)\\n }\\n*)\\nDefinition transfer\\n (self : mut_ref ltac:(Self))\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance))\\n : M ltac:(erc20.Result unit) :=\\n let* self : M.Val (mut_ref ltac:(Self)) := M.alloc self in\\n let* to : M.Val erc20.AccountId.t := M.alloc to in\\n let* value : M.Val ltac:(erc20.Balance) := M.alloc value in\\n let* from : M.Val erc20.AccountId.t :=\\n let* \u03b10 : mut_ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : erc20.Env.t :=\\n M.call (erc20.Erc20.t::[\\"env\\"] (borrow (deref \u03b10))) in\\n let* \u03b12 : M.Val erc20.Env.t := M.alloc \u03b11 in\\n let* \u03b13 : erc20.AccountId.t :=\\n M.call (erc20.Env.t::[\\"caller\\"] (borrow \u03b12)) in\\n M.alloc \u03b13 in\\n let* \u03b10 : mut_ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : 
u128.t := M.read value in\\n let* \u03b12 : core.result.Result.t unit erc20.Error.t :=\\n M.call\\n (erc20.Erc20.t::[\\"transfer_from_to\\"] \u03b10 (borrow from) (borrow to) \u03b11) in\\n let* \u03b10 : M.Val (core.result.Result.t unit erc20.Error.t) := M.alloc \u03b12 in\\n M.read \u03b10.\\n```\\n\\nMore details of the translation are given in previous blog posts, but basically:\\n\\n- we make explicit all memory and implicit operations (like borrowing and dereferencing),\\n- we apply a monadic translation to chain the primitive operations with `let*`.\\n\\n## Proof strategy\\n\\n![Proof strategy](2023-12-13/proof-strategy.png)\\n\\nWe verify the code in two steps:\\n\\n1. Show that a simpler, purely functional Coq code can simulate all the smart contract code.\\n2. Show that the simulation is correct.\\n\\nThat way, we can eliminate all the memory-related operations by showing the equivalence with a simulation. Then, we can focus on the functional code, which is more straightforward to reason about. We can cite another project, [Aeneas](https://github.com/AeneasVerif/aeneas), which proposes to do the first step (removing memory operations) automatically.\\n\\n## Simulations\\n\\n### Simulation code\\n\\nWe will work on the example of the `transfer` function. We define the simulations in [Simulations/erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/Simulations/erc20.v). For the `transfer` function this is:\\n\\n```coq\\nDefinition transfer\\n (env : erc20.Env.t)\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n MS? State.t ltac:(erc20.Result unit) :=\\n transfer_from_to (Env.caller env) to value.\\n```\\n\\nThe function `transfer` is a wrapper around `transfer_from_to`, using the smart contract caller as the `from` account. The monad `MS?` combines the state and error effect. 
The state is given by the `State.t` type:\\n\\n```coq\\nModule State.\\n Definition t : Set := erc20.Erc20.t * list erc20.Event.t.\\nEnd State.\\n```\\n\\nIt combines the state of the contract (type `Self` in the Rust code) and a list of events to represent the logs. The errors of the monad include panic errors, as well as control flow primitives such as `return` or `break` that we implement with exceptions.\\n\\n### Equivalence statement\\n\\nWe write all our proofs in [Proofs/erc20.v](https://github.com/formal-land/coq-of-rust/blob/main/CoqOfRust/examples/default/examples/ink_contracts/Proofs/erc20.v). The lemma stating that the simulation is equivalent to the original code is:\\n\\n```coq\\nLemma run_transfer\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance))\\n (H_storage : Erc20.Valid.t storage)\\n (H_value : Integer.Valid.t value) :\\n let state := State.of_storage storage in\\n let self := Ref.mut_ref Address.storage in\\n let simulation :=\\n lift_simulation\\n (Simulations.erc20.transfer env to value) storage in\\n {{ Environment.of_env env, state |\\n erc20.Impl_erc20_Erc20_t_2.transfer self to value \u21d3\\n simulation.(Output.result)\\n | simulation.(Output.state) }}.\\n```\\n\\nThe main predicate is:\\n\\n```coq\\n{{ env, state | translated_code \u21d3 result | final_state }}.\\n```\\n\\nThis predicate defines our semantics, explaining how to evaluate a translated Rust code in an environment `env` and a state `state`, to obtain a result `result` and a final state `final_state`. We use an environment in addition to a state to initialize various globals and other information related to the execution context. 
For example, here, we use the environment to store the `caller` of the contract and the pointer to the list of logs.\\n\\n### Semantics\\n\\nWe define our monad for the translated code `M A` in a style by continuation:\\n\\n```coq\\nInductive t (A : Set) : Set :=\\n| Pure : A -> t A\\n| CallPrimitive {B : Set} : Primitive.t B -> (B -> t A) -> t A\\n| Cast {B1 B2 : Set} : B1 -> (B2 -> t A) -> t A\\n| Impossible : t A.\\nArguments Pure {_}.\\nArguments CallPrimitive {_ _}.\\nArguments Cast {_ _ _}.\\nArguments Impossible {_}.\\n```\\n\\nFor now, we use the primitives to access the memory and the environment:\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set -> Set :=\\n | StateAlloc {A : Set} : A -> t (Ref.t A)\\n | StateRead {Address A : Set} : Address -> t A\\n | StateWrite {Address A : Set} : Address -> A -> t unit\\n | EnvRead {A : Set} : t A.\\nEnd Primitive.\\n```\\n\\nFor each of our monad constructs, we add a case to our evaluation predicate that we will describe:\\n\\n- `Pure` The result is the value itself, and the state is unchanged:\\n ```coq\\n | Pure :\\n {{ env, state\' | LowM.Pure result \u21d3 result | state\' }}\\n ```\\n- `Cast` The evaluation is only possible when `B1` and `B2` are the same type `B`:\\n ```coq\\n | Cast {B : Set} (state : State) (v : B) (k : B -> LowM A) :\\n {{ env, state | k v \u21d3 result | state\' }} ->\\n {{ env, state | LowM.Cast v k \u21d3 result | state\' }}\\n ```\\n In this case, we return the result of the continuation `k` of the cast. We do not change the state in the cast.\\n- We read the state using the primitive `State.read`, checking that the `address` is indeed allocated (it returns `None` otherwise). Note that the type of `v` depends on its address. 
We directly allocate values with their original type, to avoid serializations/deserializations to represent the state.\\n ```coq\\n | CallPrimitiveStateRead\\n (address : Address) (v : State.get_Set address)\\n (state : State)\\n (k : State.get_Set address -> LowM A) :\\n State.read address state = Some v ->\\n {{ env, state | k v \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateRead address) k \u21d3 result\\n | state\' }}\\n ```\\n- Similarly, we write into the state with `State.alloc_write`, that only succeeds for allocated addresses:\\n ```coq\\n | CallPrimitiveStateWrite\\n (address : Address) (v : State.get_Set address)\\n (state state_inter : State)\\n (k : unit -> LowM A) :\\n State.alloc_write address state v = Some state_inter ->\\n {{ env, state_inter | k tt \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateWrite address v) k \u21d3 result\\n | state\' }}\\n ```\\n- To allocate a new value in memory, we have to make a choice depending on whether we want this value to be writable or not. For immutable values, we do not create a new address and instead say that the address is the value itself:\\n ```coq\\n | CallPrimitiveStateAllocNone {B : Set}\\n (state : State) (v : B)\\n (k : Ref B -> LowM A) :\\n {{ env, state | k (Ref.Imm v) \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateAlloc v) k \u21d3 result\\n | state\' }}\\n ```\\n If we later attempt to update this value, it will not be possible to define a semantics and we will be stuck. It is up to the user to correctly anticipate if a value will be updated or not to define the semantics. 
For values that might be updated, we use:\\n ```coq\\n | CallPrimitiveStateAllocSome\\n (address : Address) (v : State.get_Set address)\\n (state : State)\\n (k : Ref (State.get_Set address) -> LowM A) :\\n let r :=\\n Ref.MutRef (A := State.get_Set address) (B := State.get_Set address)\\n address (fun full_v => full_v) (fun v _full_v => v) in\\n State.read address state = None ->\\n State.alloc_write address state v = Some state\' ->\\n {{ env, state | k r \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive (Primitive.StateAlloc v) k \u21d3 result\\n | state\' }}\\n ```\\n We need to provide an address not already allocated: `State.read` should return `None`. At this point, we can make any choice of unallocated address in order to simplify the proofs later.\\n- Finally, we read the whole environment with:\\n ```coq\\n | CallPrimitiveEnvRead\\n (state : State) (k : Env -> LowM A) :\\n {{ env, state | k env \u21d3 result | state\' }} ->\\n {{ env, state |\\n LowM.CallPrimitive Primitive.EnvRead k \u21d3 result\\n | state\' }}\\n ```\\n\\n### Semantics remarks\\n\\nWe can make a few remarks about our semantics:\\n\\n- There are no cases for `M.Impossible` as this primitive corresponds to impossible branches in the code.\\n- The semantics is not computable, in the sense that we cannot define a function `run` to evaluate a monadic program in a certain environment and state. Indeed, the user needs to make a choice during the allocation of new values, to know if we allocate the value as immutable or mutable, and with which address. The `M.Cast` operator is also not computable, as we cannot decide if two types are equal.\\n- We can choose the type that we use for the `State`, as well as the primitives `State.read` and `State.alloc_write`, as long as they verify well-formedness properties. For example, reading after a write at the same address should return the written value. One should choose a `State` that simplifies its proofs the most. 
To verify the smart contract, we have taken a record with two fields:\\n 1. the storage of the contract (the `Self` type in Rust),\\n 2. the list of events logged by the contract.\\n- Even if the monad is in continuation-passing style, we add a primitive `M.Call` corresponding to a bind, to explicit the points in the code where we call user-defined functions. This is not necessary but helpful to track things in the proofs. Otherwise, the monadic bind is defined as a fixpoint with:\\n ```coq\\n Fixpoint bind {A B : Set} (e1 : t A) (f : A -> t B) : t B :=\\n match e1 with\\n | Pure v => f v\\n | CallPrimitive primitive k =>\\n CallPrimitive primitive (fun v => bind (k v) f)\\n | Cast v k =>\\n Cast v (fun v\' => bind (k v\') f)\\n | Impossible => Impossible\\n end.\\n ```\\n- To handle the panic and `return`/`break` exceptions, we wrap our monad into an error monad:\\n ```coq\\n Definition M (A : Set) : Set :=\\n LowM (A + Exception.t).\\n ```\\n where `LowM` is the monad without errors as defined above and `Exception.t` is:\\n ```coq\\n Module Exception.\\n Inductive t : Set :=\\n (** exceptions for Rust\'s `return` *)\\n | Return {A : Set} : A -> t\\n (** exceptions for Rust\'s `continue` *)\\n | Continue : t\\n (** exceptions for Rust\'s `break` *)\\n | Break : t\\n | Panic : Coq.Strings.String.string -> t.\\n End Exception.\\n ```\\n\\n### Proof of equivalence\\n\\nTo prove that the equivalence between the simulation and the original code holds, we proceed by induction on the monadic code. This corresponds to symbolically evaluating the monadic code, in the proof mode of Coq, applying the primitives of the semantics predicate at each step. 
We use the following tactic to automate this work:\\n\\n```coq\\nrun_symbolic.\\n```\\n\\nWe manually handle the following cases:\\n\\n- branching (`if` or `match`),\\n- external function calls: generally, we apply an existing equivalence proof for a call to another function instead of doing the symbolic evaluation of the function,\\n- memory allocations: we need to choose the type of allocation (mutable or immutable) and the address of the allocation for mutable ones.\\n\\nHere is the proof for the `transfer` function:\\n\\n```coq\\nProof.\\n unfold erc20.Impl_erc20_Erc20_t_2.transfer,\\n Simulations.erc20.transfer,\\n lift_simulation.\\n Opaque erc20.transfer_from_to.\\n run_symbolic.\\n eapply Run.Call. {\\n apply run_env.\\n }\\n run_symbolic.\\n eapply Run.Call. {\\n apply Env.run_caller.\\n }\\n run_symbolic.\\n eapply Run.Call. {\\n now apply run_transfer_from_to.\\n }\\n unfold lift_simulation.\\n destruct erc20.transfer_from_to as [[] [?storage ?logs]]; run_symbolic.\\n Transparent erc20.transfer_from_to.\\nQed.\\n```\\n\\n## Proofs\\n\\n### Handling of integers\\n\\nWe distinguish the various types of integers used in Rust:\\n\\n- unsigned ones: `u8`, `u16`, `u32`, `u64`, `u128`, `usize`,\\n- signed ones: `i8`, `i16`, `i32`, `i64`, `i128`, `isize`.\\n\\nWe define a separate type for each of them, that is to say, a wrapper around the `Z` type of unbounded integers from Coq:\\n\\n```coq\\nModule u8.\\n Inductive t : Set := Make (z : Z) : t.\\nEnd u8.\\n```\\n\\nTo enforce the bounds, we define a validity predicate for each type:\\n\\n```coq\\nModule Valid.\\n Definition t {A : Set} `{Integer.C A} (v : A) : Prop :=\\n Integer.min <= Integer.to_Z v <= Integer.max.\\nEnd Valid.\\n```\\n\\nAll integer types are of the class `Integer.C` with a `min`, `max`, and `to_Z` functions. We do not embed this predicate with the integer type ([refinement type](https://en.wikipedia.org/wiki/Refinement_type)) to avoid mixing proofs and code. 
We pay a cost by having to handle the values and the validity proofs separately.\\n\\nDepending on the configuration mode of Rust, integer operations can overflow or panic. We have several implementations of the arithmetic operations, depending on the mode:\\n\\n```coq\\nModule BinOp.\\n (** Operators with panic, in the monad. *)\\n Module Panic.\\n Definition add {A : Set} `{Integer.C A} (v1 v2 : A) : M A :=\\n (* ... *)\\n\\n Definition sub (* ... *)\\n End Panic.\\n\\n (** Operators with overflow, outside of the monad as\\n there cannot be any errors. *)\\n Module Wrap.\\n Definition add {A : Set} `{Integer.C A} (v1 v2 : A) : A :=\\n (* ... *)\\n\\n Definition sub (* ... *)\\n End Wrap.\\nEnd BinOp.\\n```\\n\\nWe also have additional operators, useful for the definition of simulations:\\n\\n- optimistic operators, operating on `Z` without checking the bounds of the result (for cases where we can prove that the result is never out of bounds),\\n- operators returning in the option monad, to handle the case where the result is out of bounds.\\n\\nNote that the comparison operators (`=`, `<`, ...) never panic or overflow. In the context of these smart contracts, the arithmetic operators are panicking in case of overflow.\\n\\n### Definition of messages\\n\\nWe can call the smart contract with three read primitives (`total_supply`, `balance_of`, `allowance`) and three write primitives (`transfer`, `approve`, `transfer_from`). We define two message types to formalize these access points. This will later allow us to express properties over all possible read and write messages:\\n\\n```coq\\nModule ReadMessage.\\n (** The type parameter is the type of result of the call. 
*)\\n Inductive t : Set -> Set :=\\n | total_supply :\\n t ltac:(erc20.Balance)\\n | balance_of\\n (owner : erc20.AccountId.t) :\\n t ltac:(erc20.Balance)\\n | allowance\\n (owner : erc20.AccountId.t)\\n (spender : erc20.AccountId.t) :\\n t ltac:(erc20.Balance).\\nEnd ReadMessage.\\n\\nModule WriteMessage.\\n Inductive t : Set :=\\n | transfer\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n t\\n | approve\\n (spender : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n t\\n | transfer_from\\n (from : erc20.AccountId.t)\\n (to : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n t.\\nEnd WriteMessage.\\n```\\n\\n### No panics on read messages\\n\\nWe show that for all possible read messages, the smart contract does not panic:\\n\\n```coq\\nLemma read_message_no_panic\\n (env : erc20.Env.t)\\n (message : ReadMessage.t ltac:(erc20.Balance))\\n (storage : erc20.Erc20.t) :\\n let state := State.of_storage storage in\\n exists result,\\n {{ Environment.of_env env, state |\\n ReadMessage.dispatch message \u21d3\\n (* [inl] means success (no panics) *)\\n inl result\\n | state }}.\\n```\\n\\nThis is done by symbolic evaluation of the simulations:\\n\\n```coq\\nProof.\\n destruct message; simpl.\\n { eexists.\\n apply run_total_supply.\\n }\\n { eexists.\\n apply run_balance_of.\\n }\\n { eexists.\\n apply run_allowance.\\n }\\nQed.\\n```\\n\\n### Invariants\\n\\nThe data structure of the storage of the smart contract is as follows:\\n\\n```rust\\npub struct Erc20 {\\n total_supply: Balance,\\n balances: Mapping,\\n allowances: Mapping<(AccountId, AccountId), Balance>,\\n}\\n```\\n\\nAn invariant is that the total supply is always equal to the sum of all the balances in the mapping `Mapping`. 
We define this invariant in Coq as:\\n\\n```coq\\nDefinition sum_of_money (storage : erc20.Erc20.t) : Z :=\\n Lib.Mapping.sum Integer.to_Z storage.(erc20.Erc20.balances).\\n\\nModule Valid.\\n Definition t (storage : erc20.Erc20.t) : Prop :=\\n Integer.to_Z storage.(erc20.Erc20.total_supply) =\\n sum_of_money storage.\\nEnd Valid.\\n```\\n\\nWe show that this invariant holds for any output of the write messages, given that it holds for the input storage:\\n\\n```coq\\nLemma write_dispatch_is_valid\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (write_message : WriteMessage.t)\\n (H_storage : Erc20.Valid.t storage)\\n (H_write_message : WriteMessage.Valid.t write_message) :\\n let state := State.of_storage storage in\\n let \'(result, (storage, _)) :=\\n WriteMessage.simulation_dispatch env write_message (storage, []) in\\n match result with\\n | inl _ => Erc20.Valid.t storage\\n | _ => True\\n end.\\n```\\n\\nWe assume that the initial storage is valid with the hypothesis:\\n\\n```coq\\n(H_storage : Erc20.Valid.t storage)\\n```\\n\\nWe show the property in the case without panics with:\\n\\n```coq\\nmatch result with\\n | inl _ => ...\\n```\\n\\nWhen the smart contract panics (integer overflow), the storage is discarded anyway, and it might actually be invalid. For example, in the `transfer_from_to` function we have:\\n\\n```rust\\nself.balances.insert(*from, from_balance - value);\\nlet to_balance = self.balance_of_impl(to);\\nself.balances.insert(*to, to_balance + value);\\n```\\n\\nSo if there is a panic during the addition `+`, like an overflow, the final storage can have the `from` account modified but not the `to` account. So here, the balance sum is no longer equal to the total supply.\\n\\n### Total supply is constant\\n\\nWe show that the total supply is also a constant, meaning that no calls to the smart contract can modify its value. 
The statement is the following:\\n\\n```coq\\nLemma write_dispatch_is_constant\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (write_message : WriteMessage.t) :\\n let state := State.of_storage storage in\\n let \'(result, (storage\', _)) :=\\n WriteMessage.simulation_dispatch env write_message (storage, []) in\\n match result with\\n | inl _ =>\\n storage.(erc20.Erc20.total_supply) =\\n storage\'.(erc20.Erc20.total_supply)\\n | _ => True\\n end.\\n```\\n\\nIt says that for any initial `storage` and `write_message` sent to the smart contract, if we return a result without panicking (`inl _`), then the total supply in the final storage `storage\'` is equal to the initial one. We verify this fact by symbolic evaluation of all the branches of the simulation. There are no difficulties in this proof as the code never modifies the `total_supply`.\\n\\n### Action from the logs\\n\\nWe infer the action of the smart contract on the storage from its logs. This characterizes exactly what modifications we can deduce on the storage from the logs. We define an action as a function from the storage to a set of possible new storages, given the knowledge of the logs of the contract:\\n\\n```coq\\nModule Action.\\n Definition t : Type := erc20.Erc20.t -> erc20.Erc20.t -> Prop.\\nEnd Action.\\n```\\n\\nThe main statement is the following:\\n\\n```coq\\nLemma retrieve_action_from_logs\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (write_message : WriteMessage.t)\\n (events : list erc20.Event.t) :\\n match\\n WriteMessage.simulation_dispatch env write_message (storage, [])\\n with\\n | (inl (result.Result.Ok tt), (storage\', events)) =>\\n action_of_events events storage storage\'\\n | _ => True\\n end.\\n```\\n\\nThis relates the final storage `storage\'` to the initial storage `storage` using the logs `events` when there are no panics. 
We define the `action_of_events` predicate as the successive application of the `action_of_event` predicate, which is defined as:\\n\\n```coq\\nDefinition action_of_event (event : erc20.Event.t) : Action.t :=\\n fun storage storage\' =>\\n match event with\\n | erc20.Event.Transfer (erc20.Transfer.Build_t\\n (option.Option.Some from)\\n (option.Option.Some to)\\n value\\n ) =>\\n (* In case of transfer event, we do not know how the allowances are\\n updated. *)\\n exists allowances\',\\n storage\' =\\n storage <|\\n erc20.Erc20.balances := balances_of_transfer storage from to value\\n |> <|\\n erc20.Erc20.allowances := allowances\'\\n |>\\n | erc20.Event.Transfer (erc20.Transfer.Build_t _ _ _) => False\\n | erc20.Event.Approval (erc20.Approval.Build_t owner spender value) =>\\n storage\' =\\n storage <|\\n erc20.Erc20.allowances :=\\n Lib.Mapping.insert (owner, spender) value\\n storage.(erc20.Erc20.allowances)\\n |>\\n end.\\n```\\n\\nWhen the `event` in the logs is of kind `erc20.Event.Transfer`, the resulting storage has:\\n\\n- the `balances` updated according to the function `balances_of_transfer`;\\n- the `allowances` updated to an unknown value `allowances\'`.\\n\\nWhen the `event` in the logs is of kind `erc20.Event.Approval`, the resulting storage has:\\n\\n- the `allowances` updated calling `Lib.Mapping.insert` on `(owner, spender)`;\\n- the `balances` unchanged.\\n\\n### Approve only on caller\\n\\nWe added one last proof to say that when the `approve` function succeeds, it only modifies the allowance of the caller:\\n\\n```coq\\nLemma approve_only_changes_owner_allowance\\n (env : erc20.Env.t)\\n (storage : erc20.Erc20.t)\\n (spender : erc20.AccountId.t)\\n (value : ltac:(erc20.Balance)) :\\n let \'(result, (storage\', _)) :=\\n Simulations.erc20.approve env spender value (storage, []) in\\n match result with\\n | inl (result.Result.Ok tt) =>\\n forall owner spender,\\n Integer.to_Z (Simulations.erc20.allowance storage\' owner spender) <>\\n 
Integer.to_Z (Simulations.erc20.allowance storage owner spender) ->\\n owner = Simulations.erc20.Env.caller env\\n | _ => True\\n end.\\n```\\n\\nIf an allowance changes after the call to `approve`, then the owner of the allowance is the caller of the smart contract. This is done by symbolic evaluation of the simulation.\\n\\n## Conclusion\\n\\nIn this example, we have shown how we formally verify the ERC-20 smart contract written in Rust for the [Aleph Zero](https://alephzero.org/) project. Formally verifying smart contracts is extremely important as they can hold a lot of money, and a single bug can prove fatal as recent attacks continue to show: [List of crypto hacks in 2023](https://www.ccn.com/education/crypto-hacks-2023-full-list-of-scams-and-exploits-as-millions-go-missing/).\\n\\nIf you have Rust smart contracts to verify, feel free to email us at [contact@formal.land](mailto:contact@formal.land). We will be happy to help!"},{"id":"/2023/11/26/rust-function-body","metadata":{"permalink":"/blog/2023/11/26/rust-function-body","source":"@site/blog/2023-11-26-rust-function-body.md","title":"Translation of function bodies from Rust to Coq","description":"Our tool coq-of-rust enables formal verification of \ud83e\udd80 Rust code, to make sure that a program has no bugs given a precise specification. 
We work by translating Rust programs to the general proof system \ud83d\udc13 Coq.","date":"2023-11-26T00:00:00.000Z","formattedDate":"November 26, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"}],"readingTime":4.975,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Translation of function bodies from Rust to Coq","tags":["coq-of-rust","Rust","Coq"],"authors":[]},"unlisted":false,"prevItem":{"title":"Verifying an ERC-20 smart contract in Rust","permalink":"/blog/2023/12/13/rust-verify-erc-20-smart-contract"},"nextItem":{"title":"Optimizing Rust translation to Coq with THIR and bundled traits","permalink":"/blog/2023/11/08/rust-thir-and-bundled-traits"}},"content":"Our tool [coq-of-rust](https://github.com/formal-land/coq-of-rust) enables formal verification of [\ud83e\udd80 Rust](https://www.rust-lang.org/) code, to make sure that a program has no bugs given a precise specification. We work by translating Rust programs to the general proof system [\ud83d\udc13 Coq](https://coq.inria.fr/).\\n\\nHere, we present how we translate function bodies from Rust to Coq in an example. We also show some of the optimizations we made to reduce the size of the translation.\\n\\n\x3c!-- truncate --\x3e\\n\\n:::tip Purchase\\n\\nIf you need to formally verify your Rust codebase to improve the security of your application, email us at [contact@formal.land](mailto:contact@formal.land)!\\n\\n:::\\n\\n![Rust and Coq](2023-11-26/rust_and_coq.png)\\n\\n## Translating a function body\\n\\nWe take the following Rust example as input:\\n\\n```rust\\n// fn balance_of_impl(&self, owner: &AccountId) -> Balance { ... 
}\\n\\nfn balance_of(&self, owner: AccountId) -> Balance {\\n self.balance_of_impl(&owner)\\n}\\n```\\n\\nHere is the corresponding Coq code that `coq-of-rust` generates _without optimizations_:\\n\\n```coq\\nDefinition balance_of\\n (self : ref ltac:(Self))\\n (owner : erc20.AccountId.t)\\n : M ltac:(erc20.Balance) :=\\n let* self : M.Val (ref ltac:(Self)) := M.alloc self in\\n let* owner : M.Val erc20.AccountId.t := M.alloc owner in\\n let* \u03b10 : ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : M.Val erc20.Erc20.t := deref \u03b10 in\\n let* \u03b12 : ref erc20.Erc20.t := borrow \u03b11 in\\n let* \u03b13 : M.Val (ref erc20.Erc20.t) := M.alloc \u03b12 in\\n let* \u03b14 : ref erc20.Erc20.t := M.read \u03b13 in\\n let* \u03b15 : ref erc20.AccountId.t := borrow owner in\\n let* \u03b16 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b15 in\\n let* \u03b17 : ref erc20.AccountId.t := M.read \u03b16 in\\n let* \u03b18 : M.Val erc20.AccountId.t := deref \u03b17 in\\n let* \u03b19 : ref erc20.AccountId.t := borrow \u03b18 in\\n let* \u03b110 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b19 in\\n let* \u03b111 : ref erc20.AccountId.t := M.read \u03b110 in\\n let* \u03b112 : u128.t := erc20.Erc20.t::[\\"balance_of_impl\\"] \u03b14 \u03b111 in\\n let* \u03b113 : M.Val u128.t := M.alloc \u03b112 in\\n M.read \u03b113.\\n```\\n\\nThis code is much more verbose than the original Rust code as we make all pointer manipulations explicit. We will see just after how to simplify it. We start with the function declaration:\\n\\n```coq\\nDefinition balance_of\\n (self : ref ltac:(Self))\\n (owner : erc20.AccountId.t)\\n : M ltac:(erc20.Balance) :=\\n```\\n\\nthat repeats the parameters in the Rust source. Note that the final result is wrapped into the monad type `M`. This is a monad representing all the side-effects used in Rust programs (state, panic, non-termination, ...). 
Then, we allocate all the function parameters:\\n\\n```coq\\n let* self : M.Val (ref ltac:(Self)) := M.alloc self in\\n let* owner : M.Val erc20.AccountId.t := M.alloc owner in\\n```\\n\\nThis ensures that both `self` and `owner` have an address in memory, in case we borrow them later. This allocation is also fresh, so we cannot access the address of the values from the caller by mistake. We use the monadic let `let*` as allocations can modify the memory state.\\n\\nThen we start by the body of the function itself. We do all the necessary pointer manipulations to compute the parameters `self` and `&owner` of the function `balance_of_impl`. These representations are directly taken from the abstract syntax tree of the Rust compiler (using the [THIR](https://rustc-dev-guide.rust-lang.org/thir.html) version).\\n\\nFor example, for the first parameter `self`, named `\u03b14` in this translation, we do:\\n\\n```coq\\n let* \u03b10 : ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : M.Val erc20.Erc20.t := deref \u03b10 in\\n let* \u03b12 : ref erc20.Erc20.t := borrow \u03b11 in\\n let* \u03b13 : M.Val (ref erc20.Erc20.t) := M.alloc \u03b12 in\\n let* \u03b14 : ref erc20.Erc20.t := M.read \u03b13 in\\n```\\n\\nWe combine the operators:\\n\\n- `M.read`: to get a value of type `A` from a value with an address `M.Val`,\\n- `deref`: to get the value with an address `M.Val A` pointed by a reference `ref A`,\\n- `borrow`: to get the reference `ref A` to a value with an address `M.Val A`,\\n- `M.alloc`: to allocate a new value `A` in memory, returning a value with address `M.Val A`.\\n\\nWe do the same to compute the second parameter `&owner` of `balance_of_impl` with:\\n\\n```coq\\n let* \u03b15 : ref erc20.AccountId.t := borrow owner in\\n let* \u03b16 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b15 in\\n let* \u03b17 : ref erc20.AccountId.t := M.read \u03b16 in\\n let* \u03b18 : M.Val erc20.AccountId.t := deref \u03b17 in\\n let* \u03b19 : ref erc20.AccountId.t := 
borrow \u03b18 in\\n let* \u03b110 : M.Val (ref erc20.AccountId.t) := M.alloc \u03b19 in\\n let* \u03b111 : ref erc20.AccountId.t := M.read \u03b110 in\\n```\\n\\nFinally, we call the `balance_of_impl` function and return the result:\\n\\n```coq\\n let* \u03b112 : u128.t := erc20.Erc20.t::[\\"balance_of_impl\\"] \u03b14 \u03b111 in\\n let* \u03b113 : M.Val u128.t := M.alloc \u03b112 in\\n M.read \u03b113.\\n```\\n\\nWe do not keep the address of the result, as it will be allocated again by the caller function.\\n\\n## Optimizations\\n\\nSome operations can always be removed, namely:\\n\\n- `M.read (M.alloc v) ==> v`: we do not need to allocate and give an address to a value if it will be immediately read,\\n- `deref (borrow v) ==> v` and `borrow (deref v) ==> v`: the borrowing and dereferencing operators are doing the opposite, so they cancel each other. We need to be careful of the mutability status of the borrowing and dereferencing.\\n\\nApplying these simple simplification rules, we get the following slimmed-down translation:\\n\\n```coq\\nDefinition balance_of\\n (self : ref ltac:(Self))\\n (owner : erc20.AccountId.t)\\n : M ltac:(erc20.Balance) :=\\n let* self : M.Val (ref ltac:(Self)) := M.alloc self in\\n let* owner : M.Val erc20.AccountId.t := M.alloc owner in\\n let* \u03b10 : ref erc20.Erc20.t := M.read self in\\n let* \u03b11 : ref erc20.AccountId.t := borrow owner in\\n erc20.Erc20.t::[\\"balance_of_impl\\"] \u03b10 \u03b11.\\n```\\n\\nThis is much shorter and easier to verify!\\n\\n## Conclusion\\n\\nWe have illustrated in an example how we translate a simple function from Rust to Coq. In this example, we saw how the pointer operations are made explicit in the abstract syntax tree of Rust, and how we simplify them for the frequent cases.\\n\\nIf you have any comments or suggestions, feel free to email us at [contact@formal.land](mailto:contact@formal.land). 
In future posts, we will go into more detail about the verification process itself."},{"id":"/2023/11/08/rust-thir-and-bundled-traits","metadata":{"permalink":"/blog/2023/11/08/rust-thir-and-bundled-traits","source":"@site/blog/2023-11-08-rust-thir-and-bundled-traits.md","title":"Optimizing Rust translation to Coq with THIR and bundled traits","description":"We continued our work on coq-of-rust, a tool to formally verify Rust programs using the proof system Coq \ud83d\udc13. This tool translates Rust programs to an equivalent Coq program, which can then be verified using Coq\'s proof assistant. It opens the door to building mathematically proven bug-free Rust programs.","date":"2023-11-08T00:00:00.000Z","formattedDate":"November 8, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"trait","permalink":"/blog/tags/trait"},{"label":"THIR","permalink":"/blog/tags/thir"},{"label":"HIR","permalink":"/blog/tags/hir"}],"readingTime":5.22,"hasTruncateMarker":true,"authors":[{"name":"Guillaume Claret"}],"frontMatter":{"title":"Optimizing Rust translation to Coq with THIR and bundled traits","tags":["coq-of-rust","Rust","Coq","trait","THIR","HIR"],"author":"Guillaume Claret"},"unlisted":false,"prevItem":{"title":"Translation of function bodies from Rust to Coq","permalink":"/blog/2023/11/26/rust-function-body"},"nextItem":{"title":"Trait representation in Coq","permalink":"/blog/2023/08/25/trait-representation-in-coq"}},"content":"We continued our work on [coq-of-rust](https://github.com/formal-land/coq-of-rust), a tool to formally verify [Rust](https://www.rust-lang.org/) programs using the proof system [Coq \ud83d\udc13](https://coq.inria.fr/). This tool translates Rust programs to an equivalent Coq program, which can then be verified using Coq\'s proof assistant. 
It opens the door to building mathematically proven bug-free Rust programs.\\n\\nWe present two main improvements we made to `coq-of-rust`:\\n\\n- Using the THIR intermediate language of Rust to have more information during the translation to Coq.\\n- Bundling the type-classes representing the traits of Rust to have faster type-checking in Coq.\\n\\n\x3c!-- truncate --\x3e\\n\\n![Rust and Coq](2023-11-08/rust_and_coq.png)\\n\\n## THIR intermediate language\\n\\nTo translate Rust programs to Coq, we plug into the compiler of Rust, which operates on a series of intermediate languages:\\n\\n- source code (`.rs` files);\\n- abstract syntax tree (AST): immediately after parsing;\\n- [High-Level Intermediate Representation](https://rustc-dev-guide.rust-lang.org/hir.html) (HIR): after macro expansion, with name resolution and close to the AST;\\n- [Typed High-Level Intermediate Representation](https://rustc-dev-guide.rust-lang.org/thir.html) (THIR): after the type-checking;\\n- [Mid-level Intermediate Representation](https://rustc-dev-guide.rust-lang.org/mir/index.html) (MIR): low-level representation based on a [control-flow graph](https://en.wikipedia.org/wiki/Control-flow_graph), inlining traits and polymorphic functions, and with [borrow checking](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html);\\n- machine code (assembly, LLVM IR, ...).\\n\\nWe were previously using the HIR language to start our translation to Coq, because it is not too low-level and close to what the user has originally in the `.rs` file. This helps relate the generated Coq code to the original Rust code.\\n\\nHowever, at the level of HIR, there is still a lot of implicit information. For example, Rust has [automatic dereferencing rules](https://users.rust-lang.org/t/automatic-dereferencing/53828) that are not yet explicit in HIR. 
In order not to make any mistakes during our translation to Coq, we prefer to use the next representation, THIR, that makes explicit such rules.\\n\\nIn addition, the THIR representation shows when a method call is from a trait (and which trait) or from a standalone `impl` block. Given that we still have trouble translating the traits with [type-classes](https://coq.inria.fr/doc/V8.18.0/refman/addendum/type-classes.html) that are inferrable by Coq, this helps a lot.\\n\\nA downside of the THIR representation is that it is much more verbose. For example, here is a formatting function generated from HIR:\\n\\n```coq\\nDefinition fmt\\n `{\u210b : State.Trait}\\n (self : ref Self)\\n (f : mut_ref core.fmt.Formatter)\\n : M core.fmt.Result :=\\n let* \u03b10 := format_argument::[\\"new_display\\"] (addr_of self.[\\"radius\\"]) in\\n let* \u03b11 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"Circle of radius \\" ])\\n (addr_of [ \u03b10 ]) in\\n f.[\\"write_fmt\\"] \u03b11.\\n```\\n\\nThis is the kind of functions generated by the `#[derive(Debug)]` macro of Rust, to implement a formatting function on a type. 
Here is the version translated from THIR, with explicit borrowing and dereferencing:\\n\\n```coq\\nDefinition fmt\\n `{\u210b : State.Trait}\\n (self : ref Self)\\n (f : mut_ref core.fmt.Formatter)\\n : M ltac:(core.fmt.Result) :=\\n let* \u03b10 := deref f core.fmt.Formatter in\\n let* \u03b11 := borrow_mut \u03b10 core.fmt.Formatter in\\n let* \u03b12 := borrow [ mk_str \\"Circle of radius \\" ] (list (ref str)) in\\n let* \u03b13 := deref \u03b12 (list (ref str)) in\\n let* \u03b14 := borrow \u03b13 (list (ref str)) in\\n let* \u03b15 := pointer_coercion \\"Unsize\\" \u03b14 in\\n let* \u03b16 := deref self converting_to_string.Circle in\\n let* \u03b17 := \u03b16.[\\"radius\\"] in\\n let* \u03b18 := borrow \u03b17 i32 in\\n let* \u03b19 := deref \u03b18 i32 in\\n let* \u03b110 := borrow \u03b19 i32 in\\n let* \u03b111 := core.fmt.rt.Argument::[\\"new_display\\"] \u03b110 in\\n let* \u03b112 := borrow [ \u03b111 ] (list core.fmt.rt.Argument) in\\n let* \u03b113 := deref \u03b112 (list core.fmt.rt.Argument) in\\n let* \u03b114 := borrow \u03b113 (list core.fmt.rt.Argument) in\\n let* \u03b115 := pointer_coercion \\"Unsize\\" \u03b114 in\\n let* \u03b116 := core.fmt.Arguments::[\\"new_v1\\"] \u03b15 \u03b115 in\\n core.fmt.Formatter::[\\"write_fmt\\"] \u03b11 \u03b116.\\n```\\n\\nWe went from a function having two intermediate variables to seventeen intermediate variables. This code is much more verbose, but it is also more explicit. In particular, it details when the:\\n\\n- borrowing (going from a value of type `T` to `&T`), and the\\n- dereferencing (going from a value of type `&T` to `T`)\\n\\noccur. It also shows that the method `write_fmt` is a method from the implementation of the type `core.fmt.Formatter`, generating:\\n\\n```coq\\ncore.fmt.Formatter::[\\"write_fmt\\"] \u03b11 \u03b116\\n```\\n\\ninstead of:\\n\\n```coq\\nf.[\\"write_fmt\\"] \u03b11\\n```\\n\\n## Bundled traits\\n\\nSome Rust codebases can have a lot of traits. 
For example in [paritytech/ink/crates/env/src/types.rs](https://github.com/paritytech/ink/blob/ccb38d2c3ac27523fe3108f2bb7bffbbe908cdb7/crates/env/src/types.rs#L120) the trait `Environment` references more than forty other traits:\\n\\n```rust\\npub trait Environment: Clone {\\n const MAX_EVENT_TOPICS: usize;\\n\\n type AccountId: \'static\\n + scale::Codec\\n + CodecAsType\\n + Clone\\n + PartialEq\\n + ...;\\n\\n type Balance: \'static\\n + scale::Codec\\n + CodecAsType\\n + ...;\\n\\n ...\\n```\\n\\nWe first used an unbundled approach to represent this trait by a type-class in Coq, as it felt more natural:\\n\\n```coq\\nModule Environment.\\n Class Trait (Self : Set) `{Clone.Trait Self}\\n {AccountId : Set}\\n `{scale.Codec.Trait AccountId}\\n `{CodecAsType AccountId}\\n `{Clone AccountId}\\n `{PartialEq AccountId}\\n ...\\n```\\n\\nHowever, the backquote operator generated too many implicit arguments, and the type-checker of Coq was very slow. We then switched to a bundled approach, as advocated in this blog post: [Exponential blowup when using unbundled typeclasses to model algebraic hierarchies](https://www.ralfj.de/blog/2019/05/15/typeclasses-exponential-blowup.html). The Coq code for this trait now looks like this:\\n\\n```coq\\nModule Environment.\\n Class Trait `{\u210b : State.Trait} (Self : Set) : Type := {\\n \u210b_0 :: Clone.Trait Self;\\n MAX_EVENT_TOPICS : usize;\\n AccountId : Set;\\n \u2112_0 :: parity_scale_codec.codec.Codec.Trait AccountId;\\n \u2112_1 :: ink_env.types.CodecAsType.Trait AccountId;\\n \u2112_2 :: core.clone.Clone.Trait AccountId;\\n \u2112_3 ::\\n core.cmp.PartialEq.Trait AccountId\\n (Rhs := core.cmp.PartialEq.Default.Rhs AccountId);\\n ...;\\n Balance : Set;\\n \u2112_8 :: parity_scale_codec.codec.Codec.Trait Balance;\\n \u2112_9 :: ink_env.types.CodecAsType.Trait Balance;\\n ...;\\n\\n ...\\n```\\n\\nWe use the notation `::` for fields that are trait instances. 
With this approach, traits have types as parameters but no other traits.\\n\\nThe type-checking is now much faster, and in particular, we avoid some cases with exponential blowup or non-terminating type-checking. But this is not a perfect solution as we still have cases where the instance inference does not terminate or fails with hard-to-understand error messages.\\n\\n## Conclusion\\n\\nWe have illustrated here some improvements we recently made to our [coq-of-rust](https://github.com/formal-land/coq-of-rust) translator for two key areas:\\n\\n- the translation of traits;\\n- the translation of the implicit borrowing and dereferencing, that can occur every time we call a function.\\n\\nThese improvements will allow us to formally verify some more complex Rust codebases. In particular, we are applying `coq-of-rust` to verify smart contracts written for the [ink!](https://use.ink/) platform, that is a subset of Rust.\\n\\n:::tip Contact\\n\\nIf you have comments, similar experiences to share, or wish to formally verify your codebase to improve the security of your application, contact us at [contact@formal.land](mailto:contact@formal.land)!\\n\\n:::"},{"id":"/2023/08/25/trait-representation-in-coq","metadata":{"permalink":"/blog/2023/08/25/trait-representation-in-coq","source":"@site/blog/2023-08-25-trait-representation-in-coq.md","title":"Trait representation in Coq","description":"In our project coq-of-rust we translate programs written in Rust to equivalent programs in the language of the proof system Coq \ud83d\udc13, which will later allow us to formally verify them.","date":"2023-08-25T00:00:00.000Z","formattedDate":"August 25, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"trait","permalink":"/blog/tags/trait"}],"readingTime":7.58,"hasTruncateMarker":true,"authors":[{"name":"Bart\u0142omiej 
Kr\xf3likowski"}],"frontMatter":{"title":"Trait representation in Coq","tags":["coq-of-rust","Rust","Coq","trait"],"author":"Bart\u0142omiej Kr\xf3likowski"},"unlisted":false,"prevItem":{"title":"Optimizing Rust translation to Coq with THIR and bundled traits","permalink":"/blog/2023/11/08/rust-thir-and-bundled-traits"},"nextItem":{"title":"Monad for side effects in Rust","permalink":"/blog/2023/05/28/monad-for-side-effects-in-rust"}},"content":"In our project [coq-of-rust](https://github.com/formal-land/coq-of-rust) we translate programs written in [Rust](https://www.rust-lang.org/) to equivalent programs in the language of the proof system [Coq \ud83d\udc13](https://coq.inria.fr/), which will later allow us to formally verify them.\\nBoth Coq and Rust have many unique features, and there are many differences between them, so in the process of translation we need to treat the case of each language construction separately.\\nIn this post, we discuss how we translate the most complicated one: [traits](https://doc.rust-lang.org/book/ch10-02-traits.html).\\n\\n\x3c!-- truncate --\x3e\\n\\n## \ud83e\udd80 Traits in Rust\\n\\nTrait is the way to define a shared behaviour for a group of types in Rust.\\nTo define a trait we have to specify a list of signatures of the methods we want to be implemented for the types implementing our trait.\\nWe can also create a generic definition of a trait with the same syntax as in every Rust definition.\\nOptionally, we can add a default implementation to any method or extend the list with associated types.\\nTraits can also extend a behaviour of one or more other traits, in which case, to implement a trait for a type we would have to implement all its supertraits first.\\n\\nConsider the following example (adapted from the [Rust Book](https://doc.rust-lang.org/book/)):\\n\\n```rust\\nstruct Sheep {\\n naked: bool,\\n name: &\'static str,\\n}\\n\\ntrait Animal {\\n // Associated function signature; `Self` refers to the implementor 
type.\\n fn new(name: &\'static str) -> Self;\\n\\n // Method signatures; these will return a string.\\n fn name(&self) -> &\'static str;\\n fn noise(&self) -> &\'static str;\\n\\n // Traits can provide default method definitions.\\n fn talk(&self) {\\n println!(\\"{} says {}\\", self.name(), self.noise());\\n }\\n}\\n\\nimpl Sheep {\\n fn is_naked(&self) -> bool {\\n self.naked\\n }\\n}\\n\\n// Implement the `Animal` trait for `Sheep`.\\nimpl Animal for Sheep {\\n // `Self` is the implementor type: `Sheep`.\\n fn new(name: &\'static str) -> Sheep {\\n Sheep {\\n name: name,\\n naked: false,\\n }\\n }\\n\\n fn name(&self) -> &\'static str {\\n self.name\\n }\\n\\n fn noise(&self) -> &\'static str {\\n if self.is_naked() {\\n \\"baaaaah?\\"\\n } else {\\n \\"baaaaah!\\"\\n }\\n }\\n\\n // Default trait methods can be overridden.\\n fn talk(&self) {\\n // For example, we can add some quiet contemplation.\\n println!(\\"{} pauses briefly... {}\\", self.name, self.noise());\\n }\\n}\\n\\nimpl Sheep {\\n fn shear(&mut self) {\\n if self.is_naked() {\\n // Implementor methods can use the implementor\'s trait methods.\\n println!(\\"{} is already naked...\\", self.name());\\n } else {\\n println!(\\"{} gets a haircut!\\", self.name);\\n\\n self.naked = true;\\n }\\n }\\n}\\n\\nfn main() {\\n // Type annotation is necessary in this case.\\n let mut dolly = Animal::new(\\"Dolly\\"): Sheep;\\n\\n dolly.talk();\\n dolly.shear();\\n dolly.talk();\\n}\\n```\\n\\nWe have a type `Sheep`, a trait `Animal`, and an implementation of `Animal` for `Sheep`.\\nAs we can see in `main`, after a trait is implemented for a type, we can use the methods of the trait like normal methods of the type.\\n\\n## Our translation\\n\\nRust notion of trait is very similar to the concept of [typeclasses](https://en.wikipedia.org/wiki/Type_class) in [functional programming](https://en.wikipedia.org/wiki/Functional_programming).\\nTypeclasses are also present in Coq, so translation of this construction 
is quite straightforward.\\n\\nFor a given trait we create a typeclass with fields being just translated signatures of the methods of the trait.\\nTo allow for the use of method syntax, we also define instances of `Notation.Dot` for every method name of the trait.\\nWe also add a parameter of type `Set` for every type parameter of the trait and translate trait bounds of the types into equivalent typeclass parameters.\\n\\n## Translation of associated types\\n\\nAssociated types are a bit harder than methods to translate, because it is possible to use `::` notation to access them.\\nFor that purpose, we created another typeclass in `Notation` module:\\n\\n```coq\\nClass DoubleColonType {Kind : Type} (type : Kind) (name : string) : Type := {\\n double_colon_type : Set;\\n}.\\n```\\n\\nwith a notation:\\n\\n```coq\\nNotation \\"e1 ::type[ e2 ]\\" := (Notation.double_colon_type e1 e2)\\n (at level 0).\\n```\\n\\nFor every associated type, we create a parameter and a field of the typeclass resulting from the trait translation, and below, we create an instance of `Notation.DoubleColonType`.\\n\\n## The example in Coq\\n\\nHere is our Coq translation of the example code above:\\n\\n```coq\\n(* Generated by coq-of-rust *)\\nRequire Import CoqOfRust.CoqOfRust.\\n\\nModule Sheep.\\n Unset Primitive Projections.\\n Record t : Set := {\\n naked : bool;\\n name : ref str;\\n }.\\n Global Set Primitive Projections.\\n\\n Global Instance Get_naked : Notation.Dot \\"naked\\" := {\\n Notation.dot \'(Build_t x0 _) := x0;\\n }.\\n Global Instance Get_name : Notation.Dot \\"name\\" := {\\n Notation.dot \'(Build_t _ x1) := x1;\\n }.\\nEnd Sheep.\\nDefinition Sheep : Set := @Sheep.t.\\n\\nModule Animal.\\n Class Trait (Self : Set) : Set := {\\n new `{H : State.Trait} : (ref str) -> (M (H := H) Self);\\n name `{H : State.Trait} : (ref Self) -> (M (H := H) (ref str));\\n noise `{H : State.Trait} : (ref Self) -> (M (H := H) (ref str));\\n }.\\n\\n Global Instance Method_new `{H : 
State.Trait} `(Trait)\\n : Notation.Dot \\"new\\" := {\\n Notation.dot := new;\\n }.\\n Global Instance Method_name `{H : State.Trait} `(Trait)\\n : Notation.Dot \\"name\\" := {\\n Notation.dot := name;\\n }.\\n Global Instance Method_noise `{H : State.Trait} `(Trait)\\n : Notation.Dot \\"noise\\" := {\\n Notation.dot := noise;\\n }.\\n Global Instance Method_talk `{H : State.Trait} `(Trait)\\n : Notation.Dot \\"talk\\" := {\\n Notation.dot (self : ref Self):=\\n (let* _ :=\\n let* _ :=\\n let* \u03b10 := self.[\\"name\\"] in\\n let* \u03b11 := format_argument::[\\"new_display\\"] (addr_of \u03b10) in\\n let* \u03b12 := self.[\\"noise\\"] in\\n let* \u03b13 := format_argument::[\\"new_display\\"] (addr_of \u03b12) in\\n let* \u03b14 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" says \\"; \\"\\n\\" ])\\n (addr_of [ \u03b11; \u03b13 ]) in\\n std.io.stdio._print \u03b14 in\\n Pure tt in\\n Pure tt\\n : M (H := H) unit);\\n }.\\nEnd Animal.\\n\\nModule Impl_traits_Sheep.\\n Definition Self := traits.Sheep.\\n\\n Definition is_naked `{H : State.Trait} (self : ref Self) : M (H := H) bool :=\\n Pure self.[\\"naked\\"].\\n\\n Global Instance Method_is_naked `{H : State.Trait} :\\n Notation.Dot \\"is_naked\\" := {\\n Notation.dot := is_naked;\\n }.\\nEnd Impl_traits_Sheep.\\n\\nModule Impl_traits_Animal_for_traits_Sheep.\\n Definition Self := traits.Sheep.\\n\\n Definition new\\n `{H : State.Trait}\\n (name : ref str)\\n : M (H := H) traits.Sheep :=\\n Pure {| traits.Sheep.name := name; traits.Sheep.naked := false; |}.\\n\\n Global Instance AssociatedFunction_new `{H : State.Trait} :\\n Notation.DoubleColon Self \\"new\\" := {\\n Notation.double_colon := new;\\n }.\\n\\n Definition name `{H : State.Trait} (self : ref Self) : M (H := H) (ref str) :=\\n Pure self.[\\"name\\"].\\n\\n Global Instance Method_name `{H : State.Trait} : Notation.Dot \\"name\\" := {\\n Notation.dot := name;\\n }.\\n\\n Definition noise\\n `{H : State.Trait}\\n (self : ref Self)\\n 
: M (H := H) (ref str) :=\\n let* \u03b10 := self.[\\"is_naked\\"] in\\n if (\u03b10 : bool) then\\n Pure \\"baaaaah?\\"\\n else\\n Pure \\"baaaaah!\\".\\n\\n Global Instance Method_noise `{H : State.Trait} : Notation.Dot \\"noise\\" := {\\n Notation.dot := noise;\\n }.\\n\\n Definition talk `{H : State.Trait} (self : ref Self) : M (H := H) unit :=\\n let* _ :=\\n let* _ :=\\n let* \u03b10 := format_argument::[\\"new_display\\"] (addr_of self.[\\"name\\"]) in\\n let* \u03b11 := self.[\\"noise\\"] in\\n let* \u03b12 := format_argument::[\\"new_display\\"] (addr_of \u03b11) in\\n let* \u03b13 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" pauses briefly... \\"; \\"\\n\\" ])\\n (addr_of [ \u03b10; \u03b12 ]) in\\n std.io.stdio._print \u03b13 in\\n Pure tt in\\n Pure tt.\\n\\n Global Instance Method_talk `{H : State.Trait} : Notation.Dot \\"talk\\" := {\\n Notation.dot := talk;\\n }.\\n\\n Global Instance I : traits.Animal.Trait Self := {\\n traits.Animal.new `{H : State.Trait} := new;\\n traits.Animal.name `{H : State.Trait} := name;\\n traits.Animal.noise `{H : State.Trait} := noise;\\n }.\\nEnd Impl_traits_Animal_for_traits_Sheep.\\n\\nModule Impl_traits_Sheep_3.\\n Definition Self := traits.Sheep.\\n\\n Definition shear `{H : State.Trait} (self : mut_ref Self) : M (H := H) unit :=\\n let* \u03b10 := self.[\\"is_naked\\"] in\\n if (\u03b10 : bool) then\\n let* _ :=\\n let* _ :=\\n let* \u03b10 := self.[\\"name\\"] in\\n let* \u03b11 := format_argument::[\\"new_display\\"] (addr_of \u03b10) in\\n let* \u03b12 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" is already naked...\\n\\" ])\\n (addr_of [ \u03b11 ]) in\\n std.io.stdio._print \u03b12 in\\n Pure tt in\\n Pure tt\\n else\\n let* _ :=\\n let* _ :=\\n let* \u03b10 := format_argument::[\\"new_display\\"] (addr_of self.[\\"name\\"]) in\\n let* \u03b11 :=\\n format_arguments::[\\"new_v1\\"]\\n (addr_of [ \\"\\"; \\" gets a haircut!\\n\\" ])\\n (addr_of [ \u03b10 ]) in\\n 
std.io.stdio._print \u03b11 in\\n Pure tt in\\n let* _ := assign self.[\\"naked\\"] true in\\n Pure tt.\\n\\n Global Instance Method_shear `{H : State.Trait} : Notation.Dot \\"shear\\" := {\\n Notation.dot := shear;\\n }.\\nEnd Impl_traits_Sheep_3.\\n\\n(* #[allow(dead_code)] - function was ignored by the compiler *)\\nDefinition main `{H : State.Trait} : M (H := H) unit :=\\n let* dolly :=\\n let* \u03b10 := traits.Animal.new \\"Dolly\\" in\\n Pure (\u03b10 : traits.Sheep) in\\n let* _ := dolly.[\\"talk\\"] in\\n let* _ := dolly.[\\"shear\\"] in\\n let* _ := dolly.[\\"talk\\"] in\\n Pure tt.\\n```\\n\\nAs we can see, the trait `Animal` is translated to a module `Animal`. Every time we want to refer to the trait we use the name `Trait` or `Animal.Trait`, depending on whether we do it inside or outside its module.\\n\\n## Conclusion\\n\\nTraits are similar enough to Coq classes to make the translation relatively intuitive.\\nThe only hard case is a translation of associated types, for which we need a special notation.\\n\\n:::tip Contact\\n\\nIf you have a Rust codebase that you wish to formally verify, or need advice in your work, contact us at [contact@formal.land](mailto:contact@formal.land). We will be happy to set up a call with you.\\n\\n:::"},{"id":"/2023/05/28/monad-for-side-effects-in-rust","metadata":{"permalink":"/blog/2023/05/28/monad-for-side-effects-in-rust","source":"@site/blog/2023-05-28-monad-for-side-effects-in-rust.md","title":"Monad for side effects in Rust","description":"To formally verify Rust programs, we are building coq-of-rust, a translator from Rust \ud83e\udd80 code to the proof system Coq \ud83d\udc13. We generate Coq code that is as similar as possible to the original Rust code, so that the user can easily understand the generated code and write proofs about it. 
In this blog post, we explain how we are representing side effects in Coq.","date":"2023-05-28T00:00:00.000Z","formattedDate":"May 28, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"monad","permalink":"/blog/tags/monad"},{"label":"side effects","permalink":"/blog/tags/side-effects"}],"readingTime":5.03,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Monad for side effects in Rust","tags":["coq-of-rust","Rust","Coq","monad","side effects"]},"unlisted":false,"prevItem":{"title":"Trait representation in Coq","permalink":"/blog/2023/08/25/trait-representation-in-coq"},"nextItem":{"title":"Representation of Rust methods in Coq","permalink":"/blog/2023/04/26/representation-of-rust-methods-in-coq"}},"content":"To formally verify Rust programs, we are building [coq-of-rust](https://github.com/formal-land/coq-of-rust), a translator from Rust \ud83e\udd80 code to the proof system [Coq \ud83d\udc13](https://coq.inria.fr/). We generate Coq code that is as similar as possible to the original Rust code, so that the user can easily understand the generated code and write proofs about it. In this blog post, we explain how we are representing side effects in Coq.\\n\\n\x3c!-- truncate --\x3e\\n\\n## \ud83e\udd80 Side effects in Rust\\n\\nIn programming, [side effects]() are all what is not representable by pure functions (mathematical functions, functions that always return the same output for given input parameters). 
In Rust there are various kinds of side effects:\\n\\n- errors (the [panic!](https://doc.rust-lang.org/core/macro.panic.html) macro) that propagate and do appear in the return type of functions,\\n- non-termination, with some potentially non-terminating loops (never returning a result is considered as a side-effect),\\n- control-flow, with the `break`, `continue`, `return` keywords, that can jump to a different part of the code,\\n- memory allocations and memory mutations,\\n- I/O, with for example the [println!](https://doc.rust-lang.org/std/macro.println.html) macro, that prints a message to the standard output,\\n- concurrency, with the [thread::spawn](https://doc.rust-lang.org/std/thread/fn.spawn.html) function, that creates a new thread.\\n\\n## \ud83d\udc13 Coq, a purely functional language\\n\\nLike most proof systems, Coq is a purely functional language. This means we need to find an encoding for the side effects. The reason for most proof systems to forbid side effects is to be logically consistent. Otherwise, it would be easy to write a proof of `False` by writing a term that does not terminate for example.\\n\\n## \ud83d\udd2e Monads in Coq\\n\\nMonads are a common way to represent side effects in a functional language. A monad is a type constructor `M`:\\n\\n```coq\\nDefinition M (A : Set) : Set :=\\n ...\\n```\\n\\nrepresenting computations returning values of type `A`. As an example we can take the error monad of computations that can fail with an error message, using the [Result](https://doc.rust-lang.org/std/result/enum.Result.html) type like in Rust:\\n\\n```coq\\nDefinition M (A : Set) : Set :=\\n Result A string.\\n```\\n\\nIt must have two operators, `Pure` and `Bind`.\\n\\n### The `Pure` operator\\n\\nThe `Pure` operator has type:\\n\\n```coq\\nDefinition Pure {A : Set} (v : A) : M A :=\\n ...\\n```\\n\\nIt lifts a pure value `v` into the monad. 
For our error monad, the `Pure` operator is:\\n\\n```coq\\nDefinition Pure {A : Set} (v : A) : M A :=\\n Ok v.\\n```\\n\\n### The `Bind` operator\\n\\nThe `Bind` operator has type:\\n\\n```coq\\nDefinition Bind {A B : Set} (e1 : M A) (f : A -> M B) : M B :=\\n ...\\n```\\n\\nIt sequences two computations `e1` with `f`, where `f` is a function that takes the result of `e1` as input and returns a new computation. We also write the `Bind` operator as:\\n\\n```coq\\nlet* x := e1 in\\ne2\\n```\\n\\nassuming that `f` is a function that takes `x` as input and returns `e2`. Requiring this operator for all monads shows that sequencing computations is a very fundamental operation for side effects.\\n\\nFor our error monad, the `Bind` operator is:\\n\\n```coq\\nDefinition Bind {A B : Set} (e1 : M A) (f : A -> M B) : M B :=\\n match e1 with\\n | Ok v => f v\\n | Err msg => Err msg\\n end.\\n```\\n\\n## \ud83d\udea7 State, exceptions, non-termination, control-flow\\n\\nWe use a single monad to represent all the side effects that interest us in Rust. This monad is called `M` and is defined as follows:\\n\\n```coq\\nDefinition RawMonad `{State.Trait} :=\\n ...\\n\\nModule Exception.\\n Inductive t (R : Set) : Set :=\\n | Return : R -> t R\\n | Continue : t R\\n | Break : t R\\n | Panic {A : Set} : A -> t R.\\n Arguments Return {_}.\\n Arguments Continue {_}.\\n Arguments Break {_}.\\n Arguments Panic {_ _}.\\nEnd Exception.\\nDefinition Exception := Exception.t.\\n\\nDefinition Monad `{State.Trait} (R A : Set) : Set :=\\n nat -> State -> RawMonad ((A + Exception R) * State).\\n\\nDefinition M `{State.Trait} (A : Set) : Set :=\\n Monad Empty_set A.\\n```\\n\\nWe assume the definition of some `RawMonad` for memory handling that we will describe in a later post. Our monad `M` is a particular case of the monad `Monad` with `R = Empty_set`. It is a combination of four monads:\\n\\n1. The `RawMonad`.\\n2. A state monad, that takes a `State` as input and returns an updated state as output.
The trait `State.Trait` provides read/write operations on the `State` type.\\n3. An error monad with errors of type `Exception R`. These errors include the `Return`, `Continue`, `Break` and `Panic` constructors. The `Return` constructor is used to return a value from a function. The `Continue` constructor is used to continue the execution of a loop. The `Break` constructor is used to break the execution of a loop. The `Panic` constructor is used to panic with an error message. We implement all these operations as exceptions, even if only `Panic` is really an error, as they behave in the same way: interrupting the execution of the current sub-expression to bubble up to a certain level.\\n4. A fuel monad for non-termination, with the additional `nat` parameter.\\n\\nThe parameter `R` of the type constructor `Monad` is used to represent the type of values that can be returned in the body of a function. It is the same as the return type of the function. So for a function returning a value of type `A`, we define its body in `Monad A A`. Then, we wrap it in an operator:\\n\\n```coq\\nDefinition catch_return {A : Set} (e : Monad A A) : M A :=\\n ...\\n```\\n\\nthat catches the `Return` exceptions and returns the value.\\n\\n## Conclusion\\n\\nWe will see in the next post how we define the `RawMonad` to handle the Rust state of a program and memory allocation.\\n\\n:::tip Contact\\n\\nIf you have a Rust codebase that you wish to formally verify, or need advice in your work, contact us at [contact@formal.land](mailto:contact@formal.land).
We will be happy to set up a call with you.\\n\\n:::"},{"id":"/2023/04/26/representation-of-rust-methods-in-coq","metadata":{"permalink":"/blog/2023/04/26/representation-of-rust-methods-in-coq","source":"@site/blog/2023-04-26-representation-of-rust-methods-in-coq.md","title":"Representation of Rust methods in Coq","description":"With our project coq-of-rust we aim to translate high-level Rust code to similar-looking Coq code, to formally verify Rust programs. One of the important constructs in the Rust language is the method syntax. In this post, we present our technique to translate Rust methods using type-classes in Coq.","date":"2023-04-26T00:00:00.000Z","formattedDate":"April 26, 2023","tags":[{"label":"coq-of-rust","permalink":"/blog/tags/coq-of-rust"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"Coq","permalink":"/blog/tags/coq"}],"readingTime":4.57,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Representation of Rust methods in Coq","tags":["coq-of-rust","Rust","Coq"]},"unlisted":false,"prevItem":{"title":"Monad for side effects in Rust","permalink":"/blog/2023/05/28/monad-for-side-effects-in-rust"},"nextItem":{"title":"Current formal verification efforts \ud83d\udcaa","permalink":"/blog/2023/01/24/current-verification-efforts"}},"content":"With our project [coq-of-rust](https://github.com/formal-land/coq-of-rust) we aim to translate high-level Rust code to similar-looking [Coq](https://coq.inria.fr/) code, to [formally verify](https://en.wikipedia.org/wiki/Formal_verification) Rust programs. One of the important constructs in the Rust language is the [method syntax](https://doc.rust-lang.org/book/ch05-03-method-syntax.html). 
In this post, we present our technique to translate Rust methods using type-classes in Coq.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Rust Code To Translate\\n\\nConsider the following Rust example, which contains a method (adapted from the [Rust Book](https://doc.rust-lang.org/book/)):\\n\\n```rust\\nstruct Rectangle {\\n width: u32,\\n height: u32,\\n}\\n\\nimpl Rectangle {\\n // Here \\"area\\" is a method\\n fn area(&self) -> u32 {\\n self.width * self.height\\n }\\n}\\n\\nfn main() {\\n let rect1 = Rectangle {\\n width: 30,\\n height: 50,\\n };\\n\\n println!(\\n \\"The area of the rectangle is {} square pixels.\\",\\n // We are calling this method there\\n rect1.area()\\n );\\n}\\n```\\n\\nThe Rust compiler can find the implementation of the `.area()` method call because it knows that the type of `rect1` is `Rectangle`. There could be other `area` methods defined for different types, and the code would still compile calling the `area` method of `Rectangle`.\\n\\nCoq has no direct equivalent for calling a function based on its name and type.\\n\\n## Our Translation\\n\\nHere is our Coq translation of the code above:\\n\\n```coq\\n 1: (* Generated by coq-of-rust *)\\n 2: Require Import CoqOfRust.CoqOfRust.\\n 3:\\n 4: Import Root.std.prelude.rust_2015.\\n 5:\\n 6: Module Rectangle.\\n 7: Record t : Set := {\\n 8: width : u32;\\n 9: height : u32;\\n10: }.\\n11:\\n12: Global Instance Get_width : Notation.Dot \\"width\\" := {\\n13: Notation.dot \'(Build_t x0 _) := x0;\\n14: }.\\n15: Global Instance Get_height : Notation.Dot \\"height\\" := {\\n16: Notation.dot \'(Build_t _ x1) := x1;\\n17: }.\\n18: End Rectangle.\\n19: Definition Rectangle : Set := Rectangle.t.\\n20:\\n21: Module ImplRectangle.\\n22: Definition Self := Rectangle.\\n23:\\n24: Definition area (self : ref Self) : u32 :=\\n25: self.[\\"width\\"].[\\"mul\\"] self.[\\"height\\"].\\n26:\\n27: Global Instance Method_area : Notation.Dot \\"area\\" := {\\n28: Notation.dot := area;\\n29: }.\\n30: End 
ImplRectangle.\\n31:\\n32: Definition main (_ : unit) : unit :=\\n33: let rect1 := {| Rectangle.width := 30; Rectangle.height := 50; |} in\\n34: _crate.io._print\\n35: (_crate.fmt.Arguments::[\\"new_v1\\"]\\n36: [ \\"The area of the rectangle is \\"; \\" square pixels.\\\\n\\" ]\\n37: [ _crate.fmt.ArgumentV1::[\\"new_display\\"] rect1.[\\"area\\"] ]) ;;\\n38: tt ;;\\n39: tt.\\n```\\n\\nOn line `24` we define the `area` function. On line `27` we declare that `area` is a method. On line `37` we call the `area` method on `rect1` with:\\n\\n```coq\\nrect1.[\\"area\\"]\\n```\\n\\nwhich closely resembles the source Rust code:\\n\\n```rust\\nrect1.area()\\n```\\n\\nCoq can automatically find the code of the `area` method to call.\\n\\n## How It Works\\n\\nThe code:\\n\\n```coq\\nrect1.[\\"area\\"]\\n```\\n\\nis actually a notation for:\\n\\n```coq\\nNotation.dot \\"area\\" rect1\\n```\\n\\nThen we leverage the inference mechanism of type-classes in Coq to find the code of the `area` method:\\n\\n```coq\\nModule Notation.\\n (** A class to represent the notation [e1.e2]. This is mainly used to call\\n methods, or access to named or indexed fields of structures.\\n The kind is either a string or an integer. *)\\n Class Dot {Kind : Set} (name : Kind) {T : Set} : Set := {\\n dot : T;\\n }.\\n Arguments dot {Kind} name {T Dot}.\\nEnd Notation.\\n```\\n\\nThe `Dot` class has three parameters: `Kind`, `name`, and `T`. `Kind` is the type of the name of the method (generally a string but it could be an integer in rare cases), `name` is the name of the method, and `T` is the type of the method. 
The `dot` field of the class is the code of the method.\\n\\nWhen we define the class instance:\\n\\n```coq\\n27: Global Instance Method_area : Notation.Dot \\"area\\" := {\\n28: Notation.dot := area;\\n29: }.\\n```\\n\\nwe instantiate the class `Notation.Dot` with three parameters:\\n\\n- `Kind` (inferred) is `string` because the name of the method is a string,\\n- `name` is `\\"area\\"` because the name of the method is `area`,\\n- `T` (inferred) is `ref Rectangle -> u32` because the method is declared as `fn area(&self) -> u32`.\\n\\nThen we define the `dot` field of the class instance to be the `area` function.\\n\\nWhen we call:\\n\\n```coq\\nNotation.dot \\"area\\" rect1\\n```\\n\\nCoq will automatically find the class instance `Method_area` because the type of `rect1` is `Rectangle` and the name of the method is `\\"area\\"`.\\n\\n## Other Use Cases\\n\\nThe `Dot` class is also used to access to named or indexed fields of structures or traits. We use a similar mechanism for associated functions. For example, the Rust code:\\n\\n```rust\\nlet rect1 = Rectangle::square(3);\\n```\\n\\nis translated to:\\n\\n```coq\\nlet rect1 := Rectangle::[\\"square\\"] 3 in\\n```\\n\\nwith a type-class for the `type::[name]` notation as follows:\\n\\n```coq\\nModule Notation.\\n (** A class to represent associated functions (the notation [e1::e2]). The\\n kind might be [Set] for functions associated to a type,\\n or [Set -> Set] for functions associated to a trait. *)\\n Class DoubleColon {Kind : Type} (type : Kind) (name : string) {T : Set} :\\n Set := {\\n double_colon : T;\\n }.\\n Arguments double_colon {Kind} type name {T DoubleColon}.\\nEnd Notation.\\n```\\n\\n## In Conclusion\\n\\nThe type-classes mechanism of Coq appears flexible enough to represent our current use cases involving methods and associated functions. 
It remains to be seen whether this approach will suffice for future use cases.\\n\\n:::tip Contact\\n\\nIf you have a Rust codebase that you wish to formally verify, or need advice in your work, contact us at [contact@formal.land](mailto:contact@formal.land). We will be happy to set up a call with you.\\n\\n:::"},{"id":"/2023/01/24/current-verification-efforts","metadata":{"permalink":"/blog/2023/01/24/current-verification-efforts","source":"@site/blog/2023-01-24-current-verification-efforts.md","title":"Current formal verification efforts \ud83d\udcaa","description":"We are diversifying ourselves to apply formal verification on 3\ufe0f\u20e3 new languages with Solidity, Rust, and TypeScript. In this article we describe our approach. For these three languages, we translate the code to the proof system \ud83d\udc13 Coq. We generate the cleanest \ud83e\uddfc possible output to simplify the formal verification \ud83d\udcd0 effort that comes after.","date":"2023-01-24T00:00:00.000Z","formattedDate":"January 24, 2023","tags":[{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"OCaml","permalink":"/blog/tags/o-caml"},{"label":"Solidity","permalink":"/blog/tags/solidity"},{"label":"Rust","permalink":"/blog/tags/rust"},{"label":"TypeScript","permalink":"/blog/tags/type-script"}],"readingTime":4.89,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Current formal verification efforts \ud83d\udcaa","tags":["coq-of-ocaml","OCaml","Solidity","Rust","TypeScript"]},"unlisted":false,"prevItem":{"title":"Representation of Rust methods in Coq","permalink":"/blog/2023/04/26/representation-of-rust-methods-in-coq"},"nextItem":{"title":"Latest blog posts on our formal verification effort on Tezos","permalink":"/blog/2022/12/13/latest-blog-posts-on-tezos"}},"content":"We are diversifying ourselves to apply [formal verification](https://en.wikipedia.org/wiki/Formal_verification) on 3\ufe0f\u20e3 new languages with **Solidity**, **Rust**, and 
**TypeScript**. In this article we describe our approach. For these three languages, we translate the code to the proof system [\ud83d\udc13 Coq](https://coq.inria.fr/). We generate the cleanest \ud83e\uddfc possible output to simplify the formal verification \ud83d\udcd0 effort that comes after.\\n\\n> Formal verification is a way to ensure that a program follows its specification in \ud83d\udcaf% of cases thanks to the use of mathematical methods. It removes far more bugs and security issues than testing, and is necessary to deliver software of the highest quality \ud83d\udc8e.\\n\\n\x3c!-- truncate --\x3e\\n\\n## \ud83d\uddfa\ufe0f General plan\\nTo apply formal verification to real-sized applications, we need to handle thousands of lines of code in a seamless way. We rely on the proof system Coq to write our proofs, as it has a mature ecosystem, and automated (SMT) and interactive ways to write proofs. To keep the proofs simple, we must find an efficient way to convert an existing and evolving codebase to Coq.\\n\\nFor example, given the following TypeScript example:\\n```typescript\\nexport function checkIfEnoughCredits(user: User, credits: number): boolean {\\n if (user.isAdmin) {\\n return credits >= 0;\\n }\\n\\n return credits >= 1000;\\n}\\n```\\nwe want to generate the corresponding Coq code in an automated way:\\n```coq\\nDefinition checkIfEnoughCredits (user : User) (credits : number) : bool :=\\n if user.(User.isAdmin) then\\n credits >= 0\\n else\\n credits >= 1000.\\n```\\nThis is the exact equivalent written using the Coq syntax, where we check the `credits` condition depending on the user\'s status. This is the `checkIfEnoughCredits` definition a Coq developer would directly write, in an idiomatic way.\\n\\nWe make some hypothesis on the input code. In TypeScript we assume the code does not contain mutations, which is often the case to simplify asynchronous code. 
In Rust we have other hypotheses, as making safe mutations is one of the key features of the language and a frequent pattern. For each language we look for a correct subset to work on, to support common use cases and still generate clean Coq code.\\n\\n## \ud83c\uddf8 Solidity\\n\u27a1\ufe0f [Project page](/docs/verification/solidity) \u2b05\ufe0f\\n\\nThe [Solidity language](https://soliditylang.org/) is the main language to write smart contracts on the [Ethereum](https://ethereum.org/) blockchain. As smart contracts cannot be easily updated and handle a large amount of money, it is critical to formally verify them to prevent bugs.\\n\\nOur strategy is to develop a translator [coq-of-solidity](https://gitlab.com/formal-land/coq-of-solidity) from Solidity to Coq. We are using an implementation of an [ERC-20](https://en.wikipedia.org/wiki/Ethereum#ERC20) smart contract as an example to guide our translation. Two top difficulties in the translation of Solidity programs are:\\n* the use of object-oriented programming with inheritance on classes,\\n* the use of mutations and errors, that need to be handled in a monad.\\n\\nWe are still trying various approaches to handle these difficulties and generate a clean Coq output for most cases.\\n\\nIn addition to our work on Solidity, we are looking at the [EVM code](https://ethereum.org/en/developers/docs/evm/) that is the assembly language of Ethereum. It has the advantage of being more stable and with a simpler semantics than Solidity. However, it is not as expressive and programs in EVM are much harder to read. We have a prototype translator from EVM to Coq named [ethereum-vm-to-coq](https://gitlab.com/formal-land/ethereum-vm-to-coq).
An interesting goal will be to connect the translation of Solidity and of EVM in Coq to show that they have the same semantics on a given smart contract.\\n\\nNote that EVM is the target language of many verification projects on Ethereum such as [Certora](https://www.certora.com/) or static analyzers. We prefer to target Solidity as it is more expressive and the generated code in Coq will thus be easier to verify.\\n\\n## \ud83e\udd80 Rust\\n\u27a1\ufe0f [Project page](/docs/verification/rust) \u2b05\ufe0f\\n\\nThe [Rust language](https://www.rust-lang.org/) is a modern systems programming language that is gaining popularity. It is a safe language that prevents many common errors such as buffer overflows or use-after-free. It is also a language that is used to write low-level code, such as drivers or operating systems. As such, it is critical to formally verify Rust programs to prevent bugs.\\n\\nWe work in collaboration with the team developing the [Aeneas](https://github.com/AeneasVerif) project, with people from Inria and Microsoft. The aim is to translate Rust code with mutations to a purely functional form in Coq (without mutations) to simplify the verification effort and avoid the need for separation logic. The idea of this translation is explained in the [Aeneas paper](https://dl.acm.org/doi/abs/10.1145/3547647).\\n\\nThere are two steps in the translation:\\n1. **From [MIR](https://rustc-dev-guide.rust-lang.org/mir/index.html) (low-level intermediate form of Rust) to LLBC.** This is a custom language for the project that contains all the information of MIR but is better suited for analysis. For example, instead of using a control-flow graph it uses control structures and an abstract syntax tree. This step is implemented in Rust.\\n2. **From LLBC to Coq.** This is the heart of the project and is implemented in OCaml.
This is where the translation from mutations to a purely functional form occurs.\\n\\nFor now we are focusing on adding new features to LLBC and improving the user experience: better error messages, generation of an output with holes for unhandled Rust features.\\n\\n## \ud83c\udf10 TypeScript\\n\u27a1\ufe0f [Project page](/docs/verification/typescript) \u2b05\ufe0f\\n\\nWe have a [\ud83d\udcfd\ufe0f demo project](https://formal-land.github.io/coq-of-js/) to showcase the translation of a purely functional subset of JavaScript to Coq. We handle functions and basic data types such as records, enums and discriminated unions. We are now porting the code to TypeScript in [coq-of-ts](https://github.com/formal-land/coq-of-ts). We prefer to work on TypeScript rather than JavaScript as type information are useful to guide the translation, and avoid the need of additional annotations on the source code.\\n\\nOur next target will be to make `coq-of-ts` usable on real-life project example.\\n\\n:::info Social media\\nFollow us on Twitter at [Twitter](https://twitter.com/LandFoobar) \ud83d\udc26 and [Telegram](https://t.me/formal_land) to get the latest news about our projects. If you think our work is interesting, please share it with your friends and colleagues. \ud83d\ude4f\\n:::"},{"id":"/2022/12/13/latest-blog-posts-on-tezos","metadata":{"permalink":"/blog/2022/12/13/latest-blog-posts-on-tezos","source":"@site/blog/2022-12-13-latest-blog-posts-on-tezos.md","title":"Latest blog posts on our formal verification effort on Tezos","description":"Here we recall some blog articles that we have written since this summer, on the formal verification of the protocol of Tezos. For this project, we are verifying a code base of around 100,000 lines of OCaml code. We automatically convert the OCaml code to the proof system Coq using the converter coq-of-ocaml. 
We then apply various proof techniques to make sure that the protocol of Tezos does not contain bugs.","date":"2022-12-13T00:00:00.000Z","formattedDate":"December 13, 2022","tags":[{"label":"coq-tezos-of-ocaml","permalink":"/blog/tags/coq-tezos-of-ocaml"},{"label":"Tezos","permalink":"/blog/tags/tezos"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"}],"readingTime":1.755,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Latest blog posts on our formal verification effort on Tezos","tags":["coq-tezos-of-ocaml","Tezos","coq-of-ocaml"]},"unlisted":false,"prevItem":{"title":"Current formal verification efforts \ud83d\udcaa","permalink":"/blog/2023/01/24/current-verification-efforts"},"nextItem":{"title":"Upgrade coq-of-ocaml to OCaml 4.14","permalink":"/blog/2022/06/23/upgrade-coq-of-ocaml-4.14"}},"content":"Here we recall some blog articles that we have written since this summer, on the [formal verification of the protocol of Tezos](https://formal-land.gitlab.io/coq-tezos-of-ocaml/). For this project, we are verifying a code base of around 100,000 lines of OCaml code. We automatically convert the OCaml code to the proof system Coq using the converter [coq-of-ocaml](https://github.com/formal-land/coq-of-ocaml). We then apply various proof techniques to make sure that the protocol of Tezos does not contain bugs.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Blog articles \ud83d\udcdd\\nHere is the list of articles about the work we have done since this summer. 
We believe that some of this work is very unique and specific to Tezos.\\n\\n* [The error monad, internal errors and validity predicates, step-by-step](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/12/12/internal-errors-step-by-step/) by *Pierre Vial*: a detailed explanation of what we are doing to verify the absence of unexpected errors in the whole code base;\\n* [Absence of internal errors](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/10/18/absence-of-internal-errors/) by *Guillaume Claret*: the current state of our proofs to verify the absence of unexpected errors;\\n* [Skip-list verification. Using inductive predicates](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/10/03/verifying-the-skip-list-inductive-predicates/) by *Bart\u0142omiej Kr\xf3likowski* and *Natalie Klaus*: a presentation of our verification effort on the skip-list algorithm implementation (part 2);\\n* [Verifying the skip-list](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/10/03/verifying-the-skip-list/) by *Natalie Klaus* and *Bart\u0142omiej Kr\xf3likowski*: a presentation of our verification effort on the skip-list algorithm implementation (part 1);\\n* [Verifying json-data-encoding](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/08/15/verify-json-data-encoding/) by *Tait van Strien*: our work to verify an external library used by the Tezos protocol, to safely serialize data to JSON values;\\n* [Fixing reused proofs](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/07/19/fixing-proofs/) by *Bart\u0142omiej Kr\xf3likowski*: a presentation, with examples, of the work we do to maintain existing proofs and specifications as the code evolves;\\n* [Formal verification of property based tests](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/06/07/formal-verification-of-property-based-tests/) by *Guillaume Claret*: the principle and status of our work to formally verify the generalized case of property-based 
tests;\\n* [Plan for backward compatibility verification](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/06/02/plan-backward-compatibility) by *Guillaume Claret*: an explanation of the strategy we use to show that two successive versions of the Tezos protocol are fully backward compatible.\\n\\nTo follow more of our activity, feel free to register on our [Twitter account \ud83d\udc26](https://twitter.com/LandFoobar)! If you need services or advices to formally verify your code base, you can drop us an [email \ud83d\udce7](mailto:contact@formal.land)!"},{"id":"/2022/06/23/upgrade-coq-of-ocaml-4.14","metadata":{"permalink":"/blog/2022/06/23/upgrade-coq-of-ocaml-4.14","source":"@site/blog/2022-06-23-upgrade-coq-of-ocaml-4.14.md","title":"Upgrade coq-of-ocaml to OCaml 4.14","description":"In an effort to support the latest version of the protocol of Tezos we upgraded coq-of-ocaml to add compatibility with OCaml 4.14. The result is available in the branch ocaml-4.14. We describe here how we made this upgrade.","date":"2022-06-23T00:00:00.000Z","formattedDate":"June 23, 2022","tags":[{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"ocaml","permalink":"/blog/tags/ocaml"},{"label":"4.14","permalink":"/blog/tags/4-14"}],"readingTime":2.195,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Upgrade coq-of-ocaml to OCaml 4.14","tags":["coq-of-ocaml","ocaml","4.14"]},"unlisted":false,"prevItem":{"title":"Latest blog posts on our formal verification effort on Tezos","permalink":"/blog/2022/12/13/latest-blog-posts-on-tezos"},"nextItem":{"title":"Status update on the verification of Tezos","permalink":"/blog/2022/06/15/status update-tezos"}},"content":"In an effort to support the latest version of the [protocol of Tezos](https://gitlab.com/tezos/tezos/-/tree/master/src/proto_alpha/lib_protocol) we upgraded [`coq-of-ocaml`](https://github.com/formal-land/coq-of-ocaml) to add compatibility with OCaml 4.14. 
The result is available in the branch [`ocaml-4.14`](https://github.com/formal-land/coq-of-ocaml/pull/217). We describe here how we made this upgrade.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Usage of Merlin\\nIn `coq-of-ocaml` we are using [Merlin](https://github.com/ocaml/merlin) to get the typed [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of OCaml files. We see the AST through the [Typedtree](https://docs.mirage.io/ocaml/Typedtree/index.html) interface, together with an access to all the definitions of the current compilation environment. Merlin computes the current environment by understanding how an OCaml project is configured and connecting to the [dune](https://dune.build/) build system. The environment is mandatory for certain transformations in `coq-of-ocaml`, like:\\n* finding a canonical name for module types;\\n* propagating phantom types.\\n\\nIn order to use Merlin as a library (rather than as a daemon), we vendor the [LSP version](https://github.com/rgrinberg/merlin/tree/lsp) of [rgrinberg](https://github.com/rgrinberg) in the folder [`vendor/`](https://github.com/formal-land/coq-of-ocaml/tree/master/vendor). This vendored version works with no extra configurations.\\n\\n## Upgrade\\nWhen a new version of OCaml is out, we upgrade our vendored version of Merlin to a compatible one. Then we do the necessary changes to `coq-of-ocaml`, as the interface of the AST generally evolves with small changes. For OCaml 4.14, the main change was some types becoming abstract such as `Types.type_expr`. To access to the fields of these types, we now need to use a specific getter and do changes such as:\\n```diff\\n+ match typ.desc with\\n- match Types.get_desc typ with\\n```\\nThis made some patterns in `match` expressions more complex, but otherwise the changes were very minimal. 
We ran all the unit-tests of `coq-of-ocaml` after the upgrade and they were still valid.\\n\\n## Git submodule or copy & paste?\\nTo vendor Merlin we have two possibilities:\\n1. Using a [Git submodule](https://git-scm.com/book/en/v2/Git-Tools-Submodules).\\n2. Doing a copy & paste of the code.\\n\\nThe first possibility is more efficient in terms of space, but there are a few disadvantages:\\n* we cannot make small modifications if needed;\\n* the archives generated by Github do not contain the code of the submodules (see this [issue](https://github.com/dear-github/dear-github/issues/214))\\n* if a commit in the repository for the submodule disappears, then the submodule is unusable.\\n\\nThe last reason forced us to do a copy & paste for OCaml 4.14. We now have to be cautious not to commit the generate `.ml` file for the OCaml parser.\\n\\n## Next\\nThe next change will be doing the upgrade to OCaml 5. There should be much more changes, and in particular a new way of handling the effects. We do not know yet if it will be possible to translate the effect handlers to Coq in a nice way."},{"id":"/2022/06/15/status update-tezos","metadata":{"permalink":"/blog/2022/06/15/status update-tezos","source":"@site/blog/2022-06-15-status update-tezos.md","title":"Status update on the verification of Tezos","description":"Here we give an update on our verification effort on the protocol of Tezos. 
We add the marks:","date":"2022-06-15T00:00:00.000Z","formattedDate":"June 15, 2022","tags":[{"label":"tezos","permalink":"/blog/tags/tezos"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"coq","permalink":"/blog/tags/coq"}],"readingTime":7.53,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Status update on the verification of Tezos","tags":["tezos","coq-of-ocaml","coq"]},"unlisted":false,"prevItem":{"title":"Upgrade coq-of-ocaml to OCaml 4.14","permalink":"/blog/2022/06/23/upgrade-coq-of-ocaml-4.14"},"nextItem":{"title":"Make Tezos the first formally verified cryptocurrency","permalink":"/blog/2022/02/02/make-tezos-a-formally-verified-crypto"}},"content":"Here we give an update on our [verification effort](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the protocol of Tezos. We add the marks:\\n* \u2705 for \\"rather done\\"\\n* \ud83c\udf0a for \\"partially done\\"\\n* \u274c for \\"most is yet to do\\"\\n\\nOn the website of project, we also automatically generates pages such as [Compare](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/status/compare/) to follow the status of the tasks.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Maintenance of the translation \u2705\\nWe were able to maintain most of the translation from OCaml to Coq of the protocol of Tezos using [coq-of-ocaml](https://github.com/formal-land/coq-of-ocaml), including all the translation of the Michelson interpreter. There was an increase in the size of the OCaml code base in recent months, due to new features added in Tezos like the [rollups](https://research-development.nomadic-labs.com/tezos-is-scaling.html). Here are the numbers of lines of code (`.ml` and `.mli` files) for the various protocol versions:\\n* protocol H: `51147`\\n* protocol I: `59535`\\n* protocol J: `83271` (increase mainly due to the rollups)\\n* protocol Alpha (development version of K): `90716`\\n\\nWe still translate most of the protocol code up to version J. 
We stayed on version J for a while as we wanted to add as many proofs as possible before doing a proof of backward compatibility between J and K. We are currently updating the translation to support the protocol version Alpha, preparing for the translation of K.\\n\\nFor protocol J, we needed to add a [blacklist.txt](https://gitlab.com/nomadic-labs/coq-tezos-of-ocaml/-/blob/master/blacklist.txt) of files that we do not support. Indeed, we need to add new changes to `coq-of-ocaml` to support these or do hard-to-maintain changes to [our fork](https://gitlab.com/tezos/tezos/-/merge_requests/3303) of the Tezos protocol. We plan to complete the translation and remove this black-list for the protocol J soon (in a week or two).\\n\\n## Size of the proofs \u2705\\nOne of our plans is to have a reasonable quantity of proofs, to cover a reasonable quantity of code and properties from the protocol. We believe we have a good quantity of proofs now, as we have more than 50,000 lines of Coq code (for an OCaml codebase of 80,000 lines).\\n\\nIn addition to our main targets, we verify many \\"smaller\\" properties, such as:\\n* conversion functions are inverses (when there are two `to_int` and `of_int` functions in a file, we show that they are inverses);\\n* the `compare` functions, to order elements, are well defined (see our blog post [Verifying the compare functions of OCaml](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/04/04/verifying-the-compare-functions));\\n* invariants are preserved. For example, [here](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/proofs/carbonated_map#Make.update_is_valid) we show that updating a carbonated map preserves the property of having a size field actually equal to the number of elements.\\n\\nWe should note that the size of Coq proofs tends to grow faster than the size of the verified code. 
We have no coverage metrics to know how much of the code is covered by these proofs.\\n\\n## Data-encodings \ud83c\udf0a\\nThe [data-encoding](https://gitlab.com/nomadic-labs/data-encoding) library is a set of combinators to write serialization/de-serialization functions. We verify that the encodings defined for each protocol data type are bijective. The good thing we have is a semi-automated tactic to verify the use of the `data-encoding` primitives. We detail this approach in our blog post [Automation of `data_encoding` proofs](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/11/22/data-encoding-automation). We can verify most of the encoding functions that we encounter. From there, we also express the **invariant** associated with each data type, which the encodings generally check at runtime. The invariants are then the domain of definition of the encodings.\\n\\nHowever, we have a hole: we do not verify the `data-encoding` library itself. Thus the [axioms we made](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/environment/proofs/data_encoding) on the data-encoding primitives may have approximations. And indeed, we missed one issue in the development code of the protocol. This is thus a new high-priority target to verify the `data-encoding` library itself. One of the challenges for the proof is the use of side-effects (references and exceptions) in this library.\\n\\n## Property-based tests \ud83c\udf0a\\nThe property-based tests on the protocol are located in [`src/proto_alpha/lib_protocol/test/pbt`](https://gitlab.com/tezos/tezos/-/tree/master/src/proto_alpha/lib_protocol/test/pbt). These tests are composed of:\\n* a generator, generating random inputs of a certain shape;\\n* a property function, a boolean function taking a generated input and supposed to always answer `true`.\\n\\nWe translated a part of these tests to Coq, to convert them to theorems and have specifications extracted from the code. 
The result of this work is summarized in this blog post: [Formal verification of property based tests](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2022/06/07/formal-verification-of-property-based-tests). We have fully translated and verified four test files over a total of twelve. We are continuing the work of translations and proofs.\\n\\nHowever, we found that for some of the files the proofs were taking a long time to write compared to the gains in safety. Indeed, the statements made in the tests are sometimes too complex when translated into general theorems. For example, for [test_carbonated_map.ml](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/test/pbt/test_carbonated_map.ml) we have to deal with:\\n* gas exhaustion (seemingly impossible in the tests);\\n* data structures of size greater than `max_int` (impossible in practice).\\n\\nAll of that complicate the proofs for little gain in safety. So I would say that not all the property-based tests have a nice and useful translation to Coq. We should still note that for some of the tests, like with saturation arithmetic, we have proofs that work well. For these, we rely on the automated linear arithmetic tactic [`lia`](https://coq.inria.fr/refman/addendum/micromega.html) of Coq to verify properties over integer overflows.\\n\\n## Storage system \ud83c\udf0a\\nBy \\"storage system\\" we understand the whole set of functors defined in [`storage_functors.ml`](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/storage_functors.ml) and how we apply them to define the protocol storage in [`storage.ml`](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/storage_functors.ml). 
These functors create sub-storages with signatures such as:\\n```ocaml\\nmodule type Non_iterable_indexed_data_storage = sig\\n type t\\n type context = t\\n type key\\n type value\\n val mem : context -> key -> bool Lwt.t\\n val get : context -> key -> value tzresult Lwt.t\\n val find : context -> key -> value option tzresult Lwt.t\\n val update : context -> key -> value -> Raw_context.t tzresult Lwt.t\\n val init : context -> key -> value -> Raw_context.t tzresult Lwt.t\\n val add : context -> key -> value -> Raw_context.t Lwt.t\\n val add_or_remove : context -> key -> value option -> Raw_context.t Lwt.t\\n val remove_existing : context -> key -> Raw_context.t tzresult Lwt.t\\n val remove : context -> key -> Raw_context.t Lwt.t\\nend\\n```\\nThis `Non_iterable_indexed_data_storage` API looks like the API of an OCaml\'s [Map](https://v2.ocaml.org/api/Map.Make.html). As a result, our goal for the storage is to show that is can be simulated by standard OCaml data structures such as sets and maps. This is a key step to unlock further reasoning about code using the storage.\\n\\nUnfortunately, we were not able to verify the whole storage system yet. Among the difficulties are that:\\n* there are many layers in the definition of the storage;\\n* the storage functors use a lot of abstractions, and sometimes it is unclear how to specify them in the general case.\\n\\nStill, we have verified some of the functors as seen in [`Proofs/Storage_functors.v`](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/proofs/storage_functors) and specified the `storage.ml` file in [`Proos/Storage.v`](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/storage). We believe in having the correct specifications for all of the storage abstractions now. We plan to complete all these proofs later.\\n\\n## Michelson\\nThe verification of the Michelson interpreter is what occupied most of our time. 
By considering the OCaml files whose name starts by `script_`, the size of the Michelson interpreter is around 20,000 lines of OCaml code.\\n\\n### Simulations \ud83c\udf0a\\nThe interpreter relies heavily on [GADTs](https://v2.ocaml.org/manual/gadts.html) in OCaml. Because these do not translate nicely in Coq, we need to write simulations in dependent types of the interpreter functions, and prove them correct in Coq. We describe this process in our [Michelson Guide](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/guides/michelson).\\n\\nThe main difficulties we encountered are:\\n* the number of simulations to write (covering the 20,000 lines of OCaml);\\n* the execution time of the proof of correctness of the simulations. This is due to the large size of the inductive types describing the Michelson AST, and the use of dependent types generating large proof terms. For example, there are around 30 cases for the types and 150 for the instructions node in the AST.\\n\\nWhen writing the simulations, we are also verifying the termination of all the functions and the absence of reachable `assert false`. We have defined the simulation of many functions, but are still missing important ones such as [`parse_instr_aux`](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/script_ir_translator/#parse_instr_aux) to parse Michelson programs.\\n\\n### Mi-Cho-Coq \ud83c\udf0a\\nWe have a project to verify that the [Mi-Cho-Coq](https://gitlab.com/nomadic-labs/mi-cho-coq) framework, used to formally verify smart contracts written in Michelson, is compatible with the implementation of the Michelson interpreter in OCaml. We have a partial proof of compatibility in [Micho_to_dep.v](https://formal-land.gitlab.io/coq-tezos-of-ocaml/docs/simulations/micho_to_dep). We still need to complete this proof, especially to handle instructions with loops. 
Our goal is to show a complete inclusion of the semantics of Mi-Cho-Coq into the semantics of the implementation.\\n\\n### Parse/unparse \u274c\\nWe wanted to verify that the various parsing and unparsing functions over Michelson are inverses. These functions exist for:\\n* comparable types\\n* types\\n* comparable data\\n* data\\n\\nBecause we are still focused on writing, verifying or updating the simulations, we are still not done for this task.\\n\\n## Conclusion\\nWe have many ongoing projects but few fully completed tasks. We will focus more on having terminated proofs."},{"id":"/2022/02/02/make-tezos-a-formally-verified-crypto","metadata":{"permalink":"/blog/2022/02/02/make-tezos-a-formally-verified-crypto","source":"@site/blog/2022-02-02-make-tezos-a-formally-verified-crypto.md","title":"Make Tezos the first formally verified cryptocurrency","description":"Elephants","date":"2022-02-02T00:00:00.000Z","formattedDate":"February 2, 2022","tags":[{"label":"tezos","permalink":"/blog/tags/tezos"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"coq","permalink":"/blog/tags/coq"}],"readingTime":3.675,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"Make Tezos the first formally verified cryptocurrency","tags":["tezos","coq-of-ocaml","coq"]},"unlisted":false,"prevItem":{"title":"Status update on the verification of Tezos","permalink":"/blog/2022/06/15/status update-tezos"},"nextItem":{"title":"New blog posts and Meetup talk","permalink":"/blog/2021/11/12/new-blog-posts-and-meetup-talk"}},"content":"![Elephants](elephants-elmira-gokoryan.webp)\\n\\nOur primary goal at [Formal Land \ud83c\udf32](https://formal.land/) is to make [Tezos](https://tezos.com/) the first crypto-currency with a formally verified implementation. With [formal verification](https://en.wikipedia.org/wiki/Formal_verification), thanks to mathematical methods, we can check that a program behaves as expected for all possible inputs. 
Formal verification goes beyond what testing can do, as testing can only handle a finite amount of cases. That is critical as cryptocurrencies hold a large amount of money (around $3B for Tezos today). The current result of our verification project is available on [nomadic-labs.gitlab.io/coq-tezos-of-ocaml](https://formal-land.gitlab.io/coq-tezos-of-ocaml/). Formal verification is also key to allowing Tezos to evolve constantly in a safe and backward compatible manner.\\n\\n\x3c!-- truncate --\x3e\\n\\nWe proceed in two steps:\\n1. we translate the code of Tezos, written in [OCaml](https://ocaml.org/), to the proof language [Coq](https://coq.inria.fr/) using the translator [coq-of-ocaml](https://github.com/foobar-land/coq-of-ocaml);\\n2. we write our specifications and proofs in the Coq language.\\n\\nWe believe this is one of the most efficient ways to proceed, as we can work on an almost unmodified version of the codebase and use the full power of the mature proof system Coq. The code of Tezos is composed of around:\\n* 50,000 lines for the protocol (the kernel of Tezos), and\\n* 200,000 lines for the shell (everything else, including the peer-to-peer layer and the storage backend).\\n\\nWe are currently focusing on verifying the protocol for the following modules.\\n\\n## Data-encoding\\nThe [data-encoding](https://gitlab.com/nomadic-labs/data-encoding) library offers serialization and deserialization to binary and JSON formats. It is used in various parts of the Tezos protocol, especially on all the data types ending up in the storage system. In practice, many encodings are defined in the OCaml files named `*_repr.ml`. We verify that the `data-encoding` library is correctly used to define the encodings. We check that converting a value to binary format and from binary returns the initial value. We explicit the domain of validity of such conversions. This verification work generally reveals and propagates invariants about the data structures of the protocol. 
As an invariant example, all the account amounts should always be positive. Having these invariants will be helpful for the verification of higher-level layers of the protocol.\\n\\n## Michelson smart contracts\\nThe smart contract language of Tezos is [Michelson](https://tezos.gitlab.io/active/michelson.html). The interpreter and type-checker of smart contracts is one of the most complex and critical parts of the protocol. We are verifying two things about this code:\\n* The equivalence of the interpreter and the Coq semantics for Michelson defined in the project [Mi-Cho-Coq](https://gitlab.com/nomadic-labs/mi-cho-coq). Thanks to this equivalence, we can make sure that the formal verification of smart contracts is sound for the current version of the protocol.\\n* The compatibility of the parsing and unparsing functions for the Michelson types and values. The parsing functions take care of the type-checking and do a lot of sanity checks on Michelson expressions with appropriate error messages. Showing that the parsing and unparsing functions are inverses is important for security reasons. The Michelson values are always unparsed at the end of a smart contract execution to be stored on disk.\\n\\nTo do these proofs, we also give a new semantics of Michelson, expressed using dependent types rather than [GADTs](https://ocaml.org/manual/gadts-tutorial.html) in the OCaml implementation.\\n\\n## Storage system\\nCryptocurrencies typically take a lot of space on disk (in the hundreds of gigabytes). In Tezos, we use the key-value database [Irmin](https://irmin.org/). The protocol provides a lot of [abstractions](https://gitlab.com/tezos/tezos/-/blob/master/src/proto_alpha/lib_protocol/storage_functors.ml) over this database to expose higher-level interfaces with set and map-like APIs. 
We verify that these abstractions are valid doing a proof by simulation, where we show that the whole system is equivalent to an [in-memory database](https://en.wikipedia.org/wiki/In-memory_database) using simpler data structures. Thanks to this simulation, we will be able to reason about code using the storage as if we were using the simpler in-memory version.\\n\\n## In addition\\nWe also plan to verify:\\n* The implementation of the `data-encoding` library itself. This code is challenging for formal verification as it contains many imperative features. Another specificity of this library is that it sits outside of the protocol of Tezos, and we might need to adapt `coq-of-ocaml` to support it.\\n* The [property-based tests of the protocol](https://gitlab.com/tezos/tezos/-/tree/master/src/proto_alpha/lib_protocol/test/pbt). These tests are written as boolean functions (or functions raising exceptions), which must return `true` on any possible inputs. We will verify them in the general case by importing their definitions to Coq and verifying with mathematical proofs that they are always correct.\\n\\n:::tip Contact\\nFor any questions or remarks, contact us on \ud83d\udc49 [contact@formal.land](mailto:contact@formal.land) \ud83d\udc48.\\n:::"},{"id":"/2021/11/12/new-blog-posts-and-meetup-talk","metadata":{"permalink":"/blog/2021/11/12/new-blog-posts-and-meetup-talk","source":"@site/blog/2021-11-12-new-blog-posts-and-meetup-talk.md","title":"New blog posts and Meetup talk","description":"Recently, we added two new blog posts about the verification of the crypto-currency Tezos:","date":"2021-11-12T00:00:00.000Z","formattedDate":"November 12, 2021","tags":[{"label":"tezos","permalink":"/blog/tags/tezos"},{"label":"mi-cho-coq","permalink":"/blog/tags/mi-cho-coq"},{"label":"coq-of-ocaml","permalink":"/blog/tags/coq-of-ocaml"},{"label":"meetup","permalink":"/blog/tags/meetup"}],"readingTime":0.58,"hasTruncateMarker":false,"authors":[],"frontMatter":{"title":"New blog 
posts and Meetup talk","tags":["tezos","mi-cho-coq","coq-of-ocaml","meetup"]},"unlisted":false,"prevItem":{"title":"Make Tezos the first formally verified cryptocurrency","permalink":"/blog/2022/02/02/make-tezos-a-formally-verified-crypto"},"nextItem":{"title":"Verification of the use of data-encoding","permalink":"/blog/2021/10/27/verification-data-encoding"}},"content":"Recently, we added two new blog posts about the verification of the crypto-currency [Tezos](https://tezos.com/):\\n* [Verify the Michelson types of Mi-Cho-Coq](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/11/01/verify-michelson-types-mi-cho-coq/) to compare the types defined in the Tezos code for the [Michelson](http://tezos.gitlab.io/active/michelson.html) interpreter and in the [Mi-Cho-Coq library](https://gitlab.com/nomadic-labs/mi-cho-coq) to verify smart contracts;\\n* [Translate the Tenderbake\'s code to Coq](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/11/08/translate-tenderbake/) to explain how we translated the recent changes in Tezos to the Coq using [coq-of-ocaml](https://github.com/foobar-land/coq-of-ocaml). In particular we translated the code of the new [Tenderbake](https://research-development.nomadic-labs.com/a-look-ahead-to-tenderbake.html) consensus algorithm.\\n\\nWe also talked at the [Lambda Lille Meetup](https://www.meetup.com/LambdaLille/events/281374644/) (in French) to present our work on `coq-of-ocaml` for Tezos. A video on the [Youtube channel](https://www.youtube.com/channel/UC-hC7y_ilQBq0QCa9xDu1iA) of the Meetup should be available shortly. We thanks the organizers for hosting the talk."},{"id":"/2021/10/27/verification-data-encoding","metadata":{"permalink":"/blog/2021/10/27/verification-data-encoding","source":"@site/blog/2021-10-27-verification-data-encoding.md","title":"Verification of the use of data-encoding","description":"We added a blog post about the verification of the use of data-encodings in the protocol of Tezos. 
Currently, we work on the verification of Tezos and publish our blog articles there. We use coq-of-ocaml to translate the OCaml code to Coq and do our verification effort.","date":"2021-10-27T00:00:00.000Z","formattedDate":"October 27, 2021","tags":[{"label":"data-encoding","permalink":"/blog/tags/data-encoding"}],"readingTime":0.235,"hasTruncateMarker":false,"authors":[],"frontMatter":{"title":"Verification of the use of data-encoding","tags":["data-encoding"]},"unlisted":false,"prevItem":{"title":"New blog posts and Meetup talk","permalink":"/blog/2021/11/12/new-blog-posts-and-meetup-talk"},"nextItem":{"title":"Welcome","permalink":"/blog/2021/10/10/welcome"}},"content":"We added a blog post about the [verification of the use of data-encodings](https://formal-land.gitlab.io/coq-tezos-of-ocaml/blog/2021/10/20/data-encoding-usage) in the protocol of Tezos. Currently, we work on the verification of Tezos and publish our blog articles there. We use [coq-of-ocaml](https://foobar-land.github.io/coq-of-ocaml/) to translate the OCaml code to Coq and do our verification effort."},{"id":"/2021/10/10/welcome","metadata":{"permalink":"/blog/2021/10/10/welcome","source":"@site/blog/2021-10-10-welcome.md","title":"Welcome","description":"Welcome to the blog of Formal Land. Here we will post various updates about the work we are doing.","date":"2021-10-10T00:00:00.000Z","formattedDate":"October 10, 2021","tags":[{"label":"Welcome","permalink":"/blog/tags/welcome"}],"readingTime":0.095,"hasTruncateMarker":false,"authors":[],"frontMatter":{"title":"Welcome","tags":["Welcome"]},"unlisted":false,"prevItem":{"title":"Verification of the use of data-encoding","permalink":"/blog/2021/10/27/verification-data-encoding"}},"content":"Welcome to the blog of [Formal Land](/). 
Here we will post various updates about the work we are doing."}]}')}}]); \ No newline at end of file diff --git a/assets/js/runtime~main.9f651554.js b/assets/js/runtime~main.8a70afab.js similarity index 99% rename from assets/js/runtime~main.9f651554.js rename to assets/js/runtime~main.8a70afab.js index b1834b71..49b4a8ba 100644 --- a/assets/js/runtime~main.9f651554.js +++ b/assets/js/runtime~main.8a70afab.js @@ -1 +1 @@ -(()=>{"use strict";var e,a,f,b,d,c={},t={};function r(e){var a=t[e];if(void 0!==a)return a.exports;var f=t[e]={exports:{}};return c[e].call(f.exports,f,f.exports,r),f.exports}r.m=c,e=[],r.O=(a,f,b,d)=>{if(!f){var c=1/0;for(i=0;i=d)&&Object.keys(r.O).every((e=>r.O[e](f[o])))?f.splice(o--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[f,b,d]},r.n=e=>{var a=e&&e.__esModule?()=>e.default:()=>e;return r.d(a,{a:a}),a},f=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,b){if(1&b&&(e=this(e)),8&b)return e;if("object"==typeof e&&e){if(4&b&&e.__esModule)return e;if(16&b&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var c={};a=a||[null,f({}),f([]),f(f)];for(var t=2&b&&e;"object"==typeof t&&!~a.indexOf(t);t=f(t))Object.getOwnPropertyNames(t).forEach((a=>c[a]=()=>e[a]));return c.default=()=>e,r.d(d,c),d},r.d=(e,a)=>{for(var f in 
a)r.o(a,f)&&!r.o(e,f)&&Object.defineProperty(e,f,{enumerable:!0,get:a[f]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((a,f)=>(r.f[f](e,a),a)),[])),r.u=e=>"assets/js/"+({65:"3926085a",127:"1993cab4",291:"0e068da8",339:"6fdb0820",468:"40a5438f",471:"4ab1c6f7",501:"b985990b",574:"890e518c",607:"4761cca9",621:"25e70bc2",640:"8ca4d6e2",656:"72c84e71",721:"bb3f9e72",727:"f813a603",878:"c7369102",909:"3c80015b",910:"eb6aa549",952:"282cd1c8",970:"3229a8e9",1019:"720a56e4",1079:"e93a9b61",1130:"37dfae32",1131:"36f6f17a",1338:"99451683",1377:"615dbb02",1477:"2df98331",1543:"d73ecefa",1599:"cec855a0",1691:"806c182d",1703:"79c03ce4",1716:"523d8f7d",1805:"c445085e",1864:"d94865d5",1921:"59b56f69",1924:"8bd24425",1975:"0d56dc46",1979:"a173baa0",1991:"b2b675dd",2094:"861700e4",2109:"bd0ed3e1",2173:"2519c48f",2176:"b3ad03cc",2206:"940891e7",2257:"f3a15648",2258:"bdf7c199",2442:"d38380b3",2445:"74ab7bc8",2470:"b146e155",2644:"1fb147df",2676:"e704f625",2711:"9e4087bc",2728:"aa1d233f",3030:"30f8bb4f",3094:"d8ad77ec",3096:"439d3734",3186:"68b0dfa4",3187:"4688d424",3214:"08b4cf62",3249:"ccc49370",3290:"b12698d1",3437:"d31ecc05",3568:"7d628d86",3719:"4548c361",3787:"9e4c2aa5",3860:"b039747e",3909:"8e0f4841",4002:"452bbf79",4070:"34b0601c",4116:"fd759d37",4222:"48ac1a5d",4282:"4f24d1ff",4285:"62537946",4347:"9bab1bd5",4354:"2adc0ba4",4358:"ffcdbdee",4402:"0928b497",4414:"f1f43052",4467:"02e13650",4478:"98e459ed",4496:"a3c4aff9",4497:"4071a8ab",4576:"f1c3ede6",4583:"1df93b7f",4617:"ae7616e5",4809:"c190410d",4813:"6875c492",4827:"cce51cf2",4828:"1680c68e",4845:"a7cac7dd",4936:"4e6ea248",4946:"a701e9b4",4970:"c9adbec0",4997:"a8f0412d",5213:"c9eb5c8c",5302:"a24beb83",5345:"2d92dfb9",5361:"65cc9109",5380:"c5b47efc",5391:"7c0fea77",5428:"8db1271b",5439:"4d658fd1",5440:"834fe8c4",5457:"ff6b4ecf",5578:"a8e6f3e5",5739:"69205f08",5756:"560c153f",5767:"8eb4e46b",5801:"2836b1c5",5845:"e6b868b1",5894:"b2f554cd",5993:"6b1d1fec",5996:"45d50612",6103:"fc3deafd",6204:"2240d8ed",6244:"63152ce2",62
60:"5dc20450",6385:"42af9969",6390:"278479fb",6487:"f8de77c0",6640:"b2014bd4",6722:"74ae6181",6739:"fde865fd",6820:"0fb5280d",6837:"232c92ba",6924:"1e7a46a0",6940:"16329161",7054:"59025a76",7065:"3ba35b71",7097:"cf3d20aa",7098:"a7bd4aaa",7175:"f6f9690d",7199:"f21d102b",7224:"91ac000a",7261:"6fd34e84",7266:"094a728b",7358:"be4406bf",7381:"a926bf88",7472:"814f3328",7520:"f6ba3702",7580:"97c52b50",7604:"87b14ec7",7630:"e566aea2",7643:"a6aa9e1f",7650:"ad4ab9ff",7802:"c5d15731",7838:"ae69f024",7900:"a5d7f2f4",8013:"4a2980b2",8059:"9cf8b934",8209:"01a85c17",8305:"396effda",8382:"d41fa627",8387:"8ee64c0c",8401:"17896441",8411:"c5903cab",8457:"a06ffc17",8468:"b6692631",8530:"2e0973e2",8540:"b44e231f",8543:"54a1f05d",8581:"935f2afb",8630:"24926dff",8718:"ae41b95b",8772:"82eef687",8790:"92999a1c",8806:"76bc59ef",8821:"892f03bb",8827:"90c66bca",8840:"324f91f6",9048:"a94703ab",9166:"90d4f0a7",9267:"a7023ddc",9317:"0228dac2",9324:"e32cc564",9328:"36cb36bb",9347:"d7c1c49e",9363:"09cb7d6a",9384:"13e421cf",9424:"b1f9f584",9451:"f3233157",9607:"b6982b7f",9611:"0ab8b207",9623:"3ed9a774",9647:"5e95c892",9741:"20f81dd5",9888:"a8fa71a4",9890:"0b4df4b7",9984:"97b22f94"}[e]||e)+"."+{65:"cd8b6055",127:"3a9ef318",291:"d5641d1d",339:"d42af4f1",468:"cb3b4242",471:"1f64822e",501:"524e58e1",574:"a4a0b2bd",607:"191ec598",621:"63a8f50d",640:"af8dbf98",656:"1723715b",721:"f27c6f46",727:"4b9cd53e",878:"956e30ef",909:"a90a7aec",910:"a8e7640e",952:"9c43247e",970:"5431314d",1019:"1b167f27",1079:"45e3c0f2",1130:"6803d05b",1131:"a01b7f91",1135:"5688adaf",1338:"3656d2dd",1377:"648ea9db",1477:"d132f837",1543:"c1ba3faf",1599:"6bc81a97",1691:"0a4254eb",1703:"445e72c6",1716:"4d5b97fb",1805:"c9b4383e",1864:"ee525089",1921:"c497a76d",1924:"91e9f4e0",1975:"10ed284e",1979:"af43b3f6",1991:"56965564",2094:"33c113c1",2109:"55f0eabc",2173:"689cb8cb",2176:"69118c59",2206:"95957777",2237:"75d0c357",2257:"d941efb8",2258:"5d9bb219",2442:"30d2b1ab",2445:"2227918b",2470:"8ab06890",2644:"90914a6b",2676:"913e7e1e",2711:"abe
a12d8",2728:"41ec0a46",2778:"459d8f64",3030:"31f6141a",3094:"1e818bb6",3096:"65cc6bd7",3186:"c50d1d04",3187:"a6ffa19c",3214:"f3bcb01d",3249:"c436d061",3290:"b956148d",3437:"647f7d43",3568:"b7594de7",3719:"ea219df7",3787:"79e29112",3860:"2978860f",3909:"6903e838",4002:"15fd7276",4070:"c974fc8c",4116:"76d547f0",4222:"bba114da",4282:"1bb6a92f",4285:"9ccb9872",4347:"b77a4819",4354:"3e73a865",4358:"185da90f",4402:"2b9c6a7c",4414:"f6495633",4467:"51aaa417",4478:"1ac289d7",4496:"818fe429",4497:"e6c0ffb1",4576:"83e373ba",4583:"642e7e4a",4617:"1884c3a5",4809:"491f1fce",4813:"c9388d9f",4827:"cd88487a",4828:"9c912728",4845:"eff847ea",4936:"808789e2",4946:"8cb27ac3",4970:"28925304",4997:"d5710eaa",5213:"a33d4f7b",5302:"39c5171b",5345:"f3f9e81d",5361:"93d0d593",5380:"a6d64a5f",5391:"e9aba91a",5428:"010b1de8",5439:"c4687111",5440:"f388d996",5457:"c54ec846",5578:"e3b0f9a6",5739:"56ecc9eb",5756:"41ded959",5767:"b9596d69",5801:"d99fa637",5845:"436010fd",5894:"b4593d13",5993:"7125dbed",5996:"af60c555",6103:"a34db9bb",6204:"e998637c",6244:"9c4cecc6",6260:"b8c1b529",6385:"c0f78fc2",6390:"ffc6135e",6487:"89a04e95",6640:"7d3067ef",6722:"4dd5a6b4",6739:"6a43a6f2",6820:"38e18c4c",6837:"49430e0c",6924:"7222bda8",6940:"48a27581",7054:"30e28783",7065:"4977536c",7097:"f0b1ae95",7098:"bd449a7d",7175:"41be760f",7199:"197997a1",7224:"301fb2b3",7261:"8ffa146d",7266:"d135f2b5",7358:"3ae2bd0b",7381:"5d511376",7472:"8918e1a3",7520:"a45cd4cc",7580:"892c1d9c",7604:"f75699cf",7630:"73da8de5",7643:"19e21a39",7650:"5cd78671",7802:"55b8c9f5",7838:"9d276fec",7900:"fbc15c8f",8013:"5aad93b8",8059:"42e17fe6",8209:"de3cf12b",8305:"6560e8a3",8382:"73994109",8387:"72e4c6f2",8401:"d4130c19",8411:"0416d517",8457:"d4f7d6dd",8468:"3bedc53e",8530:"f3dd583a",8540:"b4a2a703",8543:"5780d3dc",8581:"042a3d1c",8630:"ef8131f6",8706:"959c499a",8718:"705b088b",8772:"6ce77a14",8790:"467f49e1",8806:"c73bf180",8821:"eb814a1d",8827:"3813ed39",8840:"8ad64d0e",9048:"bfd106c6",9166:"ca220213",9267:"1b0dc8ef",9317:"eb9a1920",9324:"dac
5d5b3",9328:"8595c7a8",9347:"4d4ec702",9363:"f6224dcc",9384:"6d9fb24e",9424:"5cae9820",9451:"669fad86",9607:"3c3d2d54",9611:"d28c208b",9623:"4c3ef134",9647:"25919f8a",9741:"85809e97",9888:"86997124",9890:"3d496f0f",9984:"442a9d5f"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,a)=>Object.prototype.hasOwnProperty.call(e,a),b={},d="formal-land:",r.l=(e,a,f,c)=>{if(b[e])b[e].push(a);else{var t,o;if(void 0!==f)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var d=b[e];if(delete b[e],t.parentNode&&t.parentNode.removeChild(t),d&&d.forEach((e=>e(f))),a)return a(f)},s=setTimeout(u.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=u.bind(null,t.onerror),t.onload=u.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/",r.gca=function(e){return 
e={16329161:"6940",17896441:"8401",62537946:"4285",99451683:"1338","3926085a":"65","1993cab4":"127","0e068da8":"291","6fdb0820":"339","40a5438f":"468","4ab1c6f7":"471",b985990b:"501","890e518c":"574","4761cca9":"607","25e70bc2":"621","8ca4d6e2":"640","72c84e71":"656",bb3f9e72:"721",f813a603:"727",c7369102:"878","3c80015b":"909",eb6aa549:"910","282cd1c8":"952","3229a8e9":"970","720a56e4":"1019",e93a9b61:"1079","37dfae32":"1130","36f6f17a":"1131","615dbb02":"1377","2df98331":"1477",d73ecefa:"1543",cec855a0:"1599","806c182d":"1691","79c03ce4":"1703","523d8f7d":"1716",c445085e:"1805",d94865d5:"1864","59b56f69":"1921","8bd24425":"1924","0d56dc46":"1975",a173baa0:"1979",b2b675dd:"1991","861700e4":"2094",bd0ed3e1:"2109","2519c48f":"2173",b3ad03cc:"2176","940891e7":"2206",f3a15648:"2257",bdf7c199:"2258",d38380b3:"2442","74ab7bc8":"2445",b146e155:"2470","1fb147df":"2644",e704f625:"2676","9e4087bc":"2711",aa1d233f:"2728","30f8bb4f":"3030",d8ad77ec:"3094","439d3734":"3096","68b0dfa4":"3186","4688d424":"3187","08b4cf62":"3214",ccc49370:"3249",b12698d1:"3290",d31ecc05:"3437","7d628d86":"3568","4548c361":"3719","9e4c2aa5":"3787",b039747e:"3860","8e0f4841":"3909","452bbf79":"4002","34b0601c":"4070",fd759d37:"4116","48ac1a5d":"4222","4f24d1ff":"4282","9bab1bd5":"4347","2adc0ba4":"4354",ffcdbdee:"4358","0928b497":"4402",f1f43052:"4414","02e13650":"4467","98e459ed":"4478",a3c4aff9:"4496","4071a8ab":"4497",f1c3ede6:"4576","1df93b7f":"4583",ae7616e5:"4617",c190410d:"4809","6875c492":"4813",cce51cf2:"4827","1680c68e":"4828",a7cac7dd:"4845","4e6ea248":"4936",a701e9b4:"4946",c9adbec0:"4970",a8f0412d:"4997",c9eb5c8c:"5213",a24beb83:"5302","2d92dfb9":"5345","65cc9109":"5361",c5b47efc:"5380","7c0fea77":"5391","8db1271b":"5428","4d658fd1":"5439","834fe8c4":"5440",ff6b4ecf:"5457",a8e6f3e5:"5578","69205f08":"5739","560c153f":"5756","8eb4e46b":"5767","2836b1c5":"5801",e6b868b1:"5845",b2f554cd:"5894","6b1d1fec":"5993","45d50612":"5996",fc3deafd:"6103","2240d8ed":"6204","63152ce2":"6244","5dc20450
":"6260","42af9969":"6385","278479fb":"6390",f8de77c0:"6487",b2014bd4:"6640","74ae6181":"6722",fde865fd:"6739","0fb5280d":"6820","232c92ba":"6837","1e7a46a0":"6924","59025a76":"7054","3ba35b71":"7065",cf3d20aa:"7097",a7bd4aaa:"7098",f6f9690d:"7175",f21d102b:"7199","91ac000a":"7224","6fd34e84":"7261","094a728b":"7266",be4406bf:"7358",a926bf88:"7381","814f3328":"7472",f6ba3702:"7520","97c52b50":"7580","87b14ec7":"7604",e566aea2:"7630",a6aa9e1f:"7643",ad4ab9ff:"7650",c5d15731:"7802",ae69f024:"7838",a5d7f2f4:"7900","4a2980b2":"8013","9cf8b934":"8059","01a85c17":"8209","396effda":"8305",d41fa627:"8382","8ee64c0c":"8387",c5903cab:"8411",a06ffc17:"8457",b6692631:"8468","2e0973e2":"8530",b44e231f:"8540","54a1f05d":"8543","935f2afb":"8581","24926dff":"8630",ae41b95b:"8718","82eef687":"8772","92999a1c":"8790","76bc59ef":"8806","892f03bb":"8821","90c66bca":"8827","324f91f6":"8840",a94703ab:"9048","90d4f0a7":"9166",a7023ddc:"9267","0228dac2":"9317",e32cc564:"9324","36cb36bb":"9328",d7c1c49e:"9347","09cb7d6a":"9363","13e421cf":"9384",b1f9f584:"9424",f3233157:"9451",b6982b7f:"9607","0ab8b207":"9611","3ed9a774":"9623","5e95c892":"9647","20f81dd5":"9741",a8fa71a4:"9888","0b4df4b7":"9890","97b22f94":"9984"}[e]||e,r.p+r.u(e)},(()=>{var e={5354:0,1869:0};r.f.j=(a,f)=>{var b=r.o(e,a)?e[a]:void 0;if(0!==b)if(b)f.push(b[2]);else if(/^(1869|5354)$/.test(a))e[a]=0;else{var d=new Promise(((f,d)=>b=e[a]=[f,d]));f.push(b[2]=d);var c=r.p+r.u(a),t=new Error;r.l(c,(f=>{if(r.o(e,a)&&(0!==(b=e[a])&&(e[a]=void 0),b)){var d=f&&("load"===f.type?"missing":f.type),c=f&&f.target&&f.target.src;t.message="Loading chunk "+a+" failed.\n("+d+": "+c+")",t.name="ChunkLoadError",t.type=d,t.request=c,b[1](t)}}),"chunk-"+a,a)}},r.O.j=a=>0===e[a];var a=(a,f)=>{var b,d,c=f[0],t=f[1],o=f[2],n=0;if(c.some((a=>0!==e[a]))){for(b in t)r.o(t,b)&&(r.m[b]=t[b]);if(o)var i=o(r)}for(a&&a(f);n{"use strict";var e,a,f,b,d,c={},t={};function r(e){var a=t[e];if(void 0!==a)return a.exports;var f=t[e]={exports:{}};return 
c[e].call(f.exports,f,f.exports,r),f.exports}r.m=c,e=[],r.O=(a,f,b,d)=>{if(!f){var c=1/0;for(i=0;i=d)&&Object.keys(r.O).every((e=>r.O[e](f[o])))?f.splice(o--,1):(t=!1,d0&&e[i-1][2]>d;i--)e[i]=e[i-1];e[i]=[f,b,d]},r.n=e=>{var a=e&&e.__esModule?()=>e.default:()=>e;return r.d(a,{a:a}),a},f=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,b){if(1&b&&(e=this(e)),8&b)return e;if("object"==typeof e&&e){if(4&b&&e.__esModule)return e;if(16&b&&"function"==typeof e.then)return e}var d=Object.create(null);r.r(d);var c={};a=a||[null,f({}),f([]),f(f)];for(var t=2&b&&e;"object"==typeof t&&!~a.indexOf(t);t=f(t))Object.getOwnPropertyNames(t).forEach((a=>c[a]=()=>e[a]));return c.default=()=>e,r.d(d,c),d},r.d=(e,a)=>{for(var f in a)r.o(a,f)&&!r.o(e,f)&&Object.defineProperty(e,f,{enumerable:!0,get:a[f]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((a,f)=>(r.f[f](e,a),a)),[])),r.u=e=>"assets/js/"+({65:"3926085a",127:"1993cab4",291:"0e068da8",339:"6fdb0820",468:"40a5438f",471:"4ab1c6f7",501:"b985990b",574:"890e518c",607:"4761cca9",621:"25e70bc2",640:"8ca4d6e2",656:"72c84e71",721:"bb3f9e72",727:"f813a603",878:"c7369102",909:"3c80015b",910:"eb6aa549",952:"282cd1c8",970:"3229a8e9",1019:"720a56e4",1079:"e93a9b61",1130:"37dfae32",1131:"36f6f17a",1338:"99451683",1377:"615dbb02",1477:"2df98331",1543:"d73ecefa",1599:"cec855a0",1691:"806c182d",1703:"79c03ce4",1716:"523d8f7d",1805:"c445085e",1864:"d94865d5",1921:"59b56f69",1924:"8bd24425",1975:"0d56dc46",1979:"a173baa0",1991:"b2b675dd",2094:"861700e4",2109:"bd0ed3e1",2173:"2519c48f",2176:"b3ad03cc",2206:"940891e7",2257:"f3a15648",2258:"bdf7c199",2442:"d38380b3",2445:"74ab7bc8",2470:"b146e155",2644:"1fb147df",2676:"e704f625",2711:"9e4087bc",2728:"aa1d233f",3030:"30f8bb4f",3094:"d8ad77ec",3096:"439d3734",3186:"68b0dfa4",3187:"4688d424",3214:"08b4cf62",3249:"ccc49370",3290:"b12698d1",3437:"d31ecc05",3568:"7d628d86",3719:"4548c361",3787:"9e4c2aa5",3860:"b039747e",3909:"8e0f4841",4002:"452bbf79",4070:"34b0601c",41
16:"fd759d37",4222:"48ac1a5d",4282:"4f24d1ff",4285:"62537946",4347:"9bab1bd5",4354:"2adc0ba4",4358:"ffcdbdee",4402:"0928b497",4414:"f1f43052",4467:"02e13650",4478:"98e459ed",4496:"a3c4aff9",4497:"4071a8ab",4576:"f1c3ede6",4583:"1df93b7f",4617:"ae7616e5",4809:"c190410d",4813:"6875c492",4827:"cce51cf2",4828:"1680c68e",4845:"a7cac7dd",4936:"4e6ea248",4946:"a701e9b4",4970:"c9adbec0",4997:"a8f0412d",5213:"c9eb5c8c",5302:"a24beb83",5345:"2d92dfb9",5361:"65cc9109",5380:"c5b47efc",5391:"7c0fea77",5428:"8db1271b",5439:"4d658fd1",5440:"834fe8c4",5457:"ff6b4ecf",5578:"a8e6f3e5",5739:"69205f08",5756:"560c153f",5767:"8eb4e46b",5801:"2836b1c5",5845:"e6b868b1",5894:"b2f554cd",5993:"6b1d1fec",5996:"45d50612",6103:"fc3deafd",6204:"2240d8ed",6244:"63152ce2",6260:"5dc20450",6385:"42af9969",6390:"278479fb",6487:"f8de77c0",6640:"b2014bd4",6722:"74ae6181",6739:"fde865fd",6820:"0fb5280d",6837:"232c92ba",6924:"1e7a46a0",6940:"16329161",7054:"59025a76",7065:"3ba35b71",7097:"cf3d20aa",7098:"a7bd4aaa",7175:"f6f9690d",7199:"f21d102b",7224:"91ac000a",7261:"6fd34e84",7266:"094a728b",7358:"be4406bf",7381:"a926bf88",7472:"814f3328",7520:"f6ba3702",7580:"97c52b50",7604:"87b14ec7",7630:"e566aea2",7643:"a6aa9e1f",7650:"ad4ab9ff",7802:"c5d15731",7838:"ae69f024",7900:"a5d7f2f4",8013:"4a2980b2",8059:"9cf8b934",8209:"01a85c17",8305:"396effda",8382:"d41fa627",8387:"8ee64c0c",8401:"17896441",8411:"c5903cab",8457:"a06ffc17",8468:"b6692631",8530:"2e0973e2",8540:"b44e231f",8543:"54a1f05d",8581:"935f2afb",8630:"24926dff",8718:"ae41b95b",8772:"82eef687",8790:"92999a1c",8806:"76bc59ef",8821:"892f03bb",8827:"90c66bca",8840:"324f91f6",9048:"a94703ab",9166:"90d4f0a7",9267:"a7023ddc",9317:"0228dac2",9324:"e32cc564",9328:"36cb36bb",9347:"d7c1c49e",9363:"09cb7d6a",9384:"13e421cf",9424:"b1f9f584",9451:"f3233157",9607:"b6982b7f",9611:"0ab8b207",9623:"3ed9a774",9647:"5e95c892",9741:"20f81dd5",9888:"a8fa71a4",9890:"0b4df4b7",9984:"97b22f94"}[e]||e)+"."+{65:"cd8b6055",127:"3a9ef318",291:"d5641d1d",339:"d42af4f1",468:"cb3b4
242",471:"1f64822e",501:"524e58e1",574:"a4a0b2bd",607:"191ec598",621:"63a8f50d",640:"af8dbf98",656:"1723715b",721:"f27c6f46",727:"4b9cd53e",878:"956e30ef",909:"a90a7aec",910:"a8e7640e",952:"9c43247e",970:"5431314d",1019:"1b167f27",1079:"45e3c0f2",1130:"6803d05b",1131:"a01b7f91",1135:"5688adaf",1338:"3656d2dd",1377:"648ea9db",1477:"d132f837",1543:"c1ba3faf",1599:"6bc81a97",1691:"0a4254eb",1703:"445e72c6",1716:"4d5b97fb",1805:"c9b4383e",1864:"ee525089",1921:"c497a76d",1924:"91e9f4e0",1975:"10ed284e",1979:"af43b3f6",1991:"56965564",2094:"33c113c1",2109:"55f0eabc",2173:"689cb8cb",2176:"69118c59",2206:"95957777",2237:"75d0c357",2257:"d941efb8",2258:"5d9bb219",2442:"30d2b1ab",2445:"2227918b",2470:"8ab06890",2644:"90914a6b",2676:"913e7e1e",2711:"abea12d8",2728:"41ec0a46",2778:"459d8f64",3030:"31f6141a",3094:"1e818bb6",3096:"65cc6bd7",3186:"c50d1d04",3187:"a6ffa19c",3214:"f3bcb01d",3249:"c436d061",3290:"b956148d",3437:"647f7d43",3568:"b7594de7",3719:"ea219df7",3787:"79e29112",3860:"2978860f",3909:"6903e838",4002:"15fd7276",4070:"c974fc8c",4116:"76d547f0",4222:"bba114da",4282:"1bb6a92f",4285:"9ccb9872",4347:"b77a4819",4354:"3e73a865",4358:"185da90f",4402:"2b9c6a7c",4414:"f6495633",4467:"51aaa417",4478:"1ac289d7",4496:"818fe429",4497:"e6c0ffb1",4576:"83e373ba",4583:"642e7e4a",4617:"1884c3a5",4809:"491f1fce",4813:"c9388d9f",4827:"cd88487a",4828:"9c912728",4845:"eff847ea",4936:"808789e2",4946:"8cb27ac3",4970:"28925304",4997:"d5710eaa",5213:"a33d4f7b",5302:"39c5171b",5345:"f3f9e81d",5361:"93d0d593",5380:"a6d64a5f",5391:"e9aba91a",5428:"010b1de8",5439:"c4687111",5440:"f388d996",5457:"c54ec846",5578:"e3b0f9a6",5739:"56ecc9eb",5756:"41ded959",5767:"b9596d69",5801:"d99fa637",5845:"436010fd",5894:"e8f027dd",5993:"f368363f",5996:"af60c555",6103:"a34db9bb",6204:"e998637c",6244:"9c4cecc6",6260:"b8c1b529",6385:"c0f78fc2",6390:"ffc6135e",6487:"89a04e95",6640:"7d3067ef",6722:"4dd5a6b4",6739:"6a43a6f2",6820:"38e18c4c",6837:"49430e0c",6924:"7222bda8",6940:"48a27581",7054:"30e28783",7065:"497
7536c",7097:"f0b1ae95",7098:"bd449a7d",7175:"41be760f",7199:"197997a1",7224:"301fb2b3",7261:"8ffa146d",7266:"d135f2b5",7358:"3ae2bd0b",7381:"5d511376",7472:"8918e1a3",7520:"a45cd4cc",7580:"892c1d9c",7604:"f75699cf",7630:"73da8de5",7643:"19e21a39",7650:"5cd78671",7802:"55b8c9f5",7838:"9d276fec",7900:"fbc15c8f",8013:"5aad93b8",8059:"42e17fe6",8209:"de3cf12b",8305:"6560e8a3",8382:"73994109",8387:"72e4c6f2",8401:"d4130c19",8411:"0416d517",8457:"d4f7d6dd",8468:"3bedc53e",8530:"f3dd583a",8540:"b4a2a703",8543:"5780d3dc",8581:"042a3d1c",8630:"ef8131f6",8706:"959c499a",8718:"705b088b",8772:"6ce77a14",8790:"467f49e1",8806:"c73bf180",8821:"eb814a1d",8827:"3813ed39",8840:"8ad64d0e",9048:"bfd106c6",9166:"ca220213",9267:"1b0dc8ef",9317:"eb9a1920",9324:"dac5d5b3",9328:"8595c7a8",9347:"4d4ec702",9363:"f6224dcc",9384:"6d9fb24e",9424:"5cae9820",9451:"669fad86",9607:"3c3d2d54",9611:"d28c208b",9623:"4c3ef134",9647:"25919f8a",9741:"85809e97",9888:"86997124",9890:"3d496f0f",9984:"442a9d5f"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,a)=>Object.prototype.hasOwnProperty.call(e,a),b={},d="formal-land:",r.l=(e,a,f,c)=>{if(b[e])b[e].push(a);else{var t,o;if(void 0!==f)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var d=b[e];if(delete b[e],t.parentNode&&t.parentNode.removeChild(t),d&&d.forEach((e=>e(f))),a)return a(f)},s=setTimeout(u.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=u.bind(null,t.onerror),t.onload=u.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/",r.gca=function(e){return 
e={16329161:"6940",17896441:"8401",62537946:"4285",99451683:"1338","3926085a":"65","1993cab4":"127","0e068da8":"291","6fdb0820":"339","40a5438f":"468","4ab1c6f7":"471",b985990b:"501","890e518c":"574","4761cca9":"607","25e70bc2":"621","8ca4d6e2":"640","72c84e71":"656",bb3f9e72:"721",f813a603:"727",c7369102:"878","3c80015b":"909",eb6aa549:"910","282cd1c8":"952","3229a8e9":"970","720a56e4":"1019",e93a9b61:"1079","37dfae32":"1130","36f6f17a":"1131","615dbb02":"1377","2df98331":"1477",d73ecefa:"1543",cec855a0:"1599","806c182d":"1691","79c03ce4":"1703","523d8f7d":"1716",c445085e:"1805",d94865d5:"1864","59b56f69":"1921","8bd24425":"1924","0d56dc46":"1975",a173baa0:"1979",b2b675dd:"1991","861700e4":"2094",bd0ed3e1:"2109","2519c48f":"2173",b3ad03cc:"2176","940891e7":"2206",f3a15648:"2257",bdf7c199:"2258",d38380b3:"2442","74ab7bc8":"2445",b146e155:"2470","1fb147df":"2644",e704f625:"2676","9e4087bc":"2711",aa1d233f:"2728","30f8bb4f":"3030",d8ad77ec:"3094","439d3734":"3096","68b0dfa4":"3186","4688d424":"3187","08b4cf62":"3214",ccc49370:"3249",b12698d1:"3290",d31ecc05:"3437","7d628d86":"3568","4548c361":"3719","9e4c2aa5":"3787",b039747e:"3860","8e0f4841":"3909","452bbf79":"4002","34b0601c":"4070",fd759d37:"4116","48ac1a5d":"4222","4f24d1ff":"4282","9bab1bd5":"4347","2adc0ba4":"4354",ffcdbdee:"4358","0928b497":"4402",f1f43052:"4414","02e13650":"4467","98e459ed":"4478",a3c4aff9:"4496","4071a8ab":"4497",f1c3ede6:"4576","1df93b7f":"4583",ae7616e5:"4617",c190410d:"4809","6875c492":"4813",cce51cf2:"4827","1680c68e":"4828",a7cac7dd:"4845","4e6ea248":"4936",a701e9b4:"4946",c9adbec0:"4970",a8f0412d:"4997",c9eb5c8c:"5213",a24beb83:"5302","2d92dfb9":"5345","65cc9109":"5361",c5b47efc:"5380","7c0fea77":"5391","8db1271b":"5428","4d658fd1":"5439","834fe8c4":"5440",ff6b4ecf:"5457",a8e6f3e5:"5578","69205f08":"5739","560c153f":"5756","8eb4e46b":"5767","2836b1c5":"5801",e6b868b1:"5845",b2f554cd:"5894","6b1d1fec":"5993","45d50612":"5996",fc3deafd:"6103","2240d8ed":"6204","63152ce2":"6244","5dc20450
":"6260","42af9969":"6385","278479fb":"6390",f8de77c0:"6487",b2014bd4:"6640","74ae6181":"6722",fde865fd:"6739","0fb5280d":"6820","232c92ba":"6837","1e7a46a0":"6924","59025a76":"7054","3ba35b71":"7065",cf3d20aa:"7097",a7bd4aaa:"7098",f6f9690d:"7175",f21d102b:"7199","91ac000a":"7224","6fd34e84":"7261","094a728b":"7266",be4406bf:"7358",a926bf88:"7381","814f3328":"7472",f6ba3702:"7520","97c52b50":"7580","87b14ec7":"7604",e566aea2:"7630",a6aa9e1f:"7643",ad4ab9ff:"7650",c5d15731:"7802",ae69f024:"7838",a5d7f2f4:"7900","4a2980b2":"8013","9cf8b934":"8059","01a85c17":"8209","396effda":"8305",d41fa627:"8382","8ee64c0c":"8387",c5903cab:"8411",a06ffc17:"8457",b6692631:"8468","2e0973e2":"8530",b44e231f:"8540","54a1f05d":"8543","935f2afb":"8581","24926dff":"8630",ae41b95b:"8718","82eef687":"8772","92999a1c":"8790","76bc59ef":"8806","892f03bb":"8821","90c66bca":"8827","324f91f6":"8840",a94703ab:"9048","90d4f0a7":"9166",a7023ddc:"9267","0228dac2":"9317",e32cc564:"9324","36cb36bb":"9328",d7c1c49e:"9347","09cb7d6a":"9363","13e421cf":"9384",b1f9f584:"9424",f3233157:"9451",b6982b7f:"9607","0ab8b207":"9611","3ed9a774":"9623","5e95c892":"9647","20f81dd5":"9741",a8fa71a4:"9888","0b4df4b7":"9890","97b22f94":"9984"}[e]||e,r.p+r.u(e)},(()=>{var e={5354:0,1869:0};r.f.j=(a,f)=>{var b=r.o(e,a)?e[a]:void 0;if(0!==b)if(b)f.push(b[2]);else if(/^(1869|5354)$/.test(a))e[a]=0;else{var d=new Promise(((f,d)=>b=e[a]=[f,d]));f.push(b[2]=d);var c=r.p+r.u(a),t=new Error;r.l(c,(f=>{if(r.o(e,a)&&(0!==(b=e[a])&&(e[a]=void 0),b)){var d=f&&("load"===f.type?"missing":f.type),c=f&&f.target&&f.target.src;t.message="Loading chunk "+a+" failed.\n("+d+": "+c+")",t.name="ChunkLoadError",t.type=d,t.request=c,b[1](t)}}),"chunk-"+a,a)}},r.O.j=a=>0===e[a];var a=(a,f)=>{var b,d,c=f[0],t=f[1],o=f[2],n=0;if(c.some((a=>0!==e[a]))){for(b in t)r.o(t,b)&&(r.m[b]=t[b]);if(o)var i=o(r)}for(a&&a(f);n - + diff --git a/blog/2021/10/10/welcome.html b/blog/2021/10/10/welcome.html index 3c2e95bb..0dd4a8ce 100644 --- 
a/blog/2021/10/10/welcome.html +++ b/blog/2021/10/10/welcome.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2021/10/27/verification-data-encoding.html b/blog/2021/10/27/verification-data-encoding.html index 7c4fdb2c..c0cb3979 100644 --- a/blog/2021/10/27/verification-data-encoding.html +++ b/blog/2021/10/27/verification-data-encoding.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2021/11/12/new-blog-posts-and-meetup-talk.html b/blog/2021/11/12/new-blog-posts-and-meetup-talk.html index 30d9bf94..bbe8f0a9 100644 --- a/blog/2021/11/12/new-blog-posts-and-meetup-talk.html +++ b/blog/2021/11/12/new-blog-posts-and-meetup-talk.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2022/02/02/make-tezos-a-formally-verified-crypto.html b/blog/2022/02/02/make-tezos-a-formally-verified-crypto.html index df5511d1..789c36c3 100644 --- a/blog/2022/02/02/make-tezos-a-formally-verified-crypto.html +++ b/blog/2022/02/02/make-tezos-a-formally-verified-crypto.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2022/06/15/status update-tezos.html b/blog/2022/06/15/status update-tezos.html index 487c0f9d..b47e60ad 100644 --- a/blog/2022/06/15/status update-tezos.html +++ b/blog/2022/06/15/status update-tezos.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2022/06/23/upgrade-coq-of-ocaml-4.14.html b/blog/2022/06/23/upgrade-coq-of-ocaml-4.14.html index eb5661a9..35fbf37c 100644 --- a/blog/2022/06/23/upgrade-coq-of-ocaml-4.14.html +++ b/blog/2022/06/23/upgrade-coq-of-ocaml-4.14.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2022/12/13/latest-blog-posts-on-tezos.html b/blog/2022/12/13/latest-blog-posts-on-tezos.html index b4c8ff79..79758998 100644 --- a/blog/2022/12/13/latest-blog-posts-on-tezos.html +++ b/blog/2022/12/13/latest-blog-posts-on-tezos.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/01/24/current-verification-efforts.html b/blog/2023/01/24/current-verification-efforts.html index 72bbf4e9..401cc771 100644 --- a/blog/2023/01/24/current-verification-efforts.html +++ 
b/blog/2023/01/24/current-verification-efforts.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/04/26/representation-of-rust-methods-in-coq.html b/blog/2023/04/26/representation-of-rust-methods-in-coq.html index 8363d15b..6401623d 100644 --- a/blog/2023/04/26/representation-of-rust-methods-in-coq.html +++ b/blog/2023/04/26/representation-of-rust-methods-in-coq.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/05/28/monad-for-side-effects-in-rust.html b/blog/2023/05/28/monad-for-side-effects-in-rust.html index eec6c0d7..100682bb 100644 --- a/blog/2023/05/28/monad-for-side-effects-in-rust.html +++ b/blog/2023/05/28/monad-for-side-effects-in-rust.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/08/25/trait-representation-in-coq.html b/blog/2023/08/25/trait-representation-in-coq.html index 9d6a02fa..c39dfb01 100644 --- a/blog/2023/08/25/trait-representation-in-coq.html +++ b/blog/2023/08/25/trait-representation-in-coq.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/11/08/rust-thir-and-bundled-traits.html b/blog/2023/11/08/rust-thir-and-bundled-traits.html index c2c2451e..ba5dc44d 100644 --- a/blog/2023/11/08/rust-thir-and-bundled-traits.html +++ b/blog/2023/11/08/rust-thir-and-bundled-traits.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/11/26/rust-function-body.html b/blog/2023/11/26/rust-function-body.html index 4996c5d6..c8a13eb6 100644 --- a/blog/2023/11/26/rust-function-body.html +++ b/blog/2023/11/26/rust-function-body.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2023/12/13/rust-verify-erc-20-smart-contract.html b/blog/2023/12/13/rust-verify-erc-20-smart-contract.html index a68cc4b7..2e88b3d1 100644 --- a/blog/2023/12/13/rust-verify-erc-20-smart-contract.html +++ b/blog/2023/12/13/rust-verify-erc-20-smart-contract.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/01/04/rust-translating-match.html b/blog/2024/01/04/rust-translating-match.html index 9df9e073..15b02d87 100644 --- a/blog/2024/01/04/rust-translating-match.html +++ 
b/blog/2024/01/04/rust-translating-match.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/01/18/update-coq-of-rust.html b/blog/2024/01/18/update-coq-of-rust.html index e498c9a7..b0023882 100644 --- a/blog/2024/01/18/update-coq-of-rust.html +++ b/blog/2024/01/18/update-coq-of-rust.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/02/02/formal-verification-for-aleph-zero.html b/blog/2024/02/02/formal-verification-for-aleph-zero.html index 5704e366..cc0c2bc5 100644 --- a/blog/2024/02/02/formal-verification-for-aleph-zero.html +++ b/blog/2024/02/02/formal-verification-for-aleph-zero.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/02/14/experiment-coq-of-hs.html b/blog/2024/02/14/experiment-coq-of-hs.html index e8193d27..a75460c3 100644 --- a/blog/2024/02/14/experiment-coq-of-hs.html +++ b/blog/2024/02/14/experiment-coq-of-hs.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/02/22/journey-coq-of-go.html b/blog/2024/02/22/journey-coq-of-go.html index 833ac342..32a42ab4 100644 --- a/blog/2024/02/22/journey-coq-of-go.html +++ b/blog/2024/02/22/journey-coq-of-go.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/02/29/improvements-rust-translation.html b/blog/2024/02/29/improvements-rust-translation.html index 410ebd46..49c11151 100644 --- a/blog/2024/02/29/improvements-rust-translation.html +++ b/blog/2024/02/29/improvements-rust-translation.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/03/08/improvements-rust-translation-part-2.html b/blog/2024/03/08/improvements-rust-translation-part-2.html index 8391a08d..db6665f7 100644 --- a/blog/2024/03/08/improvements-rust-translation-part-2.html +++ b/blog/2024/03/08/improvements-rust-translation-part-2.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/03/22/improvements-rust-translation-part-3.html b/blog/2024/03/22/improvements-rust-translation-part-3.html index 0886acde..01a039a9 100644 --- a/blog/2024/03/22/improvements-rust-translation-part-3.html +++ b/blog/2024/03/22/improvements-rust-translation-part-3.html @@ -13,7 
+13,7 @@ - + diff --git a/blog/2024/04/03/monadic-notation-for-rust-translation.html b/blog/2024/04/03/monadic-notation-for-rust-translation.html index d111ac39..334aebbc 100644 --- a/blog/2024/04/03/monadic-notation-for-rust-translation.html +++ b/blog/2024/04/03/monadic-notation-for-rust-translation.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/04/26/translation-core-alloc-crates.html b/blog/2024/04/26/translation-core-alloc-crates.html index 190eebdf..23c760c7 100644 --- a/blog/2024/04/26/translation-core-alloc-crates.html +++ b/blog/2024/04/26/translation-core-alloc-crates.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/05/10/translation-of-python-code.html b/blog/2024/05/10/translation-of-python-code.html index 8dae2052..63cac2ed 100644 --- a/blog/2024/05/10/translation-of-python-code.html +++ b/blog/2024/05/10/translation-of-python-code.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/05/14/translation-of-python-code-simulations.html b/blog/2024/05/14/translation-of-python-code-simulations.html index d5f69305..27a6764d 100644 --- a/blog/2024/05/14/translation-of-python-code-simulations.html +++ b/blog/2024/05/14/translation-of-python-code-simulations.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/05/22/translation-of-python-code-simulations-from-trace.html b/blog/2024/05/22/translation-of-python-code-simulations-from-trace.html index db4a1045..ec74e512 100644 --- a/blog/2024/05/22/translation-of-python-code-simulations-from-trace.html +++ b/blog/2024/05/22/translation-of-python-code-simulations-from-trace.html @@ -13,7 +13,7 @@ - + diff --git a/blog/2024/06/05/formal-verification-for-software-correctness.html b/blog/2024/06/05/formal-verification-for-software-correctness.html index bcfc1886..d156ee9e 100644 --- a/blog/2024/06/05/formal-verification-for-software-correctness.html +++ b/blog/2024/06/05/formal-verification-for-software-correctness.html @@ -13,7 +13,7 @@ - + @@ -89,7 +89,7 @@

Comp
For more

If you want to go into more details for the formal verification of Python programs, you can look at our coq-of-python project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for Rust and other languages like OCaml. Contact us at contact@formal.land to discuss!

Conclusion

We have presented here the idea of formal verification, a technique to verify the absence of bugs in a program by reasoning from first principles. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct for all possible lists of integers.

-

We will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!