Skip to content

Commit

Permalink
all: add '#pragma target' directive (#10)
Browse files Browse the repository at this point in the history
Here I add support for a new directive #pragma target for configuring the target
instruction set. Opcode names are resolved against the instruction set, and choosing
a target also configures use of PUSH0.
  • Loading branch information
fjl authored Nov 23, 2024
1 parent e5be52b commit 681ba22
Show file tree
Hide file tree
Showing 22 changed files with 1,135 additions and 649 deletions.
45 changes: 44 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,50 @@ main.eas:
push 2
%StoreSum ;; calling global macro defined in lib.evm

### Configuring the target instruction set

The EVM is a changing environment. Opcodes may be added (and sometimes removed) as new
versions of the EVM are released in protocol forks. Geas is aware of EVM forks and their
respective instruction sets.

Geas always operates on a specific EVM instruction set. By default, it targets the latest
known Ethereum mainnet fork: all opcodes available in that fork can be used, while opcodes
that were removed in any prior fork cannot.

Use the `#pragma target` directive to change the target instruction set. The basic syntax is:

#pragma target "name"

where `name` is a lower-case execution-layer fork name like `homestead`, `berlin`, or `prague`.

Here is an example. This contract uses the CHAINID instruction to check if it is running
on mainnet, and destroys itself otherwise. CHAINID became available in the "istanbul"
fork, and SELFDESTRUCT was removed in a later revision of the EVM, so this program is only
applicable to a certain range of past EVM versions.

#pragma target "berlin"

chainid ; [id]
push 1 ; [1, id]
eq ; [id = 1]
jumpi @mainnet ; []
push 0x0 ; [zeroaddr]
selfdestruct ; []
mainnet:

Note that declaring the target instruction set using `#pragma target` will not prevent the
output bytecode from running on a different EVM version, since it is just a compiler
setting. The example program above will behave differently than intended starting with
EVM version "cancun", because SELFDESTRUCT was turned into SENDALL in that fork. It may
even stop working entirely in a later fork.

`#pragma target` can only appear in the program once. It cannot be placed in an include
file. You have to put the directive in the main program file.

### #assemble

When writing contract constructors and advanced CALL scenarios, it can be necessary to
include subprogram bytecode as-is. The `#assemble` directive can do this for you.
include subprogram bytecode as-is. The `#assemble` directive does this for you.

Using `#assemble` runs the assembler on the specified file, and includes the resulting
bytecode into the current program. Labels of the subprogram will start at offset zero.
Expand All @@ -261,5 +301,8 @@ Unlike with `#include`, global definitions of the subprogram are not imported.
#assemble "subprogram.eas"
.end

If a target instruction set is configured with `#pragma target`, it will also be used for
assembling the subprogram. However, the subprogram file can override the instruction set
using its own `#pragma target` directive.

[^1]: Under no circumstances must it be called the geth assembler.
118 changes: 83 additions & 35 deletions asm/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"strings"

"github.com/fjl/geas/internal/ast"
"github.com/fjl/geas/internal/evm"
)

// Compiler performs the assembling.
Expand All @@ -35,7 +36,7 @@ type Compiler struct {
lexDebug bool
maxIncDepth int
maxErrors int
usePush0 bool
defaultFork string

globals *globalScope
errors []error
Expand All @@ -51,7 +52,7 @@ func NewCompiler(fsys fs.FS) *Compiler {
includes: make(map[*ast.IncludeSt]*ast.Document),
maxIncDepth: 128,
maxErrors: 10,
usePush0: true,
defaultFork: evm.LatestFork,
}
}

Expand All @@ -60,10 +61,9 @@ func (c *Compiler) SetDebugLexer(on bool) {
c.lexDebug = on
}

// SetUsePush0 enables/disables use of the PUSH0 instruction.
// It's on by default.
func (c *Compiler) SetUsePush0(on bool) {
c.usePush0 = on
// SetDefaultFork sets the EVM instruction set used by default.
//
// The fork name f is stored without validation. During compilation it is looked
// up with evm.FindInstructionSet when the program has not selected an instruction
// set through a '#pragma target' directive. NewCompiler initializes this setting
// to evm.LatestFork.
//
// NOTE(review): evm.FindInstructionSet can return nil for a name it does not know.
// How compilation behaves when f is such a name is not visible here; confirm
// before passing user-supplied values.
func (c *Compiler) SetDefaultFork(f string) {
c.defaultFork = f
}

// SetDebugLexer enables/disables printing of the token stream to stdout.
Expand Down Expand Up @@ -135,22 +135,26 @@ func (c *Compiler) addErrors(errs []error) {

// compile is the toplevel entry point into the compiler.
func (c *Compiler) compile(doc *ast.Document) (output []byte) {
prevGlobals := c.globals
c.globals = newGlobalScope()
defer func() { c.globals = prevGlobals }()

defer func() {
panicking := recover()
if panicking != nil && panicking != errCancelCompilation {
panic(panicking)
}
}()

c.globals = newGlobalScope()
prog := newCompilerProg(doc)

// First, load all #include files and register their definitions.
c.processIncludes(doc, nil)
// This also configures the instruction set if specified by a #pragma.
c.processIncludes(doc, prog, nil)

// Choose latest eth mainnet instruction set if not configured.
if prog.evm == nil {
prog.evm = evm.FindInstructionSet(c.defaultFork)
}

// Next, the AST document tree is expanded into a flat list of instructions.
prog := newCompilerProg(doc)
c.expand(doc, prog)
if prog.cur != prog.toplevel {
panic("section stack was not unwound by expansion")
Expand Down Expand Up @@ -184,38 +188,53 @@ func (c *Compiler) compile(doc *ast.Document) (output []byte) {
}

// processIncludes reads all #included documents.
func (c *Compiler) processIncludes(doc *ast.Document, stack []ast.Statement) {
func (c *Compiler) processIncludes(doc *ast.Document, prog *compilerProg, stack []ast.Statement) {
errs := c.globals.registerDefinitions(doc)
c.addErrors(errs)

var list []*ast.IncludeSt
for _, inst := range doc.Statements {
inc, ok := inst.(*ast.IncludeSt)
if !ok {
continue
}
file, err := resolveRelative(doc.File, inc.Filename)
if err != nil {
c.addError(inst, err)
continue
}
incdoc := c.parseIncludeFile(file, inc, len(stack)+1)
if incdoc == nil {
continue // there were parse errors
for _, st := range doc.Statements {
switch st := st.(type) {
case *ast.IncludeSt:
file, err := resolveRelative(doc.File, st.Filename)
if err != nil {
c.addError(st, err)
continue
}
incdoc := c.parseIncludeFile(file, st, len(stack)+1)
if incdoc != nil {
c.includes[st] = incdoc
list = append(list, st)
}

case *ast.PragmaSt:
switch st.Option {
case "target":
if len(stack) != 0 {
c.addError(st, ecPragmaTargetInIncludeFile)
}
if prog.evm != nil {
c.addError(st, ecPragmaTargetConflict)
}
prog.evm = evm.FindInstructionSet(st.Value)
if prog.evm == nil {
c.addError(st, fmt.Errorf("%w %q", ecPragmaTargetUnknown, st.Value))
}
default:
c.addError(st, fmt.Errorf("%w %s", ecUnknownPragma, st.Option))
}
}
c.includes[inc] = incdoc
list = append(list, inc)
}

// Process includes in macros.
for _, m := range doc.InstrMacros() {
c.processIncludes(m.Body, append(stack, m))
c.processIncludes(m.Body, prog, append(stack, m))
}

// Recurse.
for _, inst := range list {
incdoc := c.includes[inst]
c.processIncludes(incdoc, append(stack, inst))
c.processIncludes(incdoc, prog, append(stack, inst))
}
}

Expand Down Expand Up @@ -264,19 +283,48 @@ func (c *Compiler) generateOutput(prog *compilerProg) []byte {
if len(c.errors) > 0 {
return nil
}

var output []byte
for _, inst := range prog.iterInstructions() {
if len(output) != inst.pc {
panic(fmt.Sprintf("BUG: instruction pc=%d, but output has size %d", inst.pc, len(output)))
}
if inst.op != "" {
opcode, ok := inst.opcode()
if !ok {

switch {
case isPush(inst.op):
if inst.pushSize > 32 {
panic("BUG: pushSize > 32")
}
if len(inst.data) > inst.pushSize {
panic(fmt.Sprintf("BUG: push inst.data %d > inst.pushSize %d", len(inst.data), inst.pushSize))
}

// resolve the op
var op *evm.Op
if inst.op == "PUSH" {
op = prog.evm.PushBySize(inst.pushSize)
} else {
op = prog.evm.OpByName(inst.op)
}
if op == nil {
panic(fmt.Sprintf("BUG: opcode for %q (size %d) not found", inst.op, inst.pushSize))
}

// Add opcode and data padding to output.
output = append(output, op.Code)
if len(inst.data) < inst.pushSize {
output = append(output, make([]byte, inst.pushSize-len(inst.data))...)
}

case inst.op != "":
op := prog.evm.OpByName(inst.op)
if op == nil {
c.addError(inst.ast, fmt.Errorf("%w %s", ecUnknownOpcode, inst.op))
continue
}
output = append(output, byte(opcode))
output = append(output, op.Code)
}

// Instruction data is always added to output.
output = append(output, inst.data...)
}
return output
Expand Down
24 changes: 10 additions & 14 deletions asm/compiler_eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (c *Compiler) assignInitialPushSizes(e *evaluator, prog *compilerProg) {
c.addError(inst.ast, err)
continue
}
if err := c.assignPushArg(inst, v, true); err != nil {
if err := prog.assignPushArg(inst, v, true); err != nil {
c.addError(inst.ast, err)
continue
}
Expand Down Expand Up @@ -91,7 +91,7 @@ func (c *Compiler) assignArgs(e *evaluator, prog *compilerProg) (inst *instructi
if err != nil {
return inst, err
}
if err := c.assignPushArg(inst, v, false); err != nil {
if err := prog.assignPushArg(inst, v, false); err != nil {
return inst, err
}
}
Expand All @@ -103,7 +103,7 @@ func (c *Compiler) assignArgs(e *evaluator, prog *compilerProg) (inst *instructi
//
// If setSize is true, the pushSize of variable-size "PUSH" instructions will be assigned
// based on the value.
func (c *Compiler) assignPushArg(inst *instruction, v *big.Int, setSize bool) error {
func (prog *compilerProg) assignPushArg(inst *instruction, v *big.Int, setSize bool) error {
if v.Sign() < 0 {
return ecNegativeResult
}
Expand All @@ -115,7 +115,7 @@ func (c *Compiler) assignPushArg(inst *instruction, v *big.Int, setSize bool) er

_, hasExplicitSize := inst.explicitPushSize()
if setSize && !hasExplicitSize {
inst.pushSize = c.autoPushSize(b)
inst.pushSize = prog.autoPushSize(b)
}
if len(b) > inst.pushSize {
if !hasExplicitSize {
Expand All @@ -124,22 +124,18 @@ func (c *Compiler) assignPushArg(inst *instruction, v *big.Int, setSize bool) er
return ecFixedSizePushOverflow
}

// Store data padded.
inst.data = make([]byte, inst.pushSize)
copy(inst.data[len(inst.data)-len(b):], b)
// Store data. Note there is no padding applied here.
// Padding will be added at the bytecode output stage.
inst.data = b
return nil
}

func (c *Compiler) autoPushSize(value []byte) int {
func (prog *compilerProg) autoPushSize(value []byte) int {
if len(value) > 32 {
panic("value too big")
}
if len(value) == 0 {
if c.usePush0 {
return 0
} else {
return 1
}
if len(value) == 0 && !prog.evm.SupportsPush0() {
return 1
}
return len(value)
}
Loading

0 comments on commit 681ba22

Please sign in to comment.