Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 7c5c7f0
Author: Jacob Van Buren <[email protected]>
Date:   Wed Jan 22 15:24:10 2025 -0500

    Squashed commit of the following:

    commit a53391d
    Author: Max Slater <[email protected]>
    Date:   Wed Jan 22 13:53:02 2025 -0500

        Rename `atomic_cas`/`Compare_and_swap` (#3491)

    commit 92b327c
    Author: Max Slater <[email protected]>
    Date:   Tue Jan 21 14:18:56 2025 -0500

        Additional operations for int atomics (#3490)

    commit 38e792c
    Author: Luke Maurer <[email protected]>
    Date:   Tue Jan 21 17:09:23 2025 +0000

        Support `-open Foo` where `Foo` is parameterised (#3489)

        The command line

        ```
        ocamlopt -open Foo -parameter P -c bar.ml
        ```

        should be fine, even if `Foo` is itself parameterised by `P`: as usual, we
        compile `bar.ml` as if it began with `open! Foo`, and by the subset rule, `Bar`
        can refer to `Foo` because it takes at least the same parameters. Unfortunately,
        currently we process `-open` before `-parameter`, so when we go to check the
        implicit reference to `Foo`, we think there are no parameters, and we report an
        error. (Confusingly, the error suggests that the user add `-parameter P` to the
        command line.)

        The fix is simple: move the code that processes `-parameter` earlier so that
        the initial environment is constructed with the parameters already available.

    commit 784dc96
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 21 14:07:41 2025 +0000

        Rename [emit.mlp] to [emit.ml] on amd64 (#3488)

    commit f3b720a
    Author: Zesen Qian <[email protected]>
    Date:   Tue Jan 21 10:35:35 2025 +0000

        Module aliases save locks instead of walking them immediately (#3398)

    commit 389a7c3
    Author: Nick Barnes <[email protected]>
    Date:   Mon Jan 20 17:55:51 2025 +0000

        Add OCAMLRUNPARAM d= parameter for max # domains (#3487)

        Allow maximum number of domains to be specified as a OCAMLRUNPARAM parameter.

        (cherry picked from commit f92715f)

        Co-authored-by: KC Sivaramakrishnan <[email protected]>

    commit 63767d7
    Author: Nick Barnes <[email protected]>
    Date:   Mon Jan 20 17:45:30 2025 +0000

        Add caml_runtime_parameters back (#3468)

        Add caml_runtime_parameters back.

    commit 5e9975e
    Author: Greta Yorsh <[email protected]>
    Date:   Mon Jan 20 17:15:15 2025 +0000

        Emit atomic compare and exchange (#3486)

    commit a9821e8
    Author: Basile Clément <[email protected]>
    Date:   Mon Jan 20 15:36:09 2025 +0100

        Make patricia trees big-endian (#3438)

        This patch switches up the implementation of the `Patricia_tree` module
        from little-endian to big-endian, with the main motivation to be able to
        implement in-order traversal.

        The `caml_int_clz_tagged_to_untagged` and `caml_int_tagged_to_tagged` C
        stubs are recognized and replaced with the `clz` instruction when
        compiling with flambda2, so they are only used in the boot compiler.

    commit b8a9789
    Author: Leo White <[email protected]>
    Date:   Fri Jan 17 13:35:19 2025 +0000

        Generate specific instructions for atomics on immediates (#3477)

        * Generate specific instructions for atomics on immediates

        * Fix formatting

    commit 7b93134
    Author: Greta Yorsh <[email protected]>
    Date:   Fri Jan 17 09:30:30 2025 +0000

        Vectorizer: add tests (#3456)

        * Add tests

        * Disable ocamlformat on unboxed tests

        * Increase -vectorize-max-block-size for tests

        * Fix assertion failure when vectorizing unboxed int32

        * Disable float32 on arm64 (not yet implemented)

        * improve gen_dune.ml for the vectorizer tests

        Co-authored-by: Xavier Clerc <[email protected]>

    commit 6379678
    Author: Mark Shinwell <[email protected]>
    Date:   Thu Jan 16 16:08:29 2025 +0000

        Add "-ocamlrunparam" linker flag (#3483)

    commit f7b2cbe
    Author: Xavier Clerc <[email protected]>
    Date:   Thu Jan 16 15:25:28 2025 +0000

        Bump the version of `actions/upload-artifact` (#3474)

        * Bump the version of actions/upload-artifact.

        * Ensure artifact names are unique.

        * To trigger CI.

        * Try with commit hash.

    commit afb8a55
    Author: Mark Shinwell <[email protected]>
    Date:   Thu Jan 16 14:03:37 2025 +0000

        Move two macOS CI controllers to runtime5 (#3482)

    commit aae5c40
    Author: Mark Shinwell <[email protected]>
    Date:   Thu Jan 16 13:50:34 2025 +0000

        Fix error in caml_get_init_stack_wsize (#3481)

    commit 525868c
    Author: dkalinichenko-js <[email protected]>
    Date:   Wed Jan 15 17:47:30 2025 -0500

        Use null pointers for `or_null`  (#3267)

        * runtime changes

        * runtime4 changes

        * Change `CODE_UNBOXED_INT64` and `CODE_NULL`

        * make `Is_block` an inline function

        * redefine `Is_long`

        * fix

        * Change `CODE_UNBOXED_INT64` back

        * optimize `Is_block`/`Is_long`

        * `null_tag` for `caml_obj_tag`

        * consistent naming

        * slightly more reassuring comment

        * `inline` is unnecessary and might break `#define inline`

        * optimization incorrect in presence of nulls

        * Constructors and pattern-matching

        * Bytecode compilation

        * `or_null` is `Variant_or_null`

        * Accept tests

        * Runtime tests

        * Delete obsolete or_null test

        ---------

        Co-authored-by: Diana Kalinichenko <[email protected]>

    commit 9796b21
    Author: dkalinichenko-js <[email protected]>
    Date:   Wed Jan 15 17:47:13 2025 -0500

        Runtime changes for `or_null` (#3265)

        * runtime changes

        * runtime4 changes

        * Change `CODE_UNBOXED_INT64` and `CODE_NULL`

        * make `Is_block` an inline function

        * redefine `Is_long`

        * fix

        * Change `CODE_UNBOXED_INT64` back

        * optimize `Is_block`/`Is_long`

        * `null_tag` for `caml_obj_tag`

        * consistent naming

        * slightly more reassuring comment

        * `inline` is unnecessary and might break `#define inline`

        * optimization incorrect in presence of nulls

        ---------

        Co-authored-by: Diana Kalinichenko <[email protected]>

    commit df4a6e0
    Author: Chris Casinghino <[email protected]>
    Date:   Wed Jan 15 13:08:53 2025 -0500

        Bump magic numbers for 5.2.0minus-5 (#3478)

    commit d1c8d85
    Author: Mark Shinwell <[email protected]>
    Date:   Wed Jan 15 16:44:39 2025 +0000

        Peek and poke (#3309)

    commit f8caad4
    Author: Greta Yorsh <[email protected]>
    Date:   Wed Jan 15 16:00:39 2025 +0000

        Vectorizer: xmm register can hold ocaml values (#3455)

        * Add [Valx2] to [Cmm.machtype_component]

        * Vectorizer generates [Valx2]

        * Record live offsets of [Valx2] in the frametable

        For runtime4, xmm registers are below [gc_regs], use negative offsets.

        * Move [types_are_compatible] from [Reg] to [Proc]

        This information has to be in sync with register classes, stack
        slot classes, and emit for move instructions.

    commit 34a7873
    Author: Zesen Qian <[email protected]>
    Date:   Wed Jan 15 13:13:18 2025 +0000

        Improve coherence of modality zapping (#3462)

    commit 1a6a9d3
    Author: Mark Shinwell <[email protected]>
    Date:   Wed Jan 15 12:06:24 2025 +0000

        Fix caml_obj_with_tag (#3465)

    commit bc5110a
    Author: Nick Barnes <[email protected]>
    Date:   Wed Jan 15 11:52:25 2025 +0000

        Reset the pacing of major collection after any synchronous major GC (#3463)

        Reset the pacing of major collection at the end of any synchronous major collection.

    commit 9faf700
    Author: Mark Shinwell <[email protected]>
    Date:   Wed Jan 15 11:32:59 2025 +0000

        Remove remnants of caml_obj_truncate (#3469)

    commit ff9430b
    Author: Luke Maurer <[email protected]>
    Date:   Wed Jan 15 11:31:22 2025 +0000

        Mangle instance symbol names using `____` rather than `___` (#3472)

        Apparently there are libraries around that have names ending in single
        underscores, leading to ambiguous symbol names if we use triple
        underscores to delimit instances. Other choices are possible but this PR
        opts for newly-developed quadruple-underscore technology.

    commit 9984700
    Author: Vincent Laviron <[email protected]>
    Date:   Wed Jan 15 11:03:43 2025 +0100

        Port upstream PRs 11542 and 12505 to runtime4 (#3431)

        fix #11482: random crash in large closure allocation (#11542)

        Co-authored-by: Damien Doligez <[email protected]>

    commit 058c4db
    Author: Mark Shinwell <[email protected]>
    Date:   Tue Jan 14 22:19:32 2025 +0000

        Enable all makearray_dynamic tests on runtime4 (#3470)

    commit ba15ee5
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 20:08:31 2025 +0000

        Vectorize [Ifloatarithmem] (#3452)

        * Add [Isimd_mem] to [Arch.Specific] and emit [addpd] with memory arg

        and similar instructions

        * Vectorize [Ifloatarithmem]

        When the memory alignment is known to be 128-bit (currently, never) emits
        [addpd], otherwise emits a vector load followed by an arithmetic
        instruction.

    commit 9755b39
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 18:57:39 2025 +0000

        Fix CI failure (#3473)

    commit 859949c
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 17:56:53 2025 +0000

        Vectorize [Specific.Istore_int] (#3450)

        Used for array initialization (amd64)

    commit 50f73cb
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 17:38:39 2025 +0000

        Do not allow naked pointers (remove configure option) (#3448)

    commit b7c8ad3
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 17:07:39 2025 +0000

        Vectorizer refactor heuristic for select_and_join (#3449)

        * Refactor [Block.find_last_instruction], cache [Computation.last_pos]

        * Improve heuristics in [Computation.select_and_join] using [last_pos]

    commit 22f81d8
    Author: Mark Shinwell <[email protected]>
    Date:   Tue Jan 14 17:00:29 2025 +0000

        Fix mistake in conditional for makearray_dynamic array initialization (#3466)

    commit aaaddfb
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 14:57:17 2025 +0000

        Vectorizer: propagate alignment of memory accesses (#3451)

        Currently it's always 8 but having this argument will help us
        consider alignment for new vector sequences.

    commit b15d44e
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 14:55:29 2025 +0000

        vectorizer: improve debug printout (#3445)

    commit 6239156
    Author: Stephen Dolan <[email protected]>
    Date:   Tue Jan 14 13:43:21 2025 +0000

        Better hugepage alignment of stacks and heap (#3384)

        Co-authored-by: Mark Shinwell <[email protected]>

    commit 677d79a
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 13:23:56 2025 +0000

        Backend dune copy and directive (#3467)

        * Remove unused line directive from [dune]

        * Use [copy_files#] to copy files from ARCH and add a file directive

        * Remove existing file directives

    commit 314b131
    Author: Stephen Dolan <[email protected]>
    Date:   Tue Jan 14 11:58:41 2025 +0000

        Bound stack size in expect tests (#3439)

    commit 02774f8
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 10:52:50 2025 +0000

        all_deps is reflexive (#3464)

    commit 117a0a0
    Author: Stephen Dolan <[email protected]>
    Date:   Tue Jan 14 10:44:15 2025 +0000

        Stub implementation of new custom memory API (#3437)

    commit 4f30aac
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Jan 14 10:31:48 2025 +0000

        Vectorizer bug fix: address argument of memory operations (#3446)

        Fix bug: use address arg of the first instruction in a group

        ... not the last!

        Only matters for arrays at the moment, where the address offset
        argument is not always the same register.

    commit cc91e2b
    Author: Vincent Laviron <[email protected]>
    Date:   Tue Jan 14 11:26:59 2025 +0100

        caml_update_dummy: fail on closure blocks (#3429)

    commit 17a01a9
    Author: Mark Shinwell <[email protected]>
    Date:   Tue Jan 14 10:07:39 2025 +0000

        Implement %array_element_size_in_bytes (#3367)

        Co-authored-by: Chris Casinghino <[email protected]>

    commit b487f71
    Author: Greta Yorsh <[email protected]>
    Date:   Mon Jan 13 14:05:25 2025 +0000

        Runtime: make types explicit when reading [gc_regs] (#3453)

        Runtime4: make types explicit when reading [gc_regs].

    commit 67e6eb3
    Author: Max Slater <[email protected]>
    Date:   Fri Jan 10 16:17:32 2025 -0500

        More capsule API updates (#3440)

    commit c7f573f
    Author: Mark Shinwell <[email protected]>
    Date:   Fri Jan 10 18:26:15 2025 +0000

        Reinstate %makearray_dynamic (#3460)

    commit e1e4fb8
    Author: Zesen Qian <[email protected]>
    Date:   Fri Jan 10 16:15:48 2025 +0000

        `portable` lazy allows `nonportable` thunk (#3436)

        * portable lazy allows nonportable thunk

        * add documentation

        * improve documentation

        * add examples

        * improve comments in test

        * say "not stronger"

    commit c30ec74
    Author: Ryan Tjoa <[email protected]>
    Date:   Fri Jan 10 10:41:08 2025 -0500

        Check for type recursion without boxing (#3407)

    commit cb290c5
    Author: Greta Yorsh <[email protected]>
    Date:   Fri Jan 10 11:00:32 2025 +0000

        Vectorizer: rename New (#3454)

        Rename New to New_vec128 to make the type clear

        and distinguish it from the upcoming Valx2

    commit bd39e02
    Author: Greta Yorsh <[email protected]>
    Date:   Fri Jan 10 10:24:48 2025 +0000

        Add function [DLL.for_all_i] (#3442)

        * Add function [DLL.for_all_i]

        * Rename to [for_alli] to match existing [mapi] and [iteri]

        * Remove unused argument of [aux] in [DLL.for_all*]

    commit c048920
    Author: Greta Yorsh <[email protected]>
    Date:   Thu Jan 9 13:16:36 2025 +0000

        Cleanup machtype_component size (#3441)

        Cleanup size_component

    commit 830d5e7
    Author: Greta Yorsh <[email protected]>
    Date:   Thu Jan 9 13:15:59 2025 +0000

        Add "dump-vectorize" to OCAMLPARAM (#3443)

        Add [dump-vectorize] to OCAMLPARAM for debugging

    commit 157c95e
    Author: Greta Yorsh <[email protected]>
    Date:   Thu Jan 9 13:15:33 2025 +0000

        Vectorizer bug fix: 128-bit vectorized constant   (#3447)

        Fix bug: 128-bit vectorized constant high/low correctly ordered

    commit 648155d
    Author: Greta Yorsh <[email protected]>
    Date:   Thu Jan 9 13:09:48 2025 +0000

        Add [Printreg.reglist] for debugging (#3444)

    commit d40254f
    Author: Stephen Dolan <[email protected]>
    Date:   Tue Jan 7 21:25:45 2025 +0000

        Move two misplaced files (#3435)

    commit 4a0bb69
    Author: dkalinichenko-js <[email protected]>
    Date:   Tue Jan 7 15:34:27 2025 -0500

        `Yielding` mode axis (#3283)

        * `Yielding` mode axis

        * Tests

        * fix printing

        ---------

        Co-authored-by: Diana Kalinichenko <[email protected]>

    commit 00275e0
    Author: Max Slater <[email protected]>
    Date:   Mon Jan 6 13:05:58 2025 -0500

        Unbox_float32 should check custom ops name (#3433)

        check sym name

    commit 2e49469
    Author: Max Slater <[email protected]>
    Date:   Mon Jan 6 13:05:03 2025 -0500

        Make Capsule preserve wrapped exception backtraces (#3421)

        * with_password

        * portable

        * don't use polymorphic parameters

        * review

        * protect encapsulated from other capsule

        * raise wrapped exceptions with existing backtrace

        * cr

    commit 2de23a5
    Author: Ryan Tjoa <[email protected]>
    Date:   Mon Jan 6 04:04:29 2025 -0500

        Fix CI by using `setup-ocaml` v3 for ocamlformat workflow (#3426)

        [CI] Use setup-ocaml v3 for ocamlformat workflow

    commit eada0f1
    Author: Ryan Tjoa <[email protected]>
    Date:   Fri Jan 3 21:23:23 2025 -0500

        Move unboxed records to stable (#3419)

    commit a273a33
    Author: Jacob Van Buren <[email protected]>
    Date:   Fri Jan 3 11:17:18 2025 -0500

        Changed make fmt to run in parallel (#3422)

        changed make fmt to run in parallel

    commit 4de5a72
    Author: Max Slater <[email protected]>
    Date:   Thu Jan 2 20:10:08 2025 -0500

        Add `Capsule.with_password` (#3420)

    commit b084ff3
    Author: Greta Yorsh <[email protected]>
    Date:   Wed Jan 1 15:34:11 2025 +0000

        vectorizer: new test (#3418)

        Add test for register compatibility

    commit 5549015
    Author: Greta Yorsh <[email protected]>
    Date:   Tue Dec 31 17:20:56 2024 +0000

        Vectorizer: check register compatibility (#3412)

        Check that registers are compatible when joining computations
jvanburen committed Jan 22, 2025
1 parent d0e8914 commit 16f1b23
Showing 325 changed files with 27,359 additions and 3,715 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -78,14 +78,14 @@ jobs:
config: --enable-middle-end=flambda2 --disable-warn-error
os: macos-latest

- name: flambda2_macos_arm64_irc
config: --enable-middle-end=flambda2 --disable-warn-error
- name: flambda2_macos_arm64_runtime5_irc
config: --enable-middle-end=flambda2 --enable-runtime5 --disable-warn-error
os: macos-latest
build_ocamlparam: '_,w=-46,regalloc=irc'
ocamlparam: '_,w=-46,regalloc=irc'

- name: flambda2_macos_arm64_ls
config: --enable-middle-end=flambda2 --disable-warn-error
- name: flambda2_macos_arm64_runtime5_ls
config: --enable-middle-end=flambda2 --enable-runtime5 --disable-warn-error
os: macos-latest
build_ocamlparam: '_,w=-46,regalloc=ls'
ocamlparam: '_,w=-46,regalloc=ls'
@@ -282,22 +282,22 @@ jobs:
run: |
PATH=$GITHUB_WORKSPACE/ocaml-414/_install/bin:$PATH make check_all_arches
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ failure() }} && matrix.os != 'macos-latest'
with:
name: cores
name: cores-${{ github.sha }}
path: /cores

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ failure() }} && matrix.os != 'macos-latest'
with:
name: _build
name: _build-${{ github.sha }}
path: $GITHUB_WORKSPACE/_build

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
if: ${{ failure() }} && matrix.os != 'macos-latest'
with:
name: _runtest
name: _runtest-${{ github.sha }}
path: $GITHUB_WORKSPACE/_runtest

concurrency:
4 changes: 2 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
@@ -89,8 +89,8 @@ jobs:
# BUILD_OCAMLPARAM: ${{ matrix.ocamlparam }}
#
# - name: Publish coverage report
# uses: actions/upload-artifact@v3
# uses: actions/upload-artifact@v4
# with:
# name: coverage
# name: coverage-${{ github.sha }}
# path: flambda_backend/_coverage/**
#
2 changes: 1 addition & 1 deletion .github/workflows/ocamlformat.yml
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ jobs:
path: 'flambda_backend'

- name: Setup OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v2
uses: ocaml/setup-ocaml@v3
with:
ocaml-compiler: ${{ matrix.ocaml-compiler }}

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -87,7 +87,7 @@ promote:

.PHONY: fmt
fmt:
ocamlformat -i $$(find . \( -name "*.ml" -or -name "*.mli" \))
find . \( -name "*.ml" -or -name "*.mli" \) | xargs -P $$(nproc 2>/dev/null || echo 1) -n 20 ocamlformat -i

.PHONY: check-fmt
check-fmt:
12 changes: 12 additions & 0 deletions asmcomp/asmlink.ml
Original file line number Diff line number Diff line change
@@ -349,6 +349,16 @@ let sourcefile_for_dwarf ~named_startup_file filename =
if named_startup_file then filename
else ".startup"

(* Compile a Cmm data phrase that defines the global symbol
   [caml_ocamlrunparam]. Its contents are the current value of
   [Clflags.ocamlrunparam] followed by a NUL byte. [ppf_dump] is passed
   through to [Asmgen.compile_phrase]. *)
let emit_ocamlrunparam ~ppf_dump =
Asmgen.compile_phrase ~ppf_dump
(Cmm.Cdata [
Cmm.Cdefine_symbol {
sym_name = "caml_ocamlrunparam";
sym_global = Global
};
(* The trailing "\000" is appended explicitly; presumably so the data can
   be read as a C string by the runtime -- confirm against the consumer. *)
Cmm.Cstring (!Clflags.ocamlrunparam ^ "\000")
])

let make_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units cached_gen =
Location.input_name := "caml_startup"; (* set name of "current" input *)
let startup_comp_unit =
@@ -361,6 +371,7 @@ let make_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units cached_g
let compile_phrase p = Asmgen.compile_phrase ~ppf_dump p in
let name_list =
List.flatten (List.map (fun u -> u.defines) units) in
emit_ocamlrunparam ~ppf_dump;
List.iter compile_phrase (Cmm_helpers.entry_point name_list);
List.iter compile_phrase
(* Emit the GC roots table, for dynlink. *)
@@ -414,6 +425,7 @@ let make_shared_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units =
Emitaux.Dwarf_helpers.init ~disable_dwarf:(not !Dwarf_flags.dwarf_for_startup_file)
~sourcefile:sourcefile_for_dwarf;
Emit.begin_assembly unix;
emit_ocamlrunparam ~ppf_dump;
List.iter compile_phrase
(Cmm_helpers.emit_gc_roots_table ~symbols:[]
(Generic_fns.compile ~shared:true genfns));
19 changes: 12 additions & 7 deletions backend/amd64/CSE.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# 2 "backend/amd64/CSE.ml"
(**************************************************************************)
(* *)
(* OCaml *)
@@ -21,6 +20,12 @@ open Arch
open Mach
open CSE_utils

(* Translate a [Simd.operation_class] into the operation class returned by
   [class_of_operation]: [Pure] becomes [Op_pure], and [Load] becomes
   [Op_load] with [Mutable] or [Immutable] chosen from the [is_mutable]
   flag. *)
let of_simd_class (cl : Simd.operation_class) =
match cl with
| Pure -> Op_pure
| Load { is_mutable = true } -> Op_load Mutable
| Load { is_mutable = false } -> Op_load Immutable

class cse = object

inherit CSEgen.cse_generic as super
@@ -37,9 +42,9 @@ method! class_of_operation op =
| Irdtsc | Irdpmc
| Ilfence | Isfence | Imfence -> Op_other
| Isimd op ->
begin match Simd.class_of_operation op with
| Pure -> Op_pure
end
of_simd_class (Simd.class_of_operation op)
| Isimd_mem (op,_addr) ->
of_simd_class (Simd.Mem.class_of_operation op)
| Ipause
| Icldemote _
| Iprefetch _ -> Op_other
@@ -81,9 +86,9 @@ class cfg_cse = object
| Irdtsc | Irdpmc
| Ilfence | Isfence | Imfence -> Op_other
| Isimd op ->
begin match Simd.class_of_operation op with
| Pure -> Op_pure
end
of_simd_class (Simd.class_of_operation op)
| Isimd_mem (op,_addr) ->
of_simd_class (Simd.Mem.class_of_operation op)
| Ipause
| Icldemote _
| Iprefetch _ -> Op_other
19 changes: 14 additions & 5 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# 2 "backend/amd64/arch.ml"
(**************************************************************************)
(* *)
(* OCaml *)
@@ -153,6 +152,9 @@ type specific_operation =
| Imfence (* memory fence *)
| Ipause (* hint for spin-wait loops *)
| Isimd of Simd.operation (* SIMD instruction set operations *)
| Isimd_mem of Simd.Mem.operation * addressing_mode
(* SIMD instruction set operations
with memory args *)
| Icldemote of addressing_mode (* hint to demote a cacheline to L3 *)
| Iprefetch of (* memory prefetching hint *)
{ is_write: bool;
@@ -273,6 +275,8 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "rdpmc %a" printreg arg.(0)
| Isimd simd ->
Simd.print_operation printreg simd ppf arg
| Isimd_mem (simd, addr) ->
Simd.Mem.print_operation printreg (print_addressing printreg addr) simd ppf arg
| Ipause ->
fprintf ppf "pause"
| Icldemote _ ->
@@ -299,13 +303,14 @@ let operation_is_pure = function
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Icldemote _ | Iprefetch _ -> false
| Isimd op -> Simd.is_pure op
| Isimd_mem (op, _addr) -> Simd.Mem.is_pure op

(* Specific operations that can raise *)
(* Keep in sync with [Vectorize_specific] *)
let operation_can_raise = function
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
| Ifloatarithmem _
| Irdtsc | Irdpmc | Ipause | Isimd _
| Irdtsc | Irdpmc | Ipause | Isimd _ | Isimd_mem _
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Icldemote _ | Iprefetch _ -> false
@@ -314,7 +319,7 @@ let operation_can_raise = function
let operation_allocates = function
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
| Ifloatarithmem _
| Irdtsc | Irdpmc | Ipause | Isimd _
| Irdtsc | Irdpmc | Ipause | Isimd _ | Isimd_mem _
| Ilfence | Isfence | Imfence
| Istore_int (_, _, _) | Ioffset_loc (_, _)
| Icldemote _ | Iprefetch _ -> false
@@ -405,9 +410,11 @@ let equal_specific_operation left right =
&& equal_addressing_mode left_addr right_addr
| Isimd l, Isimd r ->
Simd.equal_operation l r
| Isimd_mem (l,al), Isimd_mem (r,ar) ->
Simd.Mem.equal_operation l r && equal_addressing_mode al ar
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ ->
Ipause | Isimd _ | Isimd_mem _ | Icldemote _ | Iprefetch _), _ ->
false

(* addressing mode functions *)
@@ -512,7 +519,9 @@ let isomorphic_specific_operation op1 op2 =
&& equal_addressing_mode_without_displ left_addr right_addr
| Isimd l, Isimd r ->
Simd.equal_operation l r
| Isimd_mem (l,al), Isimd_mem (r,ar) ->
Simd.Mem.equal_operation l r && equal_addressing_mode_without_displ al ar
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ ->
Ipause | Isimd _ | Isimd_mem _ | Icldemote _ | Iprefetch _), _ ->
false
4 changes: 3 additions & 1 deletion backend/amd64/arch.mli
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# 2 "asmcomp/amd64/arch.mli"
(**************************************************************************)
(* *)
(* OCaml *)
@@ -86,6 +85,9 @@ type specific_operation =
| Imfence (* memory fence *)
| Ipause (* hint for spin-wait loops *)
| Isimd of Simd.operation (* SIMD instruction set operations *)
| Isimd_mem of Simd.Mem.operation * addressing_mode
(* SIMD instruction set operations
with memory args *)
| Icldemote of addressing_mode (* hint to demote a cacheline to L3 *)
| Iprefetch of (* memory prefetching hint *)
{ is_write: bool;
19 changes: 16 additions & 3 deletions backend/amd64/cfg_selection.ml
Original file line number Diff line number Diff line change
@@ -32,12 +32,17 @@ let pseudoregs_for_operation op arg res =
| Intop (Iadd | Isub | Imul | Iand | Ior | Ixor)
| Floatop ((Float32 | Float64), (Iaddf | Isubf | Imulf | Idivf)) ->
[| res.(0); arg.(1) |], res
| Intop_atomic { op = Compare_and_swap; size = _; addr = _ } ->
| Intop_atomic { op = Compare_set; size = _; addr = _ } ->
(* first arg must be rax *)
let arg = Array.copy arg in
arg.(0) <- rax;
arg, res
| Intop_atomic { op = Fetch_and_add; size = _; addr = _ } ->
| Intop_atomic { op = Compare_exchange; size = _; addr = _ } ->
(* first arg must be rax, res.(0) must be rax. *)
let arg = Array.copy arg in
arg.(0) <- rax;
arg, [| rax |]
| Intop_atomic { op = Exchange | Fetch_and_add; size = _; addr = _ } ->
(* first arg must be the same as res.(0) *)
let arg = Array.copy arg in
arg.(0) <- res.(0);
@@ -86,14 +91,22 @@ let pseudoregs_for_operation op arg res =
edx (high) and eax (low). Make it simple and force the argument in rcx,
and rax and rdx clobbered *)
[| rcx |], res
| Specific (Isimd op) -> Simd_selection.pseudoregs_for_operation op arg res
| Specific (Isimd op) ->
Simd_selection.pseudoregs_for_operation
(Simd_proc.register_behavior op)
arg res
| Specific (Isimd_mem (op, _addr)) ->
Simd_selection.pseudoregs_for_operation
(Simd_proc.Mem.register_behavior op)
arg res
| Csel _ ->
(* last arg must be the same as res.(0) *)
let len = Array.length arg in
let arg = Array.copy arg in
arg.(len - 1) <- res.(0);
arg, res
(* Other instructions are regular *)
| Intop_atomic { op = Add | Sub | Land | Lor | Lxor; _ }
| Intop (Ipopcnt | Iclz _ | Ictz _ | Icomp _)
| Intop_imm ((Imulh _ | Idiv | Imod | Icomp _ | Ipopcnt | Iclz _ | Ictz _), _)
| Specific
Loading

0 comments on commit 16f1b23

Please sign in to comment.