From 289a3b6faef36c9a73d7ca54c3a014029dd95279 Mon Sep 17 00:00:00 2001 From: Dan Doel Date: Wed, 11 Sep 2024 15:02:25 -0400 Subject: [PATCH 1/2] Switch to custom max0 operation in Nat.drop --- scheme-libs/racket/unison/arithmetic.rkt | 2 +- scheme-libs/racket/unison/boot.ss | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scheme-libs/racket/unison/arithmetic.rkt b/scheme-libs/racket/unison/arithmetic.rkt index a50364eb55..9eee336469 100644 --- a/scheme-libs/racket/unison/arithmetic.rkt +++ b/scheme-libs/racket/unison/arithmetic.rkt @@ -48,7 +48,7 @@ (define-unison-builtin (builtin-Nat.drop m n) - (max 0 (- m n))) + (natural-max0 (- m n))) (define-unison-builtin (builtin-Nat.increment n) diff --git a/scheme-libs/racket/unison/boot.ss b/scheme-libs/racket/unison/boot.ss index e8262c14e4..36a7c8112d 100644 --- a/scheme-libs/racket/unison/boot.ss +++ b/scheme-libs/racket/unison/boot.ss @@ -63,6 +63,7 @@ clamp-integer clamp-natural + natural-max0 wrap-natural bit64 bit63 From 1bc79385f43a7b01c86b35f1fec1ef0d353b476b Mon Sep 17 00:00:00 2001 From: Dan Doel Date: Fri, 13 Sep 2024 17:50:51 -0400 Subject: [PATCH 2/2] Tweaks to make tight arithmetic loops behave better on the JIT - Apparently `(max 0 n)` used in `Nat.drop` was slow, so it's been replaced with something that should act the same on natural numbers. - Switched back to the original currying macro behavior. This seems to optimize better in various ways. According to my tests, it should only really be necessary for recursive functions, and so I've added some capabilities to only apply the full macro locally on those. But the racket optimizer also seems very fickle, so using predefined curry functions on various builtins seems to _not_ optimize properly like they do in my localized tests, even when various inlining suggestions are enabled. Hopefully this can be fixed in the future as it makes compile times significantly worse. This also fixes a latent bug where there wouldn't be enough pre-defined currying functions for procedures that take more than 20 arguments. I've instead lowered the predefined functions to a maximum of 9 arguments, and made anything over that just use the macro directly, since those are presumably rare. None of the currying functions are currently used, but hopefully they can be in the future. --- scheme-libs/racket/unison/boot.ss | 56 ++++++++++++--- scheme-libs/racket/unison/curry.rkt | 72 ++++++++----------- .../racket/unison/primops-generated.rkt | 14 ++++ 3 files changed, 90 insertions(+), 52 deletions(-) diff --git a/scheme-libs/racket/unison/boot.ss b/scheme-libs/racket/unison/boot.ss index 36a7c8112d..5402cb9325 100644 --- a/scheme-libs/racket/unison/boot.ss +++ b/scheme-libs/racket/unison/boot.ss @@ -255,13 +255,19 @@ (vector . args) (name:impl #:pure pure? . args)))))))) -(define-for-syntax (make-main loc name:stx ref:stx name:impl:stx n) +(define-for-syntax + (make-main loc recursive? name:stx ref:stx name:impl:stx n) (with-syntax ([name name:stx] [name:impl name:impl:stx] [gr ref:stx] [n (datum->syntax loc n)]) - (syntax/loc loc - (define name (unison-curry n gr name:impl))))) + (if recursive? + (syntax/loc loc + (define name + (unison-curry #:inline n gr name:impl))) + (syntax/loc loc + (define name + (unison-curry n gr name:impl)))))) (define-for-syntax (link-decl no-link-decl? loc name:stx name:fast:stx name:impl:stx) @@ -289,14 +295,18 @@ [force-pure? #t] [gen-link? #f] [no-link-decl? #f] - [trace? #f]) + [trace? #f] + [inline? #f] + [recursive? #t]) ([h hs]) (values (or internal? (eq? h 'internal)) (or force-pure? (eq? h 'force-pure) (eq? h 'internal)) (or gen-link? (eq? h 'gen-link)) (or no-link-decl? (eq? h 'no-link-decl)) - (or trace? (eq? h 'trace))))) + (or trace? (eq? h 'trace)) + (or inline? (eq? h 'inline)) + (or recursive? (eq? h 'recursive))))) (define-for-syntax (make-link-def gen-link? loc name:stx name:link:stx) @@ -325,8 +335,13 @@ #:local [lo 0] loc name:stx arg:stx expr:stx) - (define-values - (internal? force-pure? gen-link? no-link-decl? trace?) + (define-values (internal? + force-pure? + gen-link? + no-link-decl? + trace? + inline? + recursive?) (process-hints hints)) @@ -341,13 +356,19 @@ #:force-pure #t ; force-pure? loc name:fast:stx name:impl:stx arg:stx)] [impl (make-impl name:impl:stx arg:stx expr:stx)] - [main (make-main loc name:stx ref:stx name:impl:stx arity)] + [main (make-main loc recursive? name:stx ref:stx name:impl:stx arity)] [(decls ...) (link-decl no-link-decl? loc name:stx name:fast:stx name:impl:stx)] [(traces ...) (trace-decls trace? loc name:impl:stx)]) - (syntax/loc loc - (begin link ... impl traces ... fast main decls ...))))) + (quasisyntax/loc loc + (begin + link ... + #,(if (or recursive? inline?) #'(begin-encourage-inline impl) #'impl) + traces ... + #,(if (or recursive? inline?) #'(begin-encourage-inline fast) #'fast) + #,(if inline? #'(begin-encourage-inline main) #'main) + decls ...))))) ; Function definition supporting various unison features, like ; partial application and continuation serialization. See above for @@ -387,9 +408,15 @@ (define-syntax (define-unison-builtin stx) (syntax-case stx () + [(define-unison-builtin #:local n #:hints [h ...] . rest) + (syntax/loc stx + (define-unison #:local n #:hints [internal gen-link h ...] . rest))] [(define-unison-builtin #:local n . rest) (syntax/loc stx (define-unison #:local n #:hints [internal gen-link] . rest))] + [(define-unison-builtin #:hints [h ...] . rest) + (syntax/loc stx + (define-unison #:hints [internal gen-link h ...] . rest))] [(define-unison-builtin . rest) (syntax/loc stx (define-unison #:hints [internal gen-link] . rest))])) @@ -758,6 +785,15 @@ (if (fixnum? n) n (modulo n bit64))) + ; For natural arithmetic operations that can yield negatives, this + ; ensures that they are clamped back to 0. + ; + ; Note: (max 0 n) is apparently around 2-3x slower than this, hence + ; the custom operation. I've factored it out here in case something + ; even better is found, but this seems to match the performance of + ; the underlying operation. + (define (natural-max0 n) (if (>= n 0) n 0)) + ; module arithmetic appropriate for when a Nat operation my either ; have too large or a negative result. (define (wrap-natural n) diff --git a/scheme-libs/racket/unison/curry.rkt b/scheme-libs/racket/unison/curry.rkt index 8ae900dd9f..0fe7a080f5 100644 --- a/scheme-libs/racket/unison/curry.rkt +++ b/scheme-libs/racket/unison/curry.rkt @@ -12,18 +12,7 @@ unison-curry-6 unison-curry-7 unison-curry-8 - unison-curry-9 - unison-curry-10 - unison-curry-11 - unison-curry-12 - unison-curry-13 - unison-curry-14 - unison-curry-15 - unison-curry-16 - unison-curry-17 - unison-curry-18 - unison-curry-19 - unison-curry-20) + unison-curry-9) (require racket/performance-hint racket/unsafe/undefined @@ -80,19 +69,24 @@ (define-for-syntax (in-partitions xs) (in-parts '() xs)) -(define-for-syntax (build-curry loc n) +(define-for-syntax (build-curried loc n ref:stx fun:stx) (define xs:stx (generate-temporaries (map (const 'x) (range n)))) + + (curry-expr loc 2 ref:stx fun:stx '() xs:stx)) + +(define-for-syntax (build-curry loc n) (define ref:stx (syntax/loc loc gr)) (define fun:stx (syntax/loc loc f)) - (with-syntax ([body (curry-expr loc 2 ref:stx fun:stx '() xs:stx)]) + (with-syntax ([body (build-curried loc n ref:stx fun:stx)]) (syntax/loc loc (lambda (gr f) body)))) (define-syntax (make-curry stx) (syntax-case stx () - [(make-curry n) - (build-curry stx (syntax->datum #'n))])) + [(make-curry n gr f) + (build-curried stx (syntax->datum #'n) #'gr #'f)])) + ; (build-curry stx (syntax->datum #'n))])) (begin-encourage-inline (define ((unison-curry-0 gr f) #:reflect [ref? unsafe-undefined] . rest) @@ -102,35 +96,29 @@ (apply (f) rest)) (unison-closure gr f rest))) - (define unison-curry-1 (make-curry 1)) - (define unison-curry-2 (make-curry 2)) - (define unison-curry-3 (make-curry 3)) - (define unison-curry-4 (make-curry 4)) - (define unison-curry-5 (make-curry 5)) - (define unison-curry-6 (make-curry 6)) - (define unison-curry-7 (make-curry 7)) - (define unison-curry-8 (make-curry 8)) - (define unison-curry-9 (make-curry 9)) - (define unison-curry-10 (make-curry 10)) - (define unison-curry-11 (make-curry 11)) - (define unison-curry-12 (make-curry 12)) - (define unison-curry-13 (make-curry 13)) - (define unison-curry-14 (make-curry 14)) - (define unison-curry-15 (make-curry 15)) - (define unison-curry-16 (make-curry 16)) - (define unison-curry-17 (make-curry 17)) - (define unison-curry-18 (make-curry 18)) - (define unison-curry-19 (make-curry 19)) - (define unison-curry-20 (make-curry 20))) + (define (unison-curry-1 gr f) (make-curry 1 gr f)) + (define (unison-curry-2 gr f) (make-curry 2 gr f)) + (define (unison-curry-3 gr f) (make-curry 3 gr f)) + (define (unison-curry-4 gr f) (make-curry 4 gr f)) + (define (unison-curry-5 gr f) (make-curry 5 gr f)) + (define (unison-curry-6 gr f) (make-curry 6 gr f)) + (define (unison-curry-7 gr f) (make-curry 7 gr f)) + (define (unison-curry-8 gr f) (make-curry 8 gr f)) + (define (unison-curry-9 gr f) (make-curry 9 gr f))) (define-syntax (unison-curry stx) (syntax-case stx () + [(unison-curry #:inline n gr f) + (build-curried stx (syntax->datum #'n) #'gr #'f)] [(unison-curry n gr f) - (begin - (define m (syntax->datum #'n)) - (define curry:stx (vsym #:pre "unison-curry-" m)) - (with-syntax ([u-curry curry:stx]) - (syntax/loc stx - (u-curry gr f))))])) + (let ([m (syntax->datum #'n)]) + (cond + [(< m 10) + (define curry:stx (vsym #:pre "unison-curry-" m)) + (with-syntax ([u-curry curry:stx]) + (syntax/loc stx + (u-curry gr f)))] + [else + (build-curried stx m #'gr #'f)]))])) diff --git a/scheme-libs/racket/unison/primops-generated.rkt b/scheme-libs/racket/unison/primops-generated.rkt index ba719d63d7..09bbb2b41f 100644 --- a/scheme-libs/racket/unison/primops-generated.rkt +++ b/scheme-libs/racket/unison/primops-generated.rkt @@ -976,6 +976,20 @@ (add-runtime-code-proc mname0 tdefs)])])) +; Given a termlink and a list of dependencies for said link, tests +; if the code is recursive. This is done by seeing if it references +; any link with the same bytes. If it does, it must be (mututally) +; recursive. The only way for two definitions to get the same parent +; hash at this point is if they refer to one another. +(define (detect-recursion link deps) + (define self (termlink-bytes link)) + (ormap (lambda (other) + (match other + [(unison-termlink-derived other _) + (equal? self other)] + [else #f])) + deps)) + ; Creates and adds a module for given module name and definitions. ; ; Passing #f for mname0 makes the procedure make up a fresh name.