-
Notifications
You must be signed in to change notification settings - Fork 7
/
sse-intrinsics.lisp
731 lines (582 loc) · 34.8 KB
/
sse-intrinsics.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
;;; -*- mode: Lisp; indent-tabs-mode: nil; -*-
;;;
;;; Copyright (c) 2010, Alexander Gavrilov ([email protected])
;;;
;;; This file contains definitions for all SSE intrinsics.
;;;
;;; The macros are defined in the *-core.lisp files.
;;; On SBCL wrapping functions are defined by sbcl-functions.lisp.
;;;
(in-package #:SSE)
#+(and ecl (or ecl_min stage1 cross))
(eval-when (:compile-toplevel)
;; During the initial bootstrap sequence when the contribs are
;; compiled, the system does not load fasls after building them.
;; (For all it knows, it might be cross-compiling to another architecture.)
;; Work around by loading the macro definition file into the interpreter:
(load (merge-pathnames #P"ecl-sse-core.lisp" *compile-file-truename*)))
#|---------------------------------|
| MISC INTRINSICS DEFINITIONS |
|---------------------------------|#
;;; Prefetch
(def-load-intrinsic cpu-prefetch-t0 nil prefetch "_mm_prefetch" :tags (:t0) :size :byte :postfix-fmt ",_MM_HINT_T0" :side-effect? t)
(def-load-intrinsic cpu-prefetch-t1 nil prefetch "_mm_prefetch" :tags (:t1) :size :byte :postfix-fmt ",_MM_HINT_T1" :side-effect? t)
(def-load-intrinsic cpu-prefetch-t2 nil prefetch "_mm_prefetch" :tags (:t2) :size :byte :postfix-fmt ",_MM_HINT_T2" :side-effect? t)
(def-load-intrinsic cpu-prefetch-nta nil prefetch "_mm_prefetch" :tags (:nta) :size :byte :postfix-fmt ",_MM_HINT_NTA" :side-effect? t)
(def-load-intrinsic cpu-clflush nil clflush "_mm_clflush" :size :byte :side-effect? t)
;;; CPU control
#+sbcl
(progn
(defknown cpu-mxcsr () (unsigned-byte 32) (flushable))
(define-vop (cpu-mxcsr)
(:translate cpu-mxcsr)
(:args) (:arg-types)
(:results (result :scs (unsigned-reg)))
(:result-types unsigned-num)
(:temporary (:sc unsigned-stack) tmp)
(:policy :fast-safe)
(:generator 3
(let ((ea (make-ea :dword :base rbp-tn
:disp (frame-byte-offset (tn-offset tmp)))))
(inst stmxcsr ea)
(inst mov (reg-in-size result :dword) ea))))
(defknown %set-cpu-mxcsr ((unsigned-byte 32)) (unsigned-byte 32) ())
(define-vop (%set-cpu-mxcsr)
(:translate %set-cpu-mxcsr)
(:args (value :scs (unsigned-reg unsigned-stack) :target result))
(:arg-types unsigned-num)
(:results (result :scs (unsigned-reg)
:load-if (not (and (sc-is result unsigned-stack)
(or (sc-is value unsigned-reg)
(location= value result))))))
(:result-types unsigned-num)
(:temporary (:sc unsigned-stack) tmp)
(:policy :fast-safe)
(:generator 3
(cond ((sc-is value unsigned-stack)
(setf tmp value))
((sc-is result unsigned-stack)
(setf tmp result)))
(move tmp value)
(unless (location= result tmp)
(move result value))
(let ((ea (make-ea :dword :base rbp-tn
:disp (frame-byte-offset (tn-offset tmp)))))
(inst ldmxcsr ea))))
(macrolet ((defvoid (name insn)
`(progn
(export ',name)
(defknown ,name () (values) ())
(define-vop (,name)
(:translate ,name)
(:policy :fast-safe)
(:generator 1
(inst ,insn))))))
(defvoid cpu-load-fence lfence)
(defvoid cpu-store-fence sfence)
(defvoid cpu-memory-fence mfence)
(defvoid cpu-pause pause)))
#+ecl
(progn
(def-intrinsic cpu-mxcsr () fixnum "_mm_getcsr")
(def-intrinsic %set-cpu-mxcsr (fixnum) fixnum "_mm_setcsr" :export nil :ret-arg 0)
(defsetf cpu-mxcsr %set-cpu-mxcsr)
(def-intrinsic cpu-load-fence () nil "_mm_lfence")
(def-intrinsic cpu-store-fence () nil "_mm_sfence")
(def-intrinsic cpu-memory-fence () nil "_mm_mfence")
(def-intrinsic cpu-pause () nil "_mm_pause"))
#|-----------------------------------------|
| SINGLE-FLOAT INTRINSICS DEFINITIONS |
|-----------------------------------------|#
;; Initialization
#+sbcl
(def-float-set-intrinsic set-ss %set-ss single-float single-reg float-sse-pack movaps)
#+ecl
(progn
(def-intrinsic set-ss (single-float) float-sse-pack "_mm_set_ss")
(def-intrinsic set1-ps (single-float) float-sse-pack "_mm_set1_ps")
(def-intrinsic set-ps (single-float single-float single-float single-float) float-sse-pack "_mm_set_ps")
(def-intrinsic setr-ps (single-float single-float single-float single-float) float-sse-pack "_mm_setr_ps")
(def-intrinsic setzero-ps () float-sse-pack "_mm_setzero_ps"))
;; Memory
(def-load-intrinsic mem-ref-ss float-sse-pack movss "_mm_load_ss")
(def-load-intrinsic mem-ref-ps float-sse-pack movups "_mm_loadu_ps")
(def-load-intrinsic mem-ref-aps float-sse-pack movaps "_mm_load_ps")
(def-store-intrinsic mem-set-ss float-sse-pack movss "_mm_store_ss" :setf-name mem-ref-ss)
(def-store-intrinsic mem-set-ps float-sse-pack movups "_mm_storeu_ps" :setf-name mem-ref-ps)
(def-store-intrinsic mem-set-aps float-sse-pack movaps "_mm_store_ps" :setf-name mem-ref-aps)
(def-store-intrinsic stream-ps float-sse-pack movntps "_mm_stream_ps")
;; Arithmetics
(def-binary-intrinsic add-ss float-sse-pack addss 3 "_mm_add_ss")
(def-binary-intrinsic add-ps float-sse-pack addps 3 "_mm_add_ps" :commutative t)
(def-binary-intrinsic sub-ss float-sse-pack subss 3 "_mm_sub_ss")
(def-binary-intrinsic sub-ps float-sse-pack subps 3 "_mm_sub_ps")
(def-binary-intrinsic mul-ss float-sse-pack mulss 5 "_mm_mul_ss")
(def-binary-intrinsic mul-ps float-sse-pack mulps 5 "_mm_mul_ps" :commutative t)
(def-binary-intrinsic div-ss float-sse-pack divss 13 "_mm_div_ss")
(def-binary-intrinsic div-ps float-sse-pack divps 13 "_mm_div_ps")
(def-binary-intrinsic min-ss float-sse-pack minss 3 "_mm_min_ss")
(def-binary-intrinsic min-ps float-sse-pack minps 3 "_mm_min_ps":commutative t)
(def-binary-intrinsic max-ss float-sse-pack maxss 3 "_mm_max_ss")
(def-binary-intrinsic max-ps float-sse-pack maxps 3 "_mm_max_ps" :commutative t)
(def-unary-intrinsic sqrt-ss float-sse-pack sqrtss 20 "_mm_sqrt_ss" :partial t)
(def-unary-intrinsic sqrt-ps float-sse-pack sqrtps 20 "_mm_sqrt_ps")
(def-unary-intrinsic rsqrt-ss float-sse-pack rsqrtss 20 "_mm_rsqrt_ss" :partial t)
(def-unary-intrinsic rsqrt-ps float-sse-pack rsqrtps 20 "_mm_rsqrt_ps")
(def-unary-intrinsic rcp-ss float-sse-pack rcpss 13 "_mm_rcp_ss" :partial t)
(def-unary-intrinsic rcp-ps float-sse-pack rcpps 13 "_mm_rcp_ps")
;; Bitwise logic
#+sbcl
(def-not-intrinsic not-ps float-sse-pack xorps)
(def-binary-intrinsic and-ps float-sse-pack andps 1 "_mm_and_ps" :commutative t)
(def-binary-intrinsic andnot-ps float-sse-pack andnps 1 "_mm_andnot_ps")
(def-binary-intrinsic or-ps float-sse-pack orps 1 "_mm_or_ps" :commutative t)
(def-binary-intrinsic xor-ps float-sse-pack xorps 1 "_mm_xor_ps" :commutative t)
;; Comparisons
(def-binary-intrinsic =-ss float-sse-pack cmpss 3 "_mm_cmpeq_ss" :tags (:eq))
(def-binary-intrinsic =-ps float-sse-pack cmpps 3 "_mm_cmpeq_ps" :tags (:eq) :commutative t)
(def-binary-intrinsic <-ss float-sse-pack cmpss 3 "_mm_cmplt_ss" :tags (:lt))
(def-binary-intrinsic <-ps float-sse-pack cmpps 3 "_mm_cmplt_ps" :tags (:lt))
(def-binary-intrinsic <=-ss float-sse-pack cmpss 3 "_mm_cmple_ss" :tags (:le))
(def-binary-intrinsic <=-ps float-sse-pack cmpps 3 "_mm_cmple_ps" :tags (:le))
#+ecl
(def-binary-intrinsic >-ss float-sse-pack nil nil "_mm_cmpgt_ss")
#+ecl
(def-binary-intrinsic >-ps float-sse-pack nil nil "_mm_cmpgt_ps")
#+ecl
(def-binary-intrinsic >=-ss float-sse-pack nil nil "_mm_cmpge_ss")
#+ecl
(def-binary-intrinsic >=-ps float-sse-pack nil nil "_mm_cmpge_ps")
(def-binary-intrinsic /=-ss float-sse-pack cmpss 3 "_mm_cmpneq_ss" :tags (:neq))
(def-binary-intrinsic /=-ps float-sse-pack cmpps 3 "_mm_cmpneq_ps" :tags (:neq) :commutative t)
(def-binary-intrinsic /<-ss float-sse-pack cmpss 3 "_mm_cmpnlt_ss" :tags (:nlt))
(def-binary-intrinsic /<-ps float-sse-pack cmpps 3 "_mm_cmpnlt_ps" :tags (:nlt))
(def-binary-intrinsic /<=-ss float-sse-pack cmpss 3 "_mm_cmpnle_ss" :tags (:nle))
(def-binary-intrinsic /<=-ps float-sse-pack cmpps 3 "_mm_cmpnle_ps" :tags (:nle))
#+ecl
(def-binary-intrinsic />-ss float-sse-pack nil nil "_mm_cmpngt_ss")
#+ecl
(def-binary-intrinsic />-ps float-sse-pack nil nil "_mm_cmpngt_ps")
#+ecl
(def-binary-intrinsic />=-ss float-sse-pack nil nil "_mm_cmpnge_ss")
#+ecl
(def-binary-intrinsic />=-ps float-sse-pack nil nil "_mm_cmpnge_ps")
(def-binary-intrinsic cmpord-ss float-sse-pack cmpss 3 "_mm_cmpord_ss" :tags (:ord)) ; neither is NaN
(def-binary-intrinsic cmpord-ps float-sse-pack cmpps 3 "_mm_cmpord_ps" :tags (:ord) :commutative t)
(def-binary-intrinsic cmpunord-ss float-sse-pack cmpss 3 "_mm_cmpunord_ss" :tags (:unord))
(def-binary-intrinsic cmpunord-ps float-sse-pack cmpps 3 "_mm_cmpunord_ps" :tags (:unord) :commutative t)
(def-comparison-intrinsic =-ss? float-sse-pack comiss 3 "_mm_comieq_ss" :commutative t :tags (:e))
(def-comparison-intrinsic =-ssu? float-sse-pack ucomiss 3 "_mm_ucomieq_ss" :commutative t :tags (:e))
(def-comparison-intrinsic <-ss? float-sse-pack comiss 3 "_mm_comilt_ss" :tags (:b))
(def-comparison-intrinsic <-ssu? float-sse-pack ucomiss 3 "_mm_ucomilt_ss" :tags (:b))
(def-comparison-intrinsic <=-ss? float-sse-pack comiss 3 "_mm_comile_ss" :tags (:be))
(def-comparison-intrinsic <=-ssu? float-sse-pack ucomiss 3 "_mm_ucomile_ss" :tags (:be))
(def-comparison-intrinsic >-ss? float-sse-pack comiss 3 "_mm_comigt_ss" :tags (:a))
(def-comparison-intrinsic >-ssu? float-sse-pack ucomiss 3 "_mm_ucomigt_ss" :tags (:a))
(def-comparison-intrinsic >=-ss? float-sse-pack comiss 3 "_mm_comige_ss" :tags (:ae))
(def-comparison-intrinsic >=-ssu? float-sse-pack ucomiss 3 "_mm_ucomige_ss" :tags (:ae))
(def-comparison-intrinsic /=-ss? float-sse-pack comiss 3 "_mm_comineq_ss" :commutative t :tags (:ne))
(def-comparison-intrinsic /=-ssu? float-sse-pack ucomiss 3 "_mm_ucomineq_ss" :commutative t :tags (:ne))
;; Misc
(def-binary-intrinsic unpackhi-ps float-sse-pack unpckhps 1 "_mm_unpackhi_ps")
(def-binary-intrinsic unpacklo-ps float-sse-pack unpcklps 1 "_mm_unpacklo_ps")
(def-binary-intrinsic move-ss float-sse-pack movss 1 "_mm_move_ss")
(def-binary-intrinsic movehl-ps float-sse-pack movhlps 1 "_mm_movehl_ps")
(def-binary-intrinsic movelh-ps float-sse-pack movlhps 1 "_mm_movelh_ps")
(def-unary-intrinsic movemask-ps (unsigned-byte 4) movmskps 1 "_mm_movemask_ps" :arg-type float-sse-pack)
;; Shuffle
(def-binary-intrinsic shuffle-ps float-sse-pack shufps 1 "_mm_shuffle_ps" :immediate-arg (unsigned-byte 8))
;; Conversion
(def-unary-intrinsic convert-pi32-to-ps float-sse-pack cvtdq2ps 3 "_mm_cvtepi32_ps" :arg-type int-sse-pack)
(def-unary-intrinsic convert-ps-to-pi32 int-sse-pack cvtps2dq 3 "_mm_cvtps_epi32" :arg-type float-sse-pack)
(def-unary-intrinsic truncate-ps-to-pi32 int-sse-pack cvttps2dq 3 "_mm_cvttps_epi32" :arg-type float-sse-pack)
(def-sse-int-intrinsic convert-si32-to-ss (signed-byte 32) float-sse-pack cvtsi2ss 3 "_mm_cvtsi32_ss")
(def-cvt-to-int32-intrinsic convert-ss-to-si32 (signed-byte 32) cvtss2si 3 "_mm_cvtss_si32" :arg-type float-sse-pack)
(def-cvt-to-int32-intrinsic truncate-ss-to-si32 (signed-byte 32) cvttss2si 3 "_mm_cvttss_si32" :arg-type float-sse-pack)
#+(or x86_64 x86-64)
(def-sse-int-intrinsic convert-si64-to-ss (signed-byte 64) float-sse-pack cvtsi2ss 3
#-msvc "_mm_cvtsi64_ss" #+msvc "_mm_cvtsi64x_ss")
#+(or x86_64 x86-64)
(def-unary-intrinsic convert-ss-to-si64 (signed-byte 64) cvtss2si 3
#-msvc "_mm_cvtss_si64" #+msvc "_mm_cvtss_si64x" :arg-type float-sse-pack)
#+(or x86_64 x86-64)
(def-unary-intrinsic truncate-ss-to-si64 (signed-byte 64) cvttss2si 3
#-msvc "_mm_cvttss_si64" #+msvc "_mm_cvttss_si64x" :arg-type float-sse-pack)
#|-----------------------------------------|
| DOUBLE-FLOAT INTRINSICS DEFINITIONS |
|-----------------------------------------|#
;; Initialization
#+sbcl
(def-float-set-intrinsic set-sd %set-sd double-float double-reg double-sse-pack movapd)
#+ecl
(progn
(def-intrinsic set-sd (double-float) double-sse-pack "_mm_set_sd")
(def-intrinsic set1-pd (double-float) double-sse-pack "_mm_set1_pd")
(def-intrinsic set-pd (double-float double-float) double-sse-pack "_mm_set_pd")
(def-intrinsic setr-pd (double-float double-float) double-sse-pack "_mm_setr_pd")
(def-intrinsic setzero-pd () double-sse-pack "_mm_setzero_pd"))
;; Memory
(def-load-intrinsic mem-ref-sd double-sse-pack movsd "_mm_load_sd")
(def-load-intrinsic mem-ref-pd double-sse-pack movupd "_mm_loadu_pd")
(def-load-intrinsic mem-ref-apd double-sse-pack movapd "_mm_load_pd")
(def-load-intrinsic loadh-pd double-sse-pack movhpd "_mm_loadh_pd" :register-arg t)
(def-load-intrinsic loadl-pd double-sse-pack movlpd "_mm_loadl_pd" :register-arg t)
(def-store-intrinsic mem-set-sd double-sse-pack movsd "_mm_store_sd" :setf-name mem-ref-sd)
(def-store-intrinsic mem-set-pd double-sse-pack movupd "_mm_storeu_pd" :setf-name mem-ref-pd)
(def-store-intrinsic mem-set-apd double-sse-pack movapd "_mm_store_pd" :setf-name mem-ref-apd)
(def-store-intrinsic storeh-pd double-sse-pack movhpd "_mm_storeh_pd")
(def-store-intrinsic storel-pd double-sse-pack movlpd "_mm_storel_pd")
(def-store-intrinsic stream-pd double-sse-pack movntpd "_mm_stream_pd")
;; Arithmetics
(def-binary-intrinsic add-sd double-sse-pack addsd 3 "_mm_add_sd")
(def-binary-intrinsic add-pd double-sse-pack addpd 3 "_mm_add_pd" :commutative t)
(def-binary-intrinsic sub-sd double-sse-pack subsd 3 "_mm_sub_sd")
(def-binary-intrinsic sub-pd double-sse-pack subpd 3 "_mm_sub_pd")
(def-binary-intrinsic mul-sd double-sse-pack mulsd 5 "_mm_mul_sd")
(def-binary-intrinsic mul-pd double-sse-pack mulpd 5 "_mm_mul_pd" :commutative t)
(def-binary-intrinsic div-sd double-sse-pack divsd 13 "_mm_div_sd")
(def-binary-intrinsic div-pd double-sse-pack divpd 13 "_mm_div_pd")
(def-binary-intrinsic min-sd double-sse-pack minsd 3 "_mm_min_sd")
(def-binary-intrinsic min-pd double-sse-pack minpd 3 "_mm_min_pd" :commutative t)
(def-binary-intrinsic max-sd double-sse-pack maxsd 3 "_mm_max_sd")
(def-binary-intrinsic max-pd double-sse-pack maxpd 3 "_mm_max_pd" :commutative t)
(def-binary-intrinsic sqrt-sd double-sse-pack sqrtsd 20 "_mm_sqrt_sd")
(def-unary-intrinsic sqrt-pd double-sse-pack sqrtpd 20 "_mm_sqrt_pd")
;; Bitwise logic
#+sbcl
(def-not-intrinsic not-pd double-sse-pack xorpd)
(def-binary-intrinsic and-pd double-sse-pack andpd 1 "_mm_and_pd" :commutative t)
(def-binary-intrinsic andnot-pd double-sse-pack andnpd 1 "_mm_andnot_pd")
(def-binary-intrinsic or-pd double-sse-pack orpd 1 "_mm_or_pd" :commutative t)
(def-binary-intrinsic xor-pd double-sse-pack xorpd 1 "_mm_xor_pd" :commutative t)
;; Comparisons
(def-binary-intrinsic =-sd double-sse-pack cmpsd 3 "_mm_cmpeq_sd" :tags (:eq))
(def-binary-intrinsic =-pd double-sse-pack cmppd 3 "_mm_cmpeq_pd" :tags (:eq) :commutative t)
(def-binary-intrinsic <-sd double-sse-pack cmpsd 3 "_mm_cmplt_sd" :tags (:lt))
(def-binary-intrinsic <-pd double-sse-pack cmppd 3 "_mm_cmplt_pd" :tags (:lt))
(def-binary-intrinsic <=-sd double-sse-pack cmpsd 3 "_mm_cmple_sd" :tags (:le))
(def-binary-intrinsic <=-pd double-sse-pack cmppd 3 "_mm_cmple_pd" :tags (:le))
#+ecl
(def-binary-intrinsic >-sd double-sse-pack nil nil "_mm_cmpgt_sd")
#+ecl
(def-binary-intrinsic >-pd double-sse-pack nil nil "_mm_cmpgt_pd")
#+ecl
(def-binary-intrinsic >=-sd double-sse-pack nil nil "_mm_cmpge_sd")
#+ecl
(def-binary-intrinsic >=-pd double-sse-pack nil nil "_mm_cmpge_pd")
(def-binary-intrinsic /=-sd double-sse-pack cmpsd 3 "_mm_cmpneq_sd" :tags (:neq))
(def-binary-intrinsic /=-pd double-sse-pack cmppd 3 "_mm_cmpneq_pd" :tags (:neq) :commutative t)
(def-binary-intrinsic /<-sd double-sse-pack cmpsd 3 "_mm_cmpnlt_sd" :tags (:nlt))
(def-binary-intrinsic /<-pd double-sse-pack cmppd 3 "_mm_cmpnlt_pd" :tags (:nlt))
(def-binary-intrinsic /<=-sd double-sse-pack cmpsd 3 "_mm_cmpnle_sd" :tags (:nle))
(def-binary-intrinsic /<=-pd double-sse-pack cmppd 3 "_mm_cmpnle_pd" :tags (:nle))
#+ecl
(def-binary-intrinsic />-sd double-sse-pack nil nil "_mm_cmpngt_sd")
#+ecl
(def-binary-intrinsic />-pd double-sse-pack nil nil "_mm_cmpngt_pd")
#+ecl
(def-binary-intrinsic />=-sd double-sse-pack nil nil "_mm_cmpnge_sd")
#+ecl
(def-binary-intrinsic />=-pd double-sse-pack nil nil "_mm_cmpnge_pd")
(def-binary-intrinsic cmpord-sd double-sse-pack cmpsd 3 "_mm_cmpord_sd" :tags (:ord)) ; neither is NaN
(def-binary-intrinsic cmpord-pd double-sse-pack cmppd 3 "_mm_cmpord_pd" :tags (:ord) :commutative t)
(def-binary-intrinsic cmpunord-sd double-sse-pack cmpsd 3 "_mm_cmpunord_sd" :tags (:unord))
(def-binary-intrinsic cmpunord-pd double-sse-pack cmppd 3 "_mm_cmpunord_pd" :tags (:unord) :commutative t)
(def-comparison-intrinsic =-sd? double-sse-pack comisd 3 "_mm_comieq_sd" :commutative t :tags (:e))
(def-comparison-intrinsic =-sdu? double-sse-pack ucomisd 3 "_mm_ucomieq_sd" :commutative t :tags (:e))
(def-comparison-intrinsic <-sd? double-sse-pack comisd 3 "_mm_comilt_sd" :tags (:b))
(def-comparison-intrinsic <-sdu? double-sse-pack ucomisd 3 "_mm_ucomilt_sd" :tags (:b))
(def-comparison-intrinsic <=-sd? double-sse-pack comisd 3 "_mm_comile_sd" :tags (:be))
(def-comparison-intrinsic <=-sdu? double-sse-pack ucomisd 3 "_mm_ucomile_sd" :tags (:be))
(def-comparison-intrinsic >-sd? double-sse-pack comisd 3 "_mm_comigt_sd" :tags (:a))
(def-comparison-intrinsic >-sdu? double-sse-pack ucomisd 3 "_mm_ucomigt_sd" :tags (:a))
(def-comparison-intrinsic >=-sd? double-sse-pack comisd 3 "_mm_comige_sd" :tags (:ae))
(def-comparison-intrinsic >=-sdu? double-sse-pack ucomisd 3 "_mm_ucomige_sd" :tags (:ae))
(def-comparison-intrinsic /=-sd? double-sse-pack comisd 3 "_mm_comineq_sd" :commutative t :tags (:ne))
(def-comparison-intrinsic /=-sdu? double-sse-pack ucomisd 3 "_mm_ucomineq_sd" :commutative t :tags (:ne))
;; Misc
(def-binary-intrinsic unpackhi-pd double-sse-pack unpckhpd 1 "_mm_unpackhi_pd")
(def-binary-intrinsic unpacklo-pd double-sse-pack unpcklpd 1 "_mm_unpacklo_pd")
(def-binary-intrinsic move-sd double-sse-pack movsd 1 "_mm_move_sd")
(def-unary-intrinsic movemask-pd (unsigned-byte 2) movmskpd 1 "_mm_movemask_pd" :arg-type double-sse-pack)
;; Shuffle
(def-binary-intrinsic shuffle-pd double-sse-pack shufpd 1 "_mm_shuffle_pd" :immediate-arg (unsigned-byte 2))
;; Conversion
(def-unary-intrinsic convert-ps-to-pd double-sse-pack cvtps2pd 3 "_mm_cvtps_pd" :arg-type float-sse-pack)
(def-unary-intrinsic convert-pd-to-ps float-sse-pack cvtpd2ps 3 "_mm_cvtpd_ps" :arg-type double-sse-pack)
(def-binary-intrinsic convert-ss-to-sd double-sse-pack cvtss2sd 3 "_mm_cvtss_sd" :y-type float-sse-pack)
(def-binary-intrinsic convert-sd-to-ss float-sse-pack cvtsd2ss 3 "_mm_cvtsd_ss" :y-type double-sse-pack)
(def-unary-intrinsic convert-pi32-to-pd double-sse-pack cvtdq2pd 3 "_mm_cvtepi32_pd" :arg-type int-sse-pack)
(def-unary-intrinsic convert-pd-to-pi32 int-sse-pack cvtpd2dq 3 "_mm_cvtpd_epi32" :arg-type double-sse-pack)
(def-unary-intrinsic truncate-pd-to-pi32 int-sse-pack cvttpd2dq 3 "_mm_cvttpd_epi32" :arg-type double-sse-pack)
(def-sse-int-intrinsic convert-si32-to-sd (signed-byte 32) double-sse-pack cvtsi2ss 3 "_mm_cvtsi32_sd")
(def-cvt-to-int32-intrinsic convert-sd-to-si32 (signed-byte 32) cvtsd2si 3 "_mm_cvtsd_si32" :arg-type double-sse-pack)
(def-cvt-to-int32-intrinsic truncate-sd-to-si32 (signed-byte 32) cvttsd2si 3 "_mm_cvttsd_si32" :arg-type double-sse-pack)
#+(or x86_64 x86-64)
(def-sse-int-intrinsic convert-si64-to-sd (signed-byte 64) double-sse-pack cvtsi2ss 3
#-msvc "_mm_cvtsi64_sd" #+msvc "_mm_cvtsi64x_sd")
#+(or x86_64 x86-64)
(def-unary-intrinsic convert-sd-to-si64 (signed-byte 64) cvtsd2si 3
#-msvc "_mm_cvtsd_si64" #+msvc "_mm_cvtsd_si64x" :arg-type double-sse-pack)
#+(or x86_64 x86-64)
(def-unary-intrinsic truncate-sd-to-si64 (signed-byte 64) cvttsd2si 3
#-msvc "_mm_cvttsd_si64" #+msvc "_mm_cvttsd_si64x" :arg-type double-sse-pack)
#|-----------------------------------------|
| INTEGER INTRINSICS DEFINITIONS |
|-----------------------------------------|#
;; Initialization
#+ecl
(progn
(def-intrinsic set1-pi8 (fixnum) int-sse-pack "_mm_set1_epi8")
(def-intrinsic set1-pi16 (fixnum) int-sse-pack "_mm_set1_epi16")
(def-intrinsic set1-pi32 (ext:integer32) int-sse-pack "_mm_set1_epi32")
#+x86_64
(def-intrinsic set1-pi64 (ext:integer64) int-sse-pack "_mm_set1_epi64x")
(def-intrinsic set1-pu32 (ext:byte32) int-sse-pack "_mm_set1_epi32")
#+x86_64
(def-intrinsic set1-pu64 (ext:byte64) int-sse-pack "_mm_set1_epi64x")
;;-----
(def-intrinsic set-pi8 (fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_set_epi8")
(def-intrinsic set-pi16 (fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_set_epi16")
(def-intrinsic set-pi32 (ext:integer32 ext:integer32 ext:integer32 ext:integer32) int-sse-pack "_mm_set_epi32")
#+x86_64
(def-intrinsic set-pi64 (ext:integer64 ext:integer64) int-sse-pack "_mm_set_epi64x")
(def-intrinsic set-pu32 (ext:byte32 ext:byte32 ext:byte32 ext:byte32) int-sse-pack "_mm_set_epi32")
#+x86_64
(def-intrinsic set-pu64 (ext:byte64 ext:byte64) int-sse-pack "_mm_set_epi64x")
;;-----
(def-intrinsic setr-pi8 (fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_setr_epi8")
(def-intrinsic setr-pi16 (fixnum fixnum fixnum fixnum
fixnum fixnum fixnum fixnum) int-sse-pack "_mm_setr_epi16")
(def-intrinsic setr-pi32 (ext:integer32 ext:integer32 ext:integer32 ext:integer32) int-sse-pack "_mm_setr_epi32")
#+x86_64
(def-intrinsic setr-pi64 (ext:integer64 ext:integer64) int-sse-pack "_mm_set_epi64x" :reorder-args t)
(def-intrinsic setr-pu32 (ext:byte32 ext:byte32 ext:byte32 ext:byte32) int-sse-pack "_mm_setr_epi32")
#+x86_64
(def-intrinsic setr-pu64 (ext:byte64 ext:byte64) int-sse-pack "_mm_set_epi64x" :reorder-args t)
;;-----
(def-intrinsic setzero-pi () int-sse-pack "_mm_setzero_si128"))
;; Memory
(def-load-intrinsic mem-ref-pi int-sse-pack movdqu "_mm_loadu_si128")
(def-load-intrinsic mem-ref-api int-sse-pack movdqa "_mm_load_si128")
(def-load-intrinsic mem-ref-si64 int-sse-pack movd "_mm_loadl_epi64")
(def-store-intrinsic mem-set-pi int-sse-pack movdqu "_mm_storeu_si128" :setf-name mem-ref-pi)
(def-store-intrinsic mem-set-api int-sse-pack movdqa "_mm_store_si128" :setf-name mem-ref-api)
(def-store-intrinsic mem-set-si64 int-sse-pack movd "_mm_storel_epi64" :setf-name mem-ref-si64)
(def-store-intrinsic stream-pi int-sse-pack movntdq "_mm_stream_si128")
;; Masked move
#+ecl
(def-mem-intrinsic maskmoveu-pi "char" nil "_mm_maskmoveu_si128" :prefix-args (int-sse-pack int-sse-pack))
#+sbcl
(progn
(defknown %maskmoveu-pi (sse-pack sse-pack system-area-pointer fixnum) (values) ())
(define-vop (%maskmoveu-pi)
(:translate %maskmoveu-pi)
(:args (value :scs #.+any-sse-reg+)
(mask :scs #.+any-sse-reg+)
(sap :scs (sap-reg) :target rdi)
(offset :scs (signed-reg)))
(:arg-types simd-pack simd-pack system-area-pointer signed-num)
(:temporary (:sc sap-reg :offset rdi-offset :from :eval) rdi)
(:policy :fast-safe)
(:note "inline MASKMOVEU operation")
(:generator 5
(if (location= sap rdi)
(inst add rdi offset)
(inst lea rdi (make-ea :qword :base sap :index offset)))
(inst maskmovdqu value mask)))
(define-vop (%maskmoveu-pi-c)
(:translate %maskmoveu-pi)
(:args (value :scs #.+any-sse-reg+)
(mask :scs #.+any-sse-reg+)
(sap :scs (sap-reg) :target rdi))
(:arg-types simd-pack simd-pack system-area-pointer (:constant (signed-byte 32)))
(:info offset)
(:temporary (:sc sap-reg :offset rdi-offset :from :eval) rdi)
(:policy :fast-safe)
(:note "inline MASKMOVEU operation")
(:generator 4
(if (location= sap rdi)
(unless (= offset 0)
(inst add rdi offset))
(if (= offset 0)
(inst mov rdi sap)
(inst lea rdi (make-ea :qword :base sap :disp offset))))
(inst maskmovdqu value mask)))
(def-splice-transform %maskmoveu-pi (value mask (sap+ sap offset1) offset2)
(%maskmoveu-pi value mask sap (+ offset1 offset2))))
;; Arithmetics
(def-binary-intrinsic add-pi8 int-sse-pack paddb 1 "_mm_add_epi8" :commutative t)
(def-binary-intrinsic add-pi16 int-sse-pack paddw 1 "_mm_add_epi16" :commutative t)
(def-binary-intrinsic add-pi32 int-sse-pack paddd 1 "_mm_add_epi32" :commutative t)
(def-binary-intrinsic add-pi64 int-sse-pack paddq 1 "_mm_add_epi64" :commutative t)
(def-binary-intrinsic adds-pi8 int-sse-pack paddsb 1 "_mm_adds_epi8" :commutative t)
(def-binary-intrinsic adds-pi16 int-sse-pack paddsw 1 "_mm_adds_epi16" :commutative t)
(def-binary-intrinsic adds-pu8 int-sse-pack paddusb 1 "_mm_adds_epu8" :commutative t)
(def-binary-intrinsic adds-pu16 int-sse-pack paddusw 1 "_mm_adds_epu16" :commutative t)
(def-binary-intrinsic avg-pu8 int-sse-pack pavgb 1 "_mm_avg_epu8" :commutative t)
(def-binary-intrinsic avg-pu16 int-sse-pack pavgw 1 "_mm_avg_epu16" :commutative t)
(def-binary-intrinsic madd-pi16 int-sse-pack pmaddwd 1 "_mm_madd_epi16" :commutative t)
(def-binary-intrinsic max-pu8 int-sse-pack pmaxub 1 "_mm_max_epu8" :commutative t)
(def-binary-intrinsic max-pi16 int-sse-pack pmaxsw 1 "_mm_max_epi16" :commutative t)
(def-binary-intrinsic min-pu8 int-sse-pack pminub 1 "_mm_min_epu8" :commutative t)
(def-binary-intrinsic min-pi16 int-sse-pack pminsw 1 "_mm_min_epi16" :commutative t)
(def-binary-intrinsic mulhi-pi16 int-sse-pack pmulhw 3 "_mm_mulhi_epi16" :commutative t)
(def-binary-intrinsic mulhi-pu16 int-sse-pack pmulhuw 3 "_mm_mulhi_epu16" :commutative t)
(def-binary-intrinsic mullo-pi16 int-sse-pack pmullw 3 "_mm_mullo_epi16" :commutative t)
(def-binary-intrinsic mul-pu32 int-sse-pack pmuludq 3 "_mm_mul_epu32" :commutative t)
(def-binary-intrinsic sad-pu8 int-sse-pack psadbw 1 "_mm_sad_epu8" :commutative t)
(def-binary-intrinsic sub-pi8 int-sse-pack psubb 1 "_mm_sub_epi8")
(def-binary-intrinsic sub-pi16 int-sse-pack psubw 1 "_mm_sub_epi16")
(def-binary-intrinsic sub-pi32 int-sse-pack psubd 1 "_mm_sub_epi32")
(def-binary-intrinsic sub-pi64 int-sse-pack psubq 1 "_mm_sub_epi64")
(def-binary-intrinsic subs-pi8 int-sse-pack psubsb 1 "_mm_subs_epi8")
(def-binary-intrinsic subs-pi16 int-sse-pack psubsw 1 "_mm_subs_epi16")
(def-binary-intrinsic subs-pu8 int-sse-pack psubusb 1 "_mm_subs_epu8")
(def-binary-intrinsic subs-pu16 int-sse-pack psubusw 1 "_mm_subs_epu16")
;; Bitwise logic
#+sbcl
(def-not-intrinsic not-pi int-sse-pack pxor)
(def-binary-intrinsic and-pi int-sse-pack pand 1 "_mm_and_si128" :commutative t)
(def-binary-intrinsic andnot-pi int-sse-pack pandn 1 "_mm_andnot_si128")
(def-binary-intrinsic or-pi int-sse-pack por 1 "_mm_or_si128" :commutative t)
(def-binary-intrinsic xor-pi int-sse-pack pxor 1 "_mm_xor_si128" :commutative t)
;; Shifts
(def-unary-intrinsic slli-pi int-sse-pack pslldq 1 "_mm_slli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8))
(def-sse-int-intrinsic slli-pi16 fixnum int-sse-pack psllw 3 "_mm_slli_epi16" :make-temporary t
:defun-body "_mm_sll_epi16(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic slli-pi32 fixnum int-sse-pack pslld 3 "_mm_slli_epi32" :make-temporary t
:defun-body "_mm_sll_epi32(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic slli-pi64 fixnum int-sse-pack psllq 3 "_mm_slli_epi64" :make-temporary t
:defun-body "_mm_sll_epi64(#0,_mm_cvtsi32_si128(#1))")
(def-binary-intrinsic sll-pi16 int-sse-pack psllw 1 "_mm_sll_epi16")
(def-binary-intrinsic sll-pi32 int-sse-pack pslld 1 "_mm_sll_epi32")
(def-binary-intrinsic sll-pi64 int-sse-pack psllq 1 "_mm_sll_epi64")
(def-sse-int-intrinsic srai-pi16 fixnum int-sse-pack psraw 3 "_mm_srai_epi16" :make-temporary t
:defun-body "_mm_sra_epi16(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic srai-pi32 fixnum int-sse-pack psrad 3 "_mm_srai_epi32" :make-temporary t
:defun-body "_mm_sra_epi32(#0,_mm_cvtsi32_si128(#1))")
(def-binary-intrinsic sra-pi16 int-sse-pack psraw 1 "_mm_sra_epi16")
(def-binary-intrinsic sra-pi32 int-sse-pack psrad 1 "_mm_sra_epi32")
(def-unary-intrinsic srli-pi int-sse-pack psrldq 1 "_mm_srli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8))
(def-sse-int-intrinsic srli-pi16 fixnum int-sse-pack psrlw 3 "_mm_srli_epi16" :make-temporary t
:defun-body "_mm_srl_epi16(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic srli-pi32 fixnum int-sse-pack psrld 3 "_mm_srli_epi32" :make-temporary t
:defun-body "_mm_srl_epi32(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic srli-pi64 fixnum int-sse-pack psrlq 3 "_mm_srli_epi64" :make-temporary t
:defun-body "_mm_srl_epi64(#0,_mm_cvtsi32_si128(#1))")
(def-binary-intrinsic srl-pi16 int-sse-pack psrlw 1 "_mm_srl_epi16")
(def-binary-intrinsic srl-pi32 int-sse-pack psrld 1 "_mm_srl_epi32")
(def-binary-intrinsic srl-pi64 int-sse-pack psrlq 1 "_mm_srl_epi64")
#+sbcl
(macrolet ((defimm (name insn bits &key arithmetic)
`(define-vop (,(symbolicate "%" name "-IMM") sse-int-base-op)
(:translate ,name)
(:args (x :scs #.+any-sse-reg+ :target r))
(:arg-types simd-pack (:constant fixnum))
(:result-types sb-kernel:simd-pack-int)
(:info immv)
(:generator 1
,@(let ((core `(progn
(ensure-move int-sse-pack r x)
(unless (= immv 0)
(inst ,insn r immv)))))
(if arithmetic
`((when (or (< immv 0) (>= immv ,bits))
(setf immv ,bits))
,core)
`((if (or (< immv 0) (>= immv ,bits))
(inst pxor r r)
,core))))))))
(defimm slli-pi16 psllw-imm 16)
(defimm slli-pi32 pslld-imm 32)
(defimm slli-pi64 psllq-imm 64)
(defimm srai-pi16 psraw-imm 16 :arithmetic t)
(defimm srai-pi32 psrad-imm 32 :arithmetic t)
(defimm srli-pi16 psrlw-imm 16)
(defimm srli-pi32 psrld-imm 32)
(defimm srli-pi64 psrlq-imm 64))
;; Comparisons
(def-binary-intrinsic =-pi8 int-sse-pack pcmpeqb 1 "_mm_cmpeq_epi8")
(def-binary-intrinsic =-pi16 int-sse-pack pcmpeqw 1 "_mm_cmpeq_epi16")
(def-binary-intrinsic =-pi32 int-sse-pack pcmpeqd 1 "_mm_cmpeq_epi32")
#+ecl
(def-binary-intrinsic <-pi8 int-sse-pack nil nil "_mm_cmplt_epi8")
#+ecl
(def-binary-intrinsic <-pi16 int-sse-pack nil nil "_mm_cmplt_epi16")
#+ecl
(def-binary-intrinsic <-pi32 int-sse-pack nil nil "_mm_cmplt_epi32")
(def-binary-intrinsic >-pi8 int-sse-pack pcmpgtb 1 "_mm_cmpgt_epi8")
(def-binary-intrinsic >-pi16 int-sse-pack pcmpgtw 1 "_mm_cmpgt_epi16")
(def-binary-intrinsic >-pi32 int-sse-pack pcmpgtd 1 "_mm_cmpgt_epi32")
;; Misc
(def-binary-intrinsic packs-pi16 int-sse-pack packsswb 1 "_mm_packs_epi16")
(def-binary-intrinsic packs-pi32 int-sse-pack packssdw 1 "_mm_packs_epi32")
(def-binary-intrinsic packus-pi16 int-sse-pack packuswb 1 "_mm_packus_epi16")
(def-unary-intrinsic extract-pi16 (unsigned-byte 16) pextrw 1 "_mm_extract_epi16"
:immediate-arg (unsigned-byte 8) :arg-type int-sse-pack)
(def-sse-int-intrinsic insert-pi16 fixnum int-sse-pack pinsrw 1 "_mm_insert_epi16"
:immediate-arg (unsigned-byte 8))
(def-unary-intrinsic movemask-pi8 (unsigned-byte 16) pmovmskb 1 "_mm_movemask_epi8" :arg-type int-sse-pack)
(def-binary-intrinsic unpackhi-pi8 int-sse-pack punpckhbw 1 "_mm_unpackhi_epi8")
(def-binary-intrinsic unpackhi-pi16 int-sse-pack punpckhwd 1 "_mm_unpackhi_epi16")
(def-binary-intrinsic unpackhi-pi32 int-sse-pack punpckhdq 1 "_mm_unpackhi_epi32")
(def-binary-intrinsic unpackhi-pi64 int-sse-pack punpckhqdq 1 "_mm_unpackhi_epi64")
(def-binary-intrinsic unpacklo-pi8 int-sse-pack punpcklbw 1 "_mm_unpacklo_epi8")
(def-binary-intrinsic unpacklo-pi16 int-sse-pack punpcklwd 1 "_mm_unpacklo_epi16")
(def-binary-intrinsic unpacklo-pi32 int-sse-pack punpckldq 1 "_mm_unpacklo_epi32")
(def-binary-intrinsic unpacklo-pi64 int-sse-pack punpcklqdq 1 "_mm_unpacklo_epi64")
(def-unary-intrinsic move-pi64 int-sse-pack movq 1 "_mm_move_epi64")
;; Shuffle
(def-unary-intrinsic shuffle-pi32 int-sse-pack pshufd 1 "_mm_shuffle_epi32" :immediate-arg (unsigned-byte 8))
(def-unary-intrinsic shufflelo-pi16 int-sse-pack pshuflw 1 "_mm_shufflelo_epi16" :immediate-arg (unsigned-byte 8))
(def-unary-intrinsic shufflehi-pi16 int-sse-pack pshufhw 1 "_mm_shufflehi_epi16" :immediate-arg (unsigned-byte 8))
;; Conversion
#+sbcl
(progn
(export 'convert-si32-to-pi)
(defknown convert-si32-to-pi ((signed-byte 32)) int-sse-pack (foldable flushable dx-safe))
(export 'convert-su32-to-pi)
(defknown convert-su32-to-pi ((unsigned-byte 32)) int-sse-pack (foldable flushable dx-safe))
(export 'convert-si64-to-pi)
(defknown convert-si64-to-pi ((signed-byte 64)) int-sse-pack (foldable flushable dx-safe))
(export 'convert-su64-to-pi)
(defknown convert-su64-to-pi ((unsigned-byte 64)) int-sse-pack (foldable flushable dx-safe))
(defknown %set-int ((signed-byte 64)) int-sse-pack (foldable flushable always-translatable dx-safe))
(defknown %set-uint ((unsigned-byte 64)) int-sse-pack (foldable flushable always-translatable dx-safe))
(define-vop (%set-int)
(:translate %set-int %set-uint
convert-si32-to-pi convert-su32-to-pi
convert-si64-to-pi convert-su64-to-pi)
(:args (arg :scs (signed-reg unsigned-reg signed-stack unsigned-stack)))
(:arg-types untagged-num)
(:results (dst :scs (int-sse-reg)))
(:result-types sb-kernel:simd-pack-int)
(:policy :fast-safe)
(:generator 1
(inst movd dst arg))))
#+ecl
(progn
(def-intrinsic convert-si32-to-pi (ext:integer32) int-sse-pack "_mm_cvtsi32_si128")
(def-intrinsic convert-su32-to-pi (ext:byte32) int-sse-pack "_mm_cvtsi32_si128")
#+x86_64
(def-intrinsic convert-si64-to-pi (ext:integer64) int-sse-pack #-msvc "_mm_cvtsi64_si128" #+msvc "_mm_cvtsi64x_si128")
#+x86_64
(def-intrinsic convert-su64-to-pi (ext:byte64) int-sse-pack #-msvc "_mm_cvtsi64_si128" #+msvc "_mm_cvtsi64x_si128"))
(def-cvt-to-int32-intrinsic convert-pi-to-si32 (signed-byte 32) movd 1 "_mm_cvtsi128_si32"
:arg-type int-sse-pack)
(def-unary-intrinsic convert-pi-to-su32 (unsigned-byte 32) movd 1 "_mm_cvtsi128_si32"
:result-size :dword :arg-type int-sse-pack)
#+(or x86_64 x86-64)
(def-unary-intrinsic convert-pi-to-si64 (signed-byte 64) movd 1
#-msvc "_mm_cvtsi128_si64" #+msvc "_mm_cvtsi128_si64x" :arg-type int-sse-pack)
#+(or x86_64 x86-64)
(def-unary-intrinsic convert-pi-to-su64 (unsigned-byte 64) movd 1
#-msvc "_mm_cvtsi128_si64" #+msvc "_mm_cvtsi128_si64x" :arg-type int-sse-pack)