@@ -401,4 +401,176 @@ extern "C" void acc_combined(int N) {
401401  //  CHECK-NEXT: } loc
402402  //  CHECK-NEXT: acc.terminator
403403  //  CHECK-NEXT: } loc
404+ 
405+ #pragma  acc kernels loop worker
406+   for (unsigned  I = 0 ; I < N; ++I);
407+   //  CHECK-NEXT: acc.kernels combined(loop) {
408+   //  CHECK-NEXT: acc.loop combined(kernels) worker {
409+   //  CHECK: acc.yield
410+   //  CHECK-NEXT: } loc
411+   //  CHECK: acc.terminator
412+   //  CHECK-NEXT: } loc
413+ 
414+ #pragma  acc kernels loop worker(N)
415+   for (unsigned  I = 0 ; I < N; ++I);
416+   //  CHECK-NEXT: acc.kernels combined(loop) {
417+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
418+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
419+   //  CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32) {
420+   //  CHECK: acc.yield
421+   //  CHECK-NEXT: } loc
422+   //  CHECK: acc.terminator
423+   //  CHECK-NEXT: } loc
424+ 
425+ #pragma  acc kernels loop worker device_type(nvidia, radeon) worker
426+   for (unsigned  I = 0 ; I < N; ++I);
427+   //  CHECK-NEXT: acc.kernels combined(loop) {
428+   //  CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
429+   //  CHECK: acc.yield
430+   //  CHECK-NEXT: } loc
431+   //  CHECK: acc.terminator
432+   //  CHECK-NEXT: } loc
433+ 
434+ #pragma  acc kernels loop worker(N) device_type(nvidia, radeon) worker
435+   for (unsigned  I = 0 ; I < N; ++I);
436+   //  CHECK-NEXT: acc.kernels combined(loop) {
437+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
438+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
439+   //  CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
440+   //  CHECK: acc.yield
441+   //  CHECK-NEXT: } loc
442+   //  CHECK: acc.terminator
443+   //  CHECK-NEXT: } loc
444+ 
445+ #pragma  acc kernels loop worker device_type(nvidia, radeon) worker(N)
446+   for (unsigned  I = 0 ; I < N; ++I);
447+   //  CHECK-NEXT: acc.kernels combined(loop) {
448+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
449+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
450+   //  CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
451+   //  CHECK: acc.yield
452+   //  CHECK-NEXT: } loc
453+   //  CHECK: acc.terminator
454+   //  CHECK-NEXT: } loc
455+ 
456+ #pragma  acc kernels loop worker(N) device_type(nvidia, radeon) worker(N + 1)
457+   for (unsigned  I = 0 ; I < N; ++I);
458+   //  CHECK-NEXT: acc.kernels combined(loop) {
459+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
460+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
461+   //  CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
462+   //  CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
463+   //  CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
464+   //  CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
465+   //  CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
466+   //  CHECK: acc.yield
467+   //  CHECK-NEXT: } loc
468+   //  CHECK: acc.terminator
469+   //  CHECK-NEXT: } loc
470+ 
471+ #pragma  acc kernels loop device_type(nvidia, radeon) worker(num:N + 1)
472+   for (unsigned  I = 0 ; I < N; ++I);
473+   //  CHECK-NEXT: acc.kernels combined(loop) {
474+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
475+   //  CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
476+   //  CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
477+   //  CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
478+   //  CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
479+   //  CHECK: acc.terminator
480+   //  CHECK-NEXT: } loc
481+ 
482+ 
483+ #pragma  acc kernels loop worker vector device_type(nvidia) worker vector
484+   for (unsigned  I = 0 ; I < N; ++I);
485+   //  CHECK-NEXT: acc.kernels combined(loop) {
486+   //  CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>])
487+   //  CHECK: acc.yield
488+   //  CHECK-NEXT: } loc
489+   //  CHECK: acc.terminator
490+   //  CHECK-NEXT: } loc
491+ 
492+ #pragma  acc kernels loop vector
493+   for (unsigned  I = 0 ; I < N; ++I);
494+   //  CHECK-NEXT: acc.kernels combined(loop) {
495+   //  CHECK: acc.loop combined(kernels) vector {
496+   //  CHECK: acc.yield
497+   //  CHECK-NEXT: } loc
498+   //  CHECK: acc.terminator
499+   //  CHECK-NEXT: } loc
500+ 
501+ #pragma  acc kernels loop vector(N)
502+   for (unsigned  I = 0 ; I < N; ++I);
503+   //  CHECK-NEXT: acc.kernels combined(loop) {
504+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
505+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
506+   //  CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_CONV]] : si32) {
507+   //  CHECK: acc.yield
508+   //  CHECK-NEXT: } loc
509+   //  CHECK: acc.terminator
510+   //  CHECK-NEXT: } loc
511+ 
512+ #pragma  acc kernels loop vector device_type(nvidia, radeon) vector
513+   for (unsigned  I = 0 ; I < N; ++I);
514+   //  CHECK-NEXT: acc.kernels combined(loop) {
515+   //  CHECK-NEXT: acc.loop combined(kernels) vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
516+   //  CHECK: acc.yield
517+   //  CHECK-NEXT: } loc
518+   //  CHECK: acc.terminator
519+   //  CHECK-NEXT: } loc
520+ 
521+ #pragma  acc kernels loop vector(N) device_type(nvidia, radeon) vector
522+   for (unsigned  I = 0 ; I < N; ++I);
523+   //  CHECK-NEXT: acc.kernels combined(loop) {
524+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
525+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
526+   //  CHECK-NEXT: acc.loop combined(kernels) vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
527+   //  CHECK: acc.yield
528+   //  CHECK-NEXT: } loc
529+   //  CHECK: acc.terminator
530+   //  CHECK-NEXT: } loc
531+ 
532+ #pragma  acc kernels loop vector(N) device_type(nvidia, radeon) vector(N + 1)
533+   for (unsigned  I = 0 ; I < N; ++I);
534+   //  CHECK-NEXT: acc.kernels combined(loop) {
535+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
536+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
537+   //  CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
538+   //  CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
539+   //  CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
540+   //  CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
541+   //  CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
542+   //  CHECK: acc.yield
543+   //  CHECK-NEXT: } loc
544+   //  CHECK: acc.terminator
545+   //  CHECK-NEXT: } loc
546+ 
547+ #pragma  acc kernels loop device_type(nvidia, radeon) vector(length:N + 1)
548+   for (unsigned  I = 0 ; I < N; ++I);
549+   //  CHECK-NEXT: acc.kernels combined(loop) {
550+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
551+   //  CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
552+   //  CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
553+   //  CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
554+   //  CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
555+   //  CHECK: acc.yield
556+   //  CHECK-NEXT: } loc
557+   //  CHECK: acc.terminator
558+   //  CHECK-NEXT: } loc
559+ 
560+ #pragma  acc kernels loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
561+   for (unsigned  I = 0 ; I < N; ++I);
562+   //  CHECK-NEXT: acc.kernels combined(loop) {
563+   //  CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
564+   //  CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
565+   //  CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
566+   //  CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
567+   //  CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
568+   //  CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
569+   //  CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
570+   //  CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
571+   //  CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
572+   //  CHECK: acc.yield
573+   //  CHECK-NEXT: } loc
574+   //  CHECK: acc.terminator
575+   //  CHECK-NEXT: } loc
404576}
0 commit comments