// Prologue of kernel _Z14invert_mappingPfS_ii; the mangled name encodes the
// signature invert_mapping(float*, float*, int, int).
// NOTE(review): the decimal number that starts every line looks like a listing
// line number carried over from another file, not PTX -- confirm before assembling.
// Load the four kernel parameters: two 64-bit values, then two 32-bit values.
32 ld.param.u64 %rd6, [_Z14invert_mappingPfS_ii_param_0];
33 ld.param.u64 %rd7, [_Z14invert_mappingPfS_ii_param_1];
34 ld.param.u32 %r18, [_Z14invert_mappingPfS_ii_param_2];
35 ld.param.u32 %r19, [_Z14invert_mappingPfS_ii_param_3];
// Convert both pointers to global-space addresses.
// %rd1 <- param_1 (base of every later st.global), %rd2 <- param_0 (base of every later ld.global).
36 cvta.to.global.u64 %rd1, %rd7;
37 cvta.to.global.u64 %rd2, %rd6;
// %r4 = %ctaid.x * %ntid.x + %tid.x : this thread's flat index along x.
38 mov.u32 %r1, %ctaid.x;
39 mov.u32 %r2, %ntid.x;
40 mov.u32 %r3, %tid.x;
41 mad.lo.s32 %r4, %r1, %r2, %r3;
// %p3 = (%r4 < param_2) AND (param_3 > 0), both signed compares.
// When %p3 is false the thread branches to BB0_10; otherwise it goes to BB0_1.
// Neither label is visible in this excerpt.
42 setp.lt.s32%p1, %r4, %r18;
43 setp.gt.s32%p2, %r19, 0;
44 and.pred %p3, %p1, %p2;
45 @!%p3 bra BB0_10;
46 bra.uni BB0_1;
// Dispatch on the low two bits of %r19.
// %r5 = %r4 * %r19 : later added to %r33 to form the load index into %rd2.
49 mul.lo.s32 %r5, %r4, %r19;
// %r23 = %r19 & 3 (i.e. %r19 modulo 4); %r33 is the running element counter, starting at 0.
50 and.b32 %r23, %r19, 3;
51 mov.u32 %r33, 0;
// %r23 == 0 -> BB0_7, == 1 -> BB0_6, == 2 -> BB0_5; any other value (3) falls through.
// NOTE(review): presumably this peels leftover iterations ahead of the 4-wide
// copy loop; the target labels are not visible here, so verify against the full listing.
52 setp.eq.s32%p4, %r23, 0;
53 @%p4 bra BB0_7;
55 setp.eq.s32%p5, %r23, 1;
56 @%p5 bra BB0_6;
58 setp.eq.s32%p6, %r23, 2;
59 @%p6 bra BB0_5;
// Two single-element copy steps. Each loads one f32 from %rd2 at element index
// (%r33 + %r5), stores it through %rd1, then increments %r33.
// NOTE(review): the embedded listing numbers jump from 59 to 70 just before this
// point and from 80 to 83 between the two steps, so labels (and possibly
// instructions) are missing from this excerpt -- confirm against the original PTX.
//
// Step A: load element (%r33 + %r5) of %rd2.
70 add.s32 %r25, %r33, %r5;
71 mul.wide.s32 %rd12, %r25, 4;
72 add.s64 %rd13, %rd2, %rd12;
73 ld.global.f32 %f2, [%rd13];
// Store index = ((-%r33) & %r18) + %r4. The mask yields 0 when %r33 == 0 and
// %r18 when %r33 == 1, i.e. %r33 * %r18 for those two values.
// NOTE(review): assumes %r33 is 0 or 1 on entry here -- TODO confirm.
74 neg.s32 %r26, %r33;
75 and.b32 %r27, %r26, %r18;
76 add.s32 %r28, %r27, %r4;
77 mul.wide.s32 %rd14, %r28, 4;
78 add.s64 %rd15, %rd1, %rd14;
79 st.global.f32 [%rd15], %f2;
80 add.s32 %r33, %r33, 1;
// Step B: same copy, with the store index computed directly as %r33 * %r18 + %r4.
83 add.s32 %r29, %r33, %r5;
84 mul.wide.s32 %rd16, %r29, 4;
85 add.s64 %rd17, %rd2, %rd16;
86 ld.global.f32 %f3, [%rd17];
87 mad.lo.s32 %r30, %r33, %r18, %r4;
88 mul.wide.s32 %rd18, %r30, 4;
89 add.s64 %rd19, %rd1, %rd18;
90 st.global.f32 [%rd19], %f3;
91 add.s32 %r33, %r33, 1;
// Four-elements-per-pass copy loop.
// If %r19 < 4 (unsigned compare) branch to BB0_10 (label not visible in this excerpt).
94 setp.lt.u32%p7, %r19, 4;
95 @%p7 bra BB0_10;
// Loop setup:
//   %r11  = %r18 * 4, used both as a byte stride (via %rd23) and as the per-pass
//           increment of the element index %r36.
//   %rd27 = %rd2 + 4 * (%r19 * %r4 + %r33) : byte address of the next element to load.
//   %r36  = %r33 * %r18 + %r4              : element index of the next store into %rd1.
97 shl.b32 %r11, %r18, 2;
98 mad.lo.s32 %r32, %r19, %r4, %r33;
99 mul.wide.s32 %rd20, %r32, 4;
100 add.s64 %rd27, %rd2, %rd20;
101 mad.lo.s32 %r36, %r33, %r18, %r4;
// Loop body: read four consecutive f32 values at %rd27 + 0/4/8/12 and write them
// to %rd1 at element index %r36, then at three further addresses each %rd23 bytes
// (= %r18 elements) beyond the previous one.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
// %rd23 = sign-extended %r11 : byte distance between successive stores.
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
// Advance: load pointer by 16 bytes (4 floats), store index by 4 * %r18 elements,
// counter by 4; branch back to BB0_9 while %r33 < %r19 (signed compare).
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Seven back-to-back repetitions of the same 19-instruction sequence, each
// carrying the same embedded listing numbers 104-122, followed by `ret`.
// NOTE(review): identical listing numbers on every repetition suggest a
// listing/extraction duplication rather than extra unrolling -- confirm against
// the original PTX before relying on this text.
//
// What one repetition does (registers %rd1, %rd27, %r36, %r11, %r33, %r19 are set
// by earlier instructions):
//   - loads four consecutive f32 values from %rd27 + 0/4/8/12;
//   - stores the first at %rd1 + 4 * %r36 and each following one %rd23 bytes
//     further on, where %rd23 is the sign-extended %r11;
//   - advances %rd27 by 16 bytes, %r36 by %r11 and %r33 by 4;
//   - branches to BB0_9 (label not visible in this excerpt) while %r33 < %r19.
//
// Repetition 1 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Repetition 2 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Repetition 3 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Repetition 4 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Repetition 5 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Repetition 6 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Repetition 7 of 7.
104 ld.global.f32 %f4, [%rd27];
105 mul.wide.s32 %rd21, %r36, 4;
106 add.s64 %rd22, %rd1, %rd21;
107 st.global.f32 [%rd22], %f4;
108 ld.global.f32 %f5, [%rd27+4];
109 cvt.s64.s32%rd23, %r11;
110 add.s64 %rd24, %rd22, %rd23;
111 st.global.f32 [%rd24], %f5;
112 ld.global.f32 %f6, [%rd27+8];
113 add.s64 %rd25, %rd24, %rd23;
114 st.global.f32 [%rd25], %f6;
115 ld.global.f32 %f7, [%rd27+12];
116 add.s64 %rd26, %rd25, %rd23;
117 st.global.f32 [%rd26], %f7;
118 add.s64 %rd27, %rd27, 16;
119 add.s32 %r36, %r36, %r11;
120 add.s32 %r33, %r33, 4;
121 setp.lt.s32%p8, %r33, %r19;
122 @%p8 bra BB0_9;
// Kernel exit.
125 ret;
